1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2022 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include "sfn_shader.h"
28 #include "sfn_instr.h"
29 #include "sfn_instr_alugroup.h"
30 #include "sfn_instr_export.h"
31 #include "sfn_instr_fetch.h"
32 #include "sfn_instr_lds.h"
33 #include "sfn_instr_mem.h"
34
35 #include "sfn_instr_controlflow.h"
36 #include "sfn_liverangeevaluator.h"
37
38 #include "sfn_shader_cs.h"
39 #include "sfn_shader_fs.h"
40 #include "sfn_shader_vs.h"
41 #include "sfn_shader_gs.h"
42 #include "sfn_shader_tess.h"
43
44 #include "sfn_debug.h"
45 #include "gallium/drivers/r600/r600_shader.h"
46
47 #include "tgsi/tgsi_from_mesa.h"
48
49 #include "nir.h"
50
51 #include <numeric>
52 #include <sstream>
53
54 namespace r600 {
55
56 using std::string;
57
58 std::pair<unsigned, unsigned>
r600_get_varying_semantic(unsigned varying_location)59 r600_get_varying_semantic(unsigned varying_location)
60 {
61 std::pair<unsigned, unsigned> result;
62 tgsi_get_gl_varying_semantic(static_cast<gl_varying_slot>(varying_location),
63 true, &result.first, &result.second);
64
65 if (result.first == TGSI_SEMANTIC_GENERIC) {
66 result.second += 9;
67 } else if (result.first == TGSI_SEMANTIC_PCOORD) {
68 result.second = 8;
69 }
70 return result;
71 }
72
/* Set the semantic index (SID) and derive the SPI semantic id that the
 * hardware uses to match outputs of one stage with inputs of the next. */
void ShaderIO::set_sid(int sid)
{
   m_sid = sid;
   switch (m_name) {
   case TGSI_SEMANTIC_POSITION:
   case TGSI_SEMANTIC_PSIZE:
   case TGSI_SEMANTIC_EDGEFLAG:
   case TGSI_SEMANTIC_FACE:
   case TGSI_SEMANTIC_SAMPLEMASK:
   case TGSI_SEMANTIC_CLIPVERTEX:
      /* Fixed-function values don't participate in SPI-SID based linkage */
      m_spi_sid = 0;
      break;
   case TGSI_SEMANTIC_GENERIC:
   case TGSI_SEMANTIC_TEXCOORD:
   case TGSI_SEMANTIC_PCOORD:
      /* +1 because 0 means "no SPI semantic id" (see cases above) */
      m_spi_sid = m_sid + 1;
      break;
   default:
      /* For non-generic params - pack name and sid into 8 bits */
      m_spi_sid = (0x80 | (m_name << 3) | m_sid) + 1;
   }
}
95
/* Force a specific SPI semantic id, overriding the value derived in set_sid(). */
void ShaderIO::override_spi_sid(int spi)
{
   m_spi_sid = spi;
}
100
/* Write a one-line, parseable description of this IO slot; the format is
 * consumed again by Shader::read_input()/read_output(). */
void ShaderIO::print(std::ostream& os) const
{
   os << m_type << " LOC:" << m_location << " NAME:" << m_name;
   do_print(os);

   /* Only slots with a semantic index carry SID/SPI_SID information */
   if (m_sid > 0) {
      os << " SID:" << m_sid << " SPI_SID:" << m_spi_sid;
   }
}
110
111
/* Base IO slot: type is the serialization tag ("INPUT"/"OUTPUT"),
 * loc the driver location, name the TGSI semantic name. */
ShaderIO::ShaderIO(const char *type, int loc, int name):
   m_type(type),
   m_location(loc),
   m_name(name)
{
}
118
/* Default output slot with location and semantic name unset (-1). */
ShaderOutput::ShaderOutput():
   ShaderIO("OUTPUT", -1, -1)
{
}
123
/* Output slot at the given driver location with a TGSI semantic name and
 * a component writemask (bit per channel). */
ShaderOutput::ShaderOutput(int location, int name, int writemask):
   ShaderIO("OUTPUT", location, name),
   m_writemask(writemask)
{

}
130
/* Append the output-specific part of the serialized form (the writemask). */
void ShaderOutput::do_print(std::ostream& os) const
{
   os << " MASK:" << m_writemask;
}
135
136
/* Input slot at the given driver location with a TGSI semantic name. */
ShaderInput::ShaderInput(int location, int name):
   ShaderIO("INPUT", location, name)
{
}
141
/* Default input slot with location and semantic name unset (-1). */
ShaderInput::ShaderInput():
   ShaderInput(-1, -1)
{
}
146
147
/* Append the input-specific part of the serialized form; zero-valued
 * interpolation settings are omitted, matching the parser in read_input(). */
void ShaderInput::do_print(std::ostream& os) const
{
   if (m_interpolator)
      os << " INTERP:" << m_interpolator;
   if (m_interpolate_loc)
      os << " ILOC:" << m_interpolate_loc;
   if (m_uses_interpolate_at_centroid)
      os << " USE_CENTROID";
}
157
/* Record the interpolation mode, interpolation location, and whether this
 * input is read via interpolateAtCentroid. */
void ShaderInput::set_interpolator(int interp, int interp_loc, bool uses_interpolate_at_centroid)
{
   m_interpolator = interp;
   m_interpolate_loc = interp_loc;
   m_uses_interpolate_at_centroid = uses_interpolate_at_centroid;
}
164
/* Flag this input as being read with interpolateAtCentroid. */
void ShaderInput::set_uses_interpolate_at_centroid()
{
   m_uses_interpolate_at_centroid = true;
}
169
/* Create a shader of the given stage type and open the first (outermost,
 * nesting depth 0) instruction block. */
Shader::Shader(const char *type_id):
   m_current_block(nullptr),
   m_type_id(type_id),
   m_chip_class(ISA_CC_R600),
   m_next_block(0)
{
   m_instr_factory = new InstrFactory();
   /* The instruction chain visitor needs a back-pointer to this shader */
   m_chain_instr.this_shader = this;
   start_new_block(0);
}
180
set_input_gpr(int driver_lcation,int gpr)181 void Shader::set_input_gpr(int driver_lcation, int gpr)
182 {
183 auto i = m_inputs.find(driver_lcation);
184 assert(i != m_inputs.end());
185 i->second.set_gpr(gpr);
186 }
187
add_info_from_string(std::istream & is)188 bool Shader::add_info_from_string(std::istream& is)
189 {
190 std::string type;
191 is >> type;
192
193 if (type == "CHIPCLASS")
194 return read_chipclass(is);
195 if (type == "OUTPUT")
196 return read_output(is);
197 if (type == "INPUT")
198 return read_input(is);
199 if (type == "PROP")
200 return read_prop(is);
201 if (type == "SYSVALUES")
202 return allocate_registers_from_string(is, pin_fully);
203 if (type == "REGISTERS")
204 return allocate_registers_from_string(is, pin_free);
205 if (type == "ARRAYS")
206 return allocate_arrays_from_string(is);
207
208
209 return false;
210 }
211
/* Parse one serialized instruction line and append it to the current
 * block; "BLOCK_START"/"BLOCK_END" markers control block boundaries. */
void Shader::emit_instruction_from_string(const std::string& s)
{

   sfn_log << SfnLog::instr << "Create Instr from '" << s << "'\n";
   if (s == "BLOCK_START") {
      /* Only open a new block if the current one already has content */
      if (!m_current_block->empty()) {
         start_new_block(m_current_block->nesting_offset());
         sfn_log << SfnLog::instr << "   Emit start block\n";
      }
      return;
   }

   if (s == "BLOCK_END") {
      return;
   }

   auto ir = m_instr_factory->from_string(s, m_current_block->nesting_depth());
   if (ir) {
      emit_instruction(ir);
      /* Control-flow instructions that close a block force a new one */
      if (ir->end_block())
         start_new_block(ir->nesting_offset());
      sfn_log << SfnLog::instr << "   " << *ir << "\n";
   }
}
236
/* Parse one OUTPUT line produced by ShaderIO::print():
 * "LOC:n NAME:n MASK:n [SID:n SPI_SID:n]". Always returns true. */
bool Shader::read_output(std::istream& is)
{
   string value;
   is >> value;
   int pos = int_from_string_with_prefix(value, "LOC:");
   is >> value;
   int name = int_from_string_with_prefix(value, "NAME:");
   is >> value;
   int mask = int_from_string_with_prefix(value, "MASK:");
   ShaderOutput output(pos, name, mask);

   value.clear();
   is >> value;
   if (!value.empty()) {
      int sid = int_from_string_with_prefix(value, "SID:");
      output.set_sid(sid);
      is >> value;
      /* SPI_SID is derived from SID in set_sid(); the serialized value is
       * only cross-checked, not stored */
      int spi_sid = int_from_string_with_prefix(value, "SPI_SID:");
      assert(spi_sid == output.spi_sid());
   }

   add_output(output);
   return true;
}
261
262
/* Parse one INPUT line produced by ShaderIO::print()/ShaderInput::do_print():
 * "LOC:n NAME:n [SID:n SPI_SID:n] [INTERP:n] [ILOC:n] [USE_CENTROID]".
 * Unknown tokens hit an assert; always returns true otherwise. */
bool Shader::read_input(std::istream& is)
{
   string value;
   is >> value;
   int pos = int_from_string_with_prefix(value, "LOC:");
   is >> value;
   int name = int_from_string_with_prefix(value, "NAME:");

   value.clear();

   ShaderInput input(pos, name);

   int interp = 0;
   int interp_loc = 0;
   bool use_centroid = false;

   is >> value;
   while (!value.empty()) {
      if (value.substr(0, 4) == "SID:") {
         int sid = int_from_string_with_prefix(value, "SID:");
         input.set_sid(sid);
      } else if (value.substr(0, 8) == "SPI_SID:") {
         /* SPI_SID is derived from SID in set_sid(); the serialized value
          * is only cross-checked, not stored */
         int spi_sid = int_from_string_with_prefix(value, "SPI_SID:");
         assert(spi_sid == input.spi_sid());
      } else if (value.substr(0, 7) == "INTERP:") {
         interp = int_from_string_with_prefix(value, "INTERP:");
      } else if (value.substr(0, 5) == "ILOC:") {
         interp_loc = int_from_string_with_prefix(value, "ILOC:");
      } else if (value == "USE_CENTROID") {
         use_centroid = true;
      } else {
         std::cerr << "Unknown parse value '" << value << "'";
         assert(!value.c_str());
      }
      value.clear();
      is >> value;
   }

   input.set_interpolator(interp, interp_loc, use_centroid);

   add_input(input);
   return true;
}
306
/* Allocate the registers named on the rest of the current line.  A token
 * containing '@' names a single register channel; otherwise the token
 * names a vec4 allocated with the given pin policy.  Returns false if no
 * line could be read. */
bool Shader::allocate_registers_from_string(std::istream& is, Pin pin)
{
   std::string line;
   if (!std::getline(is, line))
      return false;

   std::istringstream iline(line);

   while (!iline.eof()) {
      string reg_str;
      iline >> reg_str;

      if (reg_str.empty())
         break;

      if (strchr(reg_str.c_str(), '@')) {
         value_factory().dest_from_string(reg_str);
      } else {
         RegisterVec4::Swizzle swz = {0,1,2,3};
         auto regs = value_factory().dest_vec4_from_string(reg_str, swz, pin);
         for (int i = 0; i < 4; ++i) {
            /* Fully pinned components (e.g. sysvalues) must stay live
             * from the start of the shader */
            if (swz[i] < 4 && pin == pin_fully) {
               regs[i]->pin_live_range(true, false);
            }
         }
      }
   }
   return true;
}
336
allocate_arrays_from_string(std::istream & is)337 bool Shader::allocate_arrays_from_string(std::istream& is)
338 {
339 std::string line;
340 if (!std::getline(is, line))
341 return false;
342
343 std::istringstream iline(line);
344
345 while (!iline.eof()) {
346 string reg_str;
347 iline >> reg_str;
348
349 if (reg_str.empty())
350 break;
351
352 value_factory().array_from_string(reg_str);
353 }
354 return true;
355 }
356
read_chipclass(std::istream & is)357 bool Shader::read_chipclass(std::istream& is)
358 {
359 string name;
360 is >> name;
361 if (name == "R600")
362 m_chip_class = ISA_CC_R600;
363 else if (name == "R700")
364 m_chip_class = ISA_CC_R700;
365 else if (name == "EVERGREEN")
366 m_chip_class = ISA_CC_EVERGREEN;
367 else if (name == "CAYMAN")
368 m_chip_class = ISA_CC_CAYMAN;
369 else
370 return false;
371 return true;
372 }
373
/* Reserve the stage-specific registers (delegated to the subclass) and
 * emit the setup code for hw atomics and the RAT return address. */
void Shader::allocate_reserved_registers()
{
   m_instr_factory->value_factory().set_virtual_register_base(0);
   auto reserved_registers_end = do_allocate_reserved_registers();
   /* Virtual registers allocated later must not collide with the
    * stage-reserved range */
   m_instr_factory->value_factory().set_virtual_register_base(reserved_registers_end);
   if (!m_atomics.empty()) {
      m_atomic_update = value_factory().temp_register();
      auto alu = new AluInstr(op1_mov, m_atomic_update,
                              value_factory().one_i(),
                              AluInstr::last_write);
      alu->set_alu_flag(alu_no_schedule_bias);
      emit_instruction(alu);
   }

   if(m_flags.test(sh_needs_sbo_ret_address)) {
      /* Compute a per-lane return address for RAT/SSBO operations from the
       * SE id, the HW wave id and the lane position within the wave:
       * addr = (se_id * 256 + hw_wave_id) * 0x40 + mbcnt(lane) */
      m_rat_return_address = value_factory().temp_register(0);
      auto temp0 = value_factory().temp_register(0);
      auto temp1 = value_factory().temp_register(1);
      auto temp2 = value_factory().temp_register(2);

      auto group = new AluGroup();
      group->add_instruction(new AluInstr(op1_mbcnt_32lo_accum_prev_int, temp0, value_factory().literal(-1), {alu_write}));
      group->add_instruction(new AluInstr(op1_mbcnt_32hi_int, temp1, value_factory().literal(-1), {alu_write}));
      emit_instruction(group);
      emit_instruction(new AluInstr(op3_muladd_uint24, temp2, value_factory().inline_const(ALU_SRC_SE_ID, 0),
                                    value_factory().literal(256), value_factory().inline_const(ALU_SRC_HW_WAVE_ID, 0), {alu_write, alu_last_instr}));
      emit_instruction(new AluInstr(op3_muladd_uint24, m_rat_return_address,
                                    temp2, value_factory().literal(0x40), temp0,
                                    {alu_write, alu_last_instr}));
   }
}
405
translate_from_nir(nir_shader * nir,const pipe_stream_output_info * so_info,struct r600_shader * gs_shader,r600_shader_key & key,r600_chip_class chip_class)406 Shader *Shader::translate_from_nir(nir_shader *nir, const pipe_stream_output_info *so_info,
407 struct r600_shader* gs_shader,
408 r600_shader_key& key, r600_chip_class chip_class)
409 {
410 Shader *shader = nullptr;
411
412 switch (nir->info.stage) {
413 case MESA_SHADER_FRAGMENT:
414 if (chip_class >= ISA_CC_EVERGREEN)
415 shader = new FragmentShaderEG(key);
416 else
417 shader = new FragmentShaderR600(key);
418 break;
419 case MESA_SHADER_VERTEX:
420 shader = new VertexShader(so_info, gs_shader, key);
421 break;
422 case MESA_SHADER_GEOMETRY:
423 shader = new GeometryShader(key);
424 break;
425 case MESA_SHADER_TESS_CTRL:
426 shader = new TCSShader(key);
427 break;
428 case MESA_SHADER_TESS_EVAL:
429 shader = new TESShader(so_info, gs_shader, key);
430 break;
431 case MESA_SHADER_COMPUTE:
432 shader = new ComputeShader(key);
433 break;
434 default:
435 return nullptr;
436 }
437
438 shader->set_info(nir);
439
440 shader->set_chip_class(chip_class);
441 if (!shader->process(nir))
442 return nullptr;
443
444 return shader;
445 }
446
/* Copy the per-shader info needed later; currently only the scratch size. */
void Shader::set_info(nir_shader *nir)
{
   m_scratch_size = nir->scratch_size;
}
451
/* Access the value factory owned by the instruction factory. */
ValueFactory& Shader::value_factory()
{
   return m_instr_factory->value_factory();
}
456
457
/* Main translation driver: scan the NIR shader for declarations and
 * side-effect info, allocate registers, then lower every control-flow
 * node of the (single, inlined) function.  Returns false on failure. */
bool Shader::process(nir_shader *nir)
{
   m_ssbo_image_offset = nir->info.num_images;

   if (nir->info.use_legacy_math_rules)
      set_flag(sh_legacy_math_rules);

   nir_foreach_uniform_variable(var, nir)
      scan_uniforms(var);

   // at this point all functions should be inlined
   const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&nir->functions));

   if (!scan_shader(func))
      return false;

   /* Reserved registers must be allocated before the general-purpose
    * ones so the virtual register base is set up correctly */
   allocate_reserved_registers();

   allocate_local_registers(&func->impl->registers);

   sfn_log << SfnLog::trans << "Process shader \n";
   foreach_list_typed(nir_cf_node, node, node, &func->impl->body) {
      if (!process_cf_node(node))
         return false;
   }

   finalize();

   return true;
}
488
/* Allocate NIR local registers; if any are accessed indirectly, record
 * that the temporary file needs indirect addressing support. */
void Shader::allocate_local_registers(const exec_list *registers)
{
   if (value_factory().allocate_registers(registers))
      m_indirect_files |= 1 << TGSI_FILE_TEMPORARY;
}
494
/* Pre-pass over all instructions to collect sysvalue/side-effect info,
 * then assign LDS positions to inputs and param slots to outputs.
 * Returns false on an unhandled sysvalue access. */
bool Shader::scan_shader(const nir_function *func)
{

   nir_foreach_block(block, func->impl) {
      nir_foreach_instr(instr, block) {
         if (!scan_instruction(instr)) {
            fprintf(stderr, "Unhandled sysvalue access ");
            nir_print_instr(instr, stderr);
            fprintf(stderr, "\n");
            return false;
         }
      }
   }

   int lds_pos = 0;
   for (auto& [index, input] : m_inputs) {
      if (input.need_lds_pos()) {
         /* Pre-evergreen hardware reads these inputs from GPRs instead of
          * LDS, so mirror the position into the GPR assignment */
         if (chip_class() < ISA_CC_EVERGREEN)
            input.set_gpr(lds_pos);
         input.set_lds_pos(lds_pos++);
      }
   }

   int param_id = 0;
   for (auto& [index, out] : m_outputs) {
      if (out.is_param())
         out.set_pos(param_id++);
   }

   return true;
}
526
/* Record info about one uniform variable: hw atomic counters get an entry
 * in m_atomics and update the atomic bookkeeping; images/SSBOs set the
 * corresponding usage flags.  Always returns true. */
bool Shader::scan_uniforms(nir_variable *uniform)
{
   if (uniform->type->contains_atomic()) {
      int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE;
      m_nhwatomic += natomics;

      if (uniform->type->is_array())
         m_indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;

      m_flags.set(sh_uses_atomics);

      r600_shader_atomic atom = {0};

      atom.buffer_id = uniform->data.binding;
      atom.hw_idx = m_atomic_base + m_next_hwatomic_loc;

      /* Counter offsets are in bytes, hw slots are in dwords */
      atom.start = uniform->data.offset >> 2;
      atom.end = atom.start + natomics - 1;

      /* Remember the first hw location used for each binding point */
      if (m_atomic_base_map.find(uniform->data.binding) ==
          m_atomic_base_map.end())
         m_atomic_base_map[uniform->data.binding] = m_next_hwatomic_loc;

      m_next_hwatomic_loc += natomics;

      m_atomic_file_count += atom.end - atom.start + 1;

      sfn_log << SfnLog::io << "HW_ATOMIC file count: "
              << m_atomic_file_count << "\n";

      m_atomics.push_back(atom);
   }

   auto type = uniform->type->is_array() ? uniform->type->without_array(): uniform->type;
   if (type->is_image() || uniform->data.mode == nir_var_mem_ssbo) {
      m_flags.set(sh_uses_images);
      /* SSBO arrays are not indexed through the image file */
      if (uniform->type->is_array() && ! (uniform->data.mode == nir_var_mem_ssbo))
         m_indirect_files |= 1 << TGSI_FILE_IMAGE;
   }

   return true;
}
569
570
scan_instruction(nir_instr * instr)571 bool Shader::scan_instruction(nir_instr *instr)
572 {
573 if (do_scan_instruction(instr))
574 return true;
575
576 if (instr->type != nir_instr_type_intrinsic)
577 return true;
578
579 auto intr = nir_instr_as_intrinsic(instr);
580
581 // handle unhandled instructions
582 switch (intr->intrinsic) {
583 case nir_intrinsic_ssbo_atomic_add:
584 case nir_intrinsic_ssbo_atomic_comp_swap:
585 case nir_intrinsic_ssbo_atomic_or:
586 case nir_intrinsic_ssbo_atomic_xor:
587 case nir_intrinsic_ssbo_atomic_imax:
588 case nir_intrinsic_ssbo_atomic_imin:
589 case nir_intrinsic_ssbo_atomic_umax:
590 case nir_intrinsic_ssbo_atomic_umin:
591 case nir_intrinsic_ssbo_atomic_and:
592 case nir_intrinsic_ssbo_atomic_exchange:
593 case nir_intrinsic_image_load:
594 case nir_intrinsic_image_atomic_add:
595 case nir_intrinsic_image_atomic_and:
596 case nir_intrinsic_image_atomic_or:
597 case nir_intrinsic_image_atomic_xor:
598 case nir_intrinsic_image_atomic_exchange:
599 case nir_intrinsic_image_atomic_comp_swap:
600 case nir_intrinsic_image_atomic_umin:
601 case nir_intrinsic_image_atomic_umax:
602 case nir_intrinsic_image_atomic_imin:
603 case nir_intrinsic_image_atomic_imax:
604 m_flags.set(sh_needs_sbo_ret_address);
605 FALLTHROUGH;
606 case nir_intrinsic_image_store:
607 case nir_intrinsic_store_ssbo:
608 m_flags.set(sh_writes_memory);
609 m_flags.set(sh_uses_images);
610 break;
611 case nir_intrinsic_memory_barrier_image:
612 case nir_intrinsic_memory_barrier_buffer:
613 case nir_intrinsic_memory_barrier:
614 case nir_intrinsic_group_memory_barrier:
615 m_chain_instr.prepare_mem_barrier = true;
616 default:
617 ;
618 }
619 return true;
620 }
621
process_cf_node(nir_cf_node * node)622 bool Shader::process_cf_node(nir_cf_node *node)
623 {
624 SFN_TRACE_FUNC(SfnLog::flow, "CF");
625
626 switch (node->type) {
627 case nir_cf_node_block:
628 return process_block(nir_cf_node_as_block(node));
629 case nir_cf_node_if:
630 return process_if(nir_cf_node_as_if(node));
631 case nir_cf_node_loop:
632 return process_loop(nir_cf_node_as_loop(node));
633 default:
634 return false;
635 }
636
637 }
638
639 static bool
child_block_empty(const exec_list & list)640 child_block_empty (const exec_list& list)
641 {
642 if (list.is_empty())
643 return true;
644
645 bool result = true;
646
647 foreach_list_typed(nir_cf_node, n, node, &list) {
648
649 if (n->type == nir_cf_node_block) {
650 if (!nir_cf_node_as_block(n)->instr_list.is_empty())
651 return false;
652 }
653 if (n->type == nir_cf_node_if)
654 return false;
655 }
656 return result;
657 }
658
/* Lower a NIR if statement: emit the predicated IF, then the then-branch,
 * an ELSE only when the else-list actually contains code, and the ENDIF. */
bool Shader::process_if(nir_if *if_stmt)
{
   SFN_TRACE_FUNC(SfnLog::flow, "IF");

   if (!emit_if_start(if_stmt))
      return false;

   foreach_list_typed(nir_cf_node, n, node, &if_stmt->then_list) {
      SFN_TRACE_FUNC(SfnLog::flow, "IF-then");
      if (!process_cf_node(n))
         return false;
   }

   /* Skip the ELSE clause entirely when it would be empty */
   if (!child_block_empty(if_stmt->else_list)) {
      if (!emit_control_flow(ControlFlowInstr::cf_else))
         return false;
      foreach_list_typed(nir_cf_node, n, node, &if_stmt->else_list)
         if (!process_cf_node(n)) return false;
   }

   if (!emit_control_flow(ControlFlowInstr::cf_endif))
      return false;

   return true;
}
684
/* Emit the predicate setup and IF instruction for a NIR if statement and
 * open the nested block.  Always returns true. */
bool Shader::emit_if_start(nir_if *if_stmt)
{
   /* Predicate: condition != 0; updates both the execution mask and the
    * predicate, and pushes the state before the ALU clause */
   auto value = value_factory().src(if_stmt->condition, 0);
   AluInstr *pred = new AluInstr(op2_pred_setne_int, value_factory().temp_register(),
                                 value, value_factory().zero(), AluInstr::last);
   pred->set_alu_flag(alu_update_exec);
   pred->set_alu_flag(alu_update_pred);
   pred->set_cf_type(cf_alu_push_before);

   IfInstr *ir = new IfInstr(pred);
   emit_instruction(ir);
   start_new_block(1);
   return true;
}
699
/* Emit one control-flow instruction, track the loop stack, and open a new
 * instruction block at the adjusted nesting depth.  Always returns true. */
bool Shader::emit_control_flow(ControlFlowInstr::CFType type)
{
   auto ir = new ControlFlowInstr(type);
   emit_instruction(ir);
   int depth = 0;
   switch (type) {
   case ControlFlowInstr::cf_loop_begin:
      m_loops.push_back(ir);
      m_nloops++;
      depth = 1;
      break;
   case ControlFlowInstr::cf_loop_end:
      m_loops.pop_back();
      /* loop end also reduces the nesting depth by one */
      FALLTHROUGH;
   case ControlFlowInstr::cf_endif:
      depth = -1;
      break;
   default:
      ;
   }

   start_new_block(depth);
   return true;
}
724
process_loop(nir_loop * node)725 bool Shader::process_loop(nir_loop *node)
726 {
727 SFN_TRACE_FUNC(SfnLog::flow, "LOOP");
728 if (!emit_control_flow(ControlFlowInstr::cf_loop_begin))
729 return false;
730
731 foreach_list_typed(nir_cf_node, n, node, &node->body)
732 if (!process_cf_node(n)) return false;
733
734 if (!emit_control_flow(ControlFlowInstr::cf_loop_end))
735 return false;
736
737 return true;
738 }
739
process_block(nir_block * block)740 bool Shader::process_block(nir_block *block)
741 {
742 SFN_TRACE_FUNC(SfnLog::flow, "BLOCK");
743
744 nir_foreach_instr(instr, block) {
745 sfn_log << SfnLog::instr << "FROM:" << *instr << "\n";
746 bool r = process_instr(instr);
747 if (!r) {
748 sfn_log << SfnLog::err << "R600: Unsupported instruction: "
749 << *instr << "\n";
750 return false;
751 }
752 }
753 return true;
754 }
755
/* Delegate lowering of a single NIR instruction to the instruction factory. */
bool Shader::process_instr(nir_instr *instr)
{
   return m_instr_factory->from_nir(instr, *this);
}
760
/* Lower one NIR intrinsic.  Stage-specific intrinsics, GDS atomic
 * counters and RAT (image/SSBO) accesses are tried first; everything
 * else is dispatched here.  Returns false for unsupported intrinsics. */
bool Shader::process_intrinsic(nir_intrinsic_instr *intr)
{
   if (process_stage_intrinsic(intr))
      return true;

   if (GDSInstr::emit_atomic_counter(intr, *this)) {
      set_flag(sh_writes_memory);
      return true;
   }

   if (RatInstr::emit(intr, *this))
      return true;

   switch (intr->intrinsic) {
   case nir_intrinsic_store_output: return store_output(intr);
   case nir_intrinsic_load_input: return load_input(intr);
   case nir_intrinsic_load_uniform: return load_uniform(intr);
   case nir_intrinsic_load_ubo_vec4: return load_ubo(intr);
   case nir_intrinsic_store_scratch: return emit_store_scratch(intr);
   case nir_intrinsic_load_scratch: return emit_load_scratch(intr);
   case nir_intrinsic_store_local_shared_r600: return emit_local_store(intr);
   case nir_intrinsic_load_local_shared_r600: return emit_local_load(intr);
   case nir_intrinsic_load_tcs_in_param_base_r600: return emit_load_tcs_param_base(intr, 0);
   case nir_intrinsic_load_tcs_out_param_base_r600: return emit_load_tcs_param_base(intr, 16);
   // We only emit the group barrier, barriers across work groups
   // are not yet implemented
   case nir_intrinsic_control_barrier:
   case nir_intrinsic_memory_barrier_tcs_patch:
   case nir_intrinsic_memory_barrier_shared:
      return emit_barrier(intr);
   case nir_intrinsic_memory_barrier_atomic_counter:
      /* Atomic counters need no extra synchronization here */
      return true;
   case nir_intrinsic_group_memory_barrier:
   case nir_intrinsic_memory_barrier_image:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_memory_barrier:
      return emit_wait_ack();

   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
      return emit_atomic_local_shared(intr);
   case nir_intrinsic_shader_clock:
      return emit_shader_clock(intr);

   default:
      return false;
   }
}
817
/* Map a shared-memory atomic intrinsic to the LDS opcode; when the result
 * is used, the *_RET variant is selected.  Exchange and compare-swap only
 * exist as returning operations. */
static ESDOp
lds_op_from_intrinsic(nir_intrinsic_op op, bool ret) {
   switch (op) {
   case nir_intrinsic_shared_atomic_add:
      return ret ? LDS_ADD_RET : LDS_ADD;
   case nir_intrinsic_shared_atomic_and:
      return ret ? LDS_AND_RET : LDS_AND;
   case nir_intrinsic_shared_atomic_or:
      return ret ? LDS_OR_RET : LDS_OR;
   case nir_intrinsic_shared_atomic_imax:
      return ret ? LDS_MAX_INT_RET : LDS_MAX_INT;
   case nir_intrinsic_shared_atomic_umax:
      return ret ? LDS_MAX_UINT_RET : LDS_MAX_UINT;
   case nir_intrinsic_shared_atomic_imin:
      return ret ? LDS_MIN_INT_RET : LDS_MIN_INT;
   case nir_intrinsic_shared_atomic_umin:
      return ret ? LDS_MIN_UINT_RET : LDS_MIN_UINT;
   case nir_intrinsic_shared_atomic_xor:
      return ret ? LDS_XOR_RET : LDS_XOR;
   case nir_intrinsic_shared_atomic_exchange:
      return LDS_XCHG_RET;
   case nir_intrinsic_shared_atomic_comp_swap:
      return LDS_CMP_XCHG_RET;
   default:
      unreachable("Unsupported shared atomic opcode");
   }
}
845
emit_load_to_register(PVirtualValue src)846 PRegister Shader::emit_load_to_register(PVirtualValue src)
847 {
848 assert(src);
849 PRegister dest = src->as_register();
850
851 if (!dest) {
852 dest = value_factory().temp_register();
853 emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::last_write));
854 }
855 return dest;
856 }
857
/* Lower a shared-memory atomic intrinsic to an LDS atomic instruction.
 * The returning variant is only used when the result has uses.  Always
 * returns true. */
bool Shader::emit_atomic_local_shared(nir_intrinsic_instr* instr)
{
   bool uses_retval = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);

   auto& vf = value_factory();

   auto dest_value = uses_retval ? vf.dest(instr->dest, 0, pin_free) : nullptr;

   auto op = lds_op_from_intrinsic(instr->intrinsic, uses_retval);

   auto address = vf.src(instr->src[0], 0);

   AluInstr::SrcValues src;
   src.push_back(vf.src(instr->src[1], 0));

   /* compare-swap carries the comparison value as a second source */
   if (unlikely(instr->intrinsic ==nir_intrinsic_shared_atomic_comp_swap))
      src.push_back(vf.src(instr->src[2], 0));
   emit_instruction(new LDSAtomicInstr(op, dest_value, address, src));
   return true;
}
878
evaluate_resource_offset(nir_intrinsic_instr * instr,int src_id)879 auto Shader::evaluate_resource_offset(nir_intrinsic_instr *instr, int src_id) -> std::pair<int, PRegister>
880 {
881 auto& vf = value_factory();
882
883 PRegister uav_id{nullptr};
884 int offset = 0;
885
886 auto uav_id_const = nir_src_as_const_value(instr->src[src_id]);
887 if (uav_id_const) {
888 offset += uav_id_const->u32;
889 } else {
890 auto uav_id_val = vf.src(instr->src[src_id], 0);
891 if (uav_id_val->as_register()) {
892 uav_id = uav_id_val->as_register();
893 } else {
894 uav_id = vf.temp_register();
895 emit_instruction(new AluInstr(op1_mov, uav_id, uav_id_val,
896 AluInstr::last_write));
897 }
898 }
899 return std::make_pair(offset, uav_id);
900 }
901
902
/* Lower a scratch store: gather the masked components into a group-pinned
 * vec4, then emit a scratch write either with an immediate array offset
 * (literal/inline-constant address) or with an address register. */
bool Shader::emit_store_scratch(nir_intrinsic_instr *intr)
{
   auto& vf = m_instr_factory->value_factory();

   int writemask = nir_intrinsic_write_mask(intr);


   /* swizzle 7 means "component unused" */
   RegisterVec4::Swizzle swz = {7,7,7,7};

   for (unsigned i = 0; i < intr->num_components; ++i)
      swz[i] = (1 << i) & writemask ? i : 7;

   auto value = vf.temp_vec4(pin_group, swz);
   AluInstr *ir = nullptr;
   for (unsigned i = 0; i < intr->num_components; ++i) {
      if (value[i]->chan() < 4) {
         ir = new AluInstr(op1_mov, value[i], vf.src(intr->src[0], i), AluInstr::write);
         ir->set_alu_flag(alu_no_schedule_bias);
         emit_instruction(ir);
      }
   }
   /* Nothing to store if the writemask selected no components */
   if (!ir)
      return true;

   ir->set_alu_flag(alu_last_instr);

   auto address = vf.src(intr->src[1], 0);


   int align = nir_intrinsic_align_mul(intr);
   int align_offset = nir_intrinsic_align_offset(intr);

   ScratchIOInstr *ws_ir = nullptr;

   /* Try to resolve the address to a compile-time constant offset */
   int offset = -1;
   if (address->as_literal()) {
      offset = address->as_literal()->value();
   } else if (address->as_inline_const()) {
      auto il = address->as_inline_const();
      if (il->sel() == ALU_SRC_0)
         offset = 0;
      else if (il->sel() == ALU_SRC_1_INT)
         offset = 1;
   }

   if (offset >= 0) {
      ws_ir = new ScratchIOInstr(value, offset, align, align_offset, writemask);
   } else {
      /* Dynamic address: materialize it in a register for indirect access */
      auto addr_temp = vf.temp_register(0);
      auto load_addr = new AluInstr(op1_mov, addr_temp, address, AluInstr::last_write);
      load_addr->set_alu_flag(alu_no_schedule_bias);
      emit_instruction(load_addr);

      ws_ir = new ScratchIOInstr(value, addr_temp, align, align_offset, writemask, m_scratch_size);
   }
   emit_instruction(ws_ir);

   m_flags.set(sh_needs_scratch_space);
   return true;
}
963
/* Lower a scratch load.  R700+ has a dedicated scratch fetch instruction;
 * R600 uses the generic scratch IO path with either an immediate offset
 * (literal/inline-constant address) or an address register. */
bool Shader::emit_load_scratch(nir_intrinsic_instr *intr)
{
   auto addr = value_factory().src(intr->src[0], 0);
   auto dest = value_factory().dest_vec4(intr->dest, pin_group);

   if (chip_class() >= ISA_CC_R700) {
      /* swizzle 7 means "component unused" */
      RegisterVec4::Swizzle dest_swz = {7,7,7,7};

      for (unsigned i = 0; i < intr->num_components; ++i)
         dest_swz[i] = i;

      auto *ir = new LoadFromScratch(dest, dest_swz, addr, m_scratch_size);
      emit_instruction(ir);
      /* Scratch reads must be ordered against earlier scratch accesses */
      chain_scratch_read(ir);
   } else {
      int align = nir_intrinsic_align_mul(intr);
      int align_offset = nir_intrinsic_align_offset(intr);


      /* Try to resolve the address to a compile-time constant offset */
      int offset = -1;
      if (addr->as_literal()) {
         offset = addr->as_literal()->value();
      } else if (addr->as_inline_const()) {
         auto il = addr->as_inline_const();
         if (il->sel() == ALU_SRC_0)
            offset = 0;
         else if (il->sel() == ALU_SRC_1_INT)
            offset = 1;
      }

      ScratchIOInstr *ir = nullptr;
      if (offset >= 0) {
         ir = new ScratchIOInstr(dest, offset, align, align_offset, 0xf, true);
      } else {
         /* Dynamic address: materialize it in a register for indirect access */
         auto addr_temp = value_factory().temp_register(0);
         auto load_addr = new AluInstr(op1_mov, addr_temp, addr, AluInstr::last_write);
         load_addr->set_alu_flag(alu_no_schedule_bias);
         emit_instruction(load_addr);

         ir = new ScratchIOInstr(dest, addr_temp, align, align_offset, 0xf,
                                 m_scratch_size, true);
      }
      emit_instruction(ir);
   }


   m_flags.set(sh_needs_scratch_space);

   return true;

}
1015
/* Lower a store to local (shared) memory.  A single component is written
 * with LDS_WRITE; two adjacent components with LDS_WRITE_REL.
 * NOTE(review): the mask handling assumes the write mask covers at most
 * two contiguous components starting at the lowest set bit — confirm that
 * earlier lowering guarantees this. */
bool Shader::emit_local_store(nir_intrinsic_instr *instr)
{
   unsigned write_mask = nir_intrinsic_write_mask(instr);

   auto address = value_factory().src(instr->src[1], 0);
   /* Skip leading unwritten components */
   int swizzle_base = 0;
   unsigned w = write_mask;
   while (!(w & 1)) {
      ++swizzle_base;
      w >>= 1;
   }
   write_mask = write_mask >> swizzle_base;

   if ((write_mask & 3) != 3) {
      auto value = value_factory().src(instr->src[0], swizzle_base);
      emit_instruction(new LDSAtomicInstr(LDS_WRITE, nullptr, address, {value}));
   } else {
      /* Two consecutive components: write both relative to one address */
      auto value = value_factory().src(instr->src[0], swizzle_base);
      auto value1 = value_factory().src(instr->src[0], swizzle_base + 1);
      emit_instruction(new LDSAtomicInstr(LDS_WRITE_REL, nullptr, address, {value, value1}));
   }
   return true;
}
1039
emit_local_load(nir_intrinsic_instr * instr)1040 bool Shader::emit_local_load(nir_intrinsic_instr* instr)
1041 {
1042 auto address = value_factory().src_vec(instr->src[0], instr->num_components);
1043 auto dest_value = value_factory().dest_vec(instr->dest, instr->num_components);
1044 emit_instruction(new LDSReadInstr(dest_value, address));
1045 return true;
1046 }
1047
/* Order this instruction after the previous scratch access. */
void Shader::chain_scratch_read(Instr *instr)
{
   m_chain_instr.apply(instr, &m_chain_instr.last_scratch_instr);
}
1052
chain_ssbo_read(Instr * instr)1053 void Shader::chain_ssbo_read(Instr *instr)
1054 {
1055 m_chain_instr.apply(instr, &m_chain_instr.last_ssbo_instr);
1056 }
1057
emit_wait_ack()1058 bool Shader::emit_wait_ack()
1059 {
1060 start_new_block(0);
1061 emit_instruction(new ControlFlowInstr(ControlFlowInstr::cf_wait_ack));
1062 start_new_block(0);
1063 return true;
1064 }
1065
visit(ScratchIOInstr * instr)1066 void Shader::InstructionChain::visit(ScratchIOInstr *instr)
1067 {
1068 apply(instr, &last_scratch_instr);
1069 }
1070
visit(GDSInstr * instr)1071 void Shader::InstructionChain::visit(GDSInstr *instr)
1072 {
1073 apply(instr, &last_gds_instr);
1074 for (auto& loop : this_shader->m_loops) {
1075 loop->set_instr_flag(Instr::vpm);
1076 }
1077 }
1078
visit(RatInstr * instr)1079 void Shader::InstructionChain::visit(RatInstr *instr)
1080 {
1081 apply(instr, &last_ssbo_instr);
1082 for (auto& loop : this_shader->m_loops) {
1083 loop->set_instr_flag(Instr::vpm);
1084 }
1085
1086 if (prepare_mem_barrier)
1087 instr->set_ack();
1088
1089 if (this_shader->m_current_block->inc_rat_emitted() > 15)
1090 this_shader->start_new_block(0);
1091 }
1092
apply(Instr * current,Instr ** last)1093 void Shader::InstructionChain::apply(Instr *current, Instr **last) {
1094 if (*last)
1095 current->add_required_instr(*last);
1096 *last = current;
1097 }
1098
emit_instruction(PInst instr)1099 void Shader::emit_instruction(PInst instr)
1100 {
1101 sfn_log << SfnLog::instr << " " << *instr << "\n";
1102 instr->accept(m_chain_instr);
1103 m_current_block->push_back(instr);
1104 }
1105
load_uniform(nir_intrinsic_instr * intr)1106 bool Shader::load_uniform(nir_intrinsic_instr *intr)
1107 {
1108 auto literal = nir_src_as_const_value(intr->src[0]);
1109
1110 if (literal) {
1111 AluInstr *ir = nullptr;
1112 auto pin = intr->dest.is_ssa && nir_dest_num_components(intr->dest) == 1 ?
1113 pin_free : pin_none;
1114 for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
1115
1116 sfn_log << SfnLog::io << "uniform "
1117 << intr->dest.ssa.index << " const["<< i << "]: "<< intr->const_index[i] << "\n";
1118
1119 auto uniform = value_factory().uniform(intr, i);
1120 ir = new AluInstr(op1_mov, value_factory().dest(intr->dest, i, pin),
1121 uniform, {alu_write});
1122 emit_instruction(ir);
1123 }
1124 if (ir)
1125 ir->set_alu_flag(alu_last_instr);
1126 return true;
1127 } else {
1128 auto addr = value_factory().src(intr->src[0], 0);
1129 return load_uniform_indirect(intr, addr, 16 * nir_intrinsic_base(intr), 0);
1130 }
1131 }
1132
load_uniform_indirect(nir_intrinsic_instr * intr,PVirtualValue addr,int offset,int buffer_id)1133 bool Shader::load_uniform_indirect(nir_intrinsic_instr *intr, PVirtualValue addr,
1134 int offset , int buffer_id)
1135 {
1136 auto addr_reg = addr->as_register();
1137 if (!addr) {
1138 auto tmp = value_factory().temp_register();
1139 emit_instruction(new AluInstr(op1_mov, tmp, addr, AluInstr::last_write));
1140 addr = tmp;
1141 }
1142
1143 RegisterVec4 dest = value_factory().dest_vec4(intr->dest, pin_group);
1144
1145 auto ir = new LoadFromBuffer(dest, {0,1,2,3}, addr_reg, offset, buffer_id,
1146 nullptr, fmt_32_32_32_32_float);
1147 emit_instruction(ir);
1148 m_flags.set(sh_indirect_const_file);
1149 return true;
1150 }
1151
emit_load_tcs_param_base(nir_intrinsic_instr * instr,int offset)1152 bool Shader::emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset)
1153 {
1154 auto src = value_factory().temp_register();
1155 emit_instruction(new AluInstr(op1_mov, src, value_factory().zero(),
1156 AluInstr::last_write));
1157
1158 auto dest = value_factory().dest_vec4(instr->dest, pin_group);
1159 auto fetch = new LoadFromBuffer(dest, {0,1,2,3}, src, offset,
1160 R600_LDS_INFO_CONST_BUFFER, nullptr,
1161 fmt_32_32_32_32);
1162
1163 fetch->set_fetch_flag(LoadFromBuffer::srf_mode);
1164 emit_instruction(fetch);
1165
1166 return true;
1167 }
1168
emit_shader_clock(nir_intrinsic_instr * instr)1169 bool Shader::emit_shader_clock(nir_intrinsic_instr* instr)
1170 {
1171 auto& vf = value_factory();
1172 auto group = new AluGroup();
1173 group->add_instruction(new AluInstr(op1_mov, vf.dest(instr->dest, 0, pin_chan),
1174 vf.inline_const(ALU_SRC_TIME_LO, 0), AluInstr::write));
1175 group->add_instruction(new AluInstr(op1_mov, vf.dest(instr->dest, 1, pin_chan),
1176 vf.inline_const(ALU_SRC_TIME_HI, 0), AluInstr::last_write));
1177 emit_instruction(group);
1178 return true;
1179 }
1180
1181
emit_barrier(nir_intrinsic_instr * intr)1182 bool Shader::emit_barrier(nir_intrinsic_instr* intr)
1183 {
1184 (void)intr;
1185 /* Put barrier into it's own block, so that optimizers and the
1186 * scheduler don't move code */
1187 start_new_block(0);
1188 auto op = new AluInstr(op0_group_barrier, 0);
1189 op->set_alu_flag(alu_last_instr);
1190 emit_instruction(op);
1191 start_new_block(0);
1192 return true;
1193 }
1194
/* Emit code for a UBO load. Three cases:
 *  1. non-constant buffer offset: a vertex-fetch style LoadFromBuffer,
 *     with either a constant or a register-provided buffer id;
 *  2. constant buffer id and offset: direct reads from the constant cache;
 *  3. constant offset but register buffer id: constant-cache reads with
 *     an indirect kcache bank. */
bool Shader::load_ubo(nir_intrinsic_instr *instr)
{
   auto bufid = nir_src_as_const_value(instr->src[0]);
   auto buf_offset = nir_src_as_const_value(instr->src[1]);

   if (!buf_offset) {
      /* TODO: if bufid is constant then this can also be solved by using the CF index
       * on the ALU block, and this would probably make sense when there is more than one
       * load with the same buffer ID. */

      auto addr = value_factory().src(instr->src[1], 0)->as_register();
      /* 7 marks unused components; only the loaded ones get a real swizzle. */
      RegisterVec4::Swizzle dest_swz {7,7,7,7};
      auto dest = value_factory().dest_vec4(instr->dest, pin_group);

      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
         dest_swz[i] = i + nir_intrinsic_component(instr);
      }

      LoadFromBuffer *ir;
      if (bufid) {
         /* Constant buffer id: bake it into the fetch (+1 skips buffer 0). */
         ir = new LoadFromBuffer(dest, dest_swz, addr, 0, 1 + bufid->u32,
                                 nullptr, fmt_32_32_32_32_float);
      } else {
         /* Buffer id is dynamic: load it into a register first. */
         auto buffer_id = emit_load_to_register(value_factory().src(instr->src[0], 0));
         ir = new LoadFromBuffer(dest, dest_swz, addr, 0, 1, buffer_id,
                                 fmt_32_32_32_32_float);
      }
      emit_instruction(ir);
      return true;
   }

   /* direct load using the constant cache */
   if (bufid) {
      int buf_cmp = nir_intrinsic_component(instr);

      AluInstr *ir = nullptr;
      auto pin = instr->dest.is_ssa && nir_dest_num_components(instr->dest) == 1 ?
                    pin_free : pin_none;
      /* One mov from the constant cache per destination component. */
      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {

         sfn_log << SfnLog::io << "UBO[" << bufid << "] "
                 << instr->dest.ssa.index << " const["<< i << "]: "<< instr->const_index[i] << "\n";

         /* 512 is the constant-cache base offset; kcache bank is bufid + 1. */
         auto uniform = value_factory().uniform(512 + buf_offset->u32, i + buf_cmp, bufid->u32 + 1);
         ir = new AluInstr(op1_mov, value_factory().dest(instr->dest, i, pin),
                           uniform, {alu_write});
         emit_instruction(ir);
      }
      if (ir)
         ir->set_alu_flag(alu_last_instr);
      return true;
   } else {
      /* Constant offset but dynamic buffer id: index the kcache with a
       * register value and mark the constant file as indirectly accessed. */
      int buf_cmp = nir_intrinsic_component(instr);
      AluInstr *ir = nullptr;
      auto kc_id = value_factory().src(instr->src[0], 0);

      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
         int cmp = buf_cmp + i;
         auto u = new UniformValue(512 + buf_offset->u32, cmp, kc_id);
         auto dest = value_factory().dest(instr->dest, i, pin_none);
         ir = new AluInstr(op1_mov, dest, u, AluInstr::write);
         emit_instruction(ir);
      }
      if (ir)
         ir->set_alu_flag(alu_last_instr);
      m_indirect_files |= 1 << TGSI_FILE_CONSTANT;
      return true;
   }
}
1264
start_new_block(int depth)1265 void Shader::start_new_block(int depth)
1266 {
1267 int depth_offset = m_current_block ? m_current_block->nesting_depth() : 0;
1268 m_current_block = new Block(depth + depth_offset, m_next_block++);
1269 m_root.push_back(m_current_block);
1270 }
1271
emit_simple_mov(nir_dest & dest,int chan,PVirtualValue src,Pin pin)1272 bool Shader::emit_simple_mov(nir_dest& dest, int chan, PVirtualValue src, Pin pin)
1273 {
1274 auto dst = value_factory().dest(dest, chan, pin);
1275 emit_instruction(new AluInstr(op1_mov, dst, src, AluInstr::last_write));
1276 return true;
1277 }
1278
print(std::ostream & os) const1279 void Shader::print(std::ostream& os) const
1280 {
1281 print_header(os);
1282
1283 for (auto& [dummy, i]: m_inputs) {
1284 i.print(os);
1285 os << "\n";
1286 }
1287
1288 for (auto& [dummy, o]: m_outputs) {
1289 o.print(os);
1290 os << "\n";
1291 }
1292
1293 os << "SHADER\n";
1294 for (auto& b : m_root)
1295 b->print(os);
1296 }
1297
/* Printable ISA chip-class names, indexed by the chip class value
 * (print_header asserts the index is <= ISA_CC_CAYMAN). */
const char *chip_class_names[] = {
   "R600",
   "R700",
   "EVERGREEN",
   "CAYMAN"
};
1304
print_header(std::ostream & os) const1305 void Shader::print_header(std::ostream& os) const
1306 {
1307 assert(m_chip_class <= ISA_CC_CAYMAN);
1308 os << m_type_id << "\n";
1309 os << "CHIPCLASS " << chip_class_names[m_chip_class] << "\n";
1310 print_properties(os);
1311 }
1312
print_properties(std::ostream & os) const1313 void Shader::print_properties(std::ostream& os) const
1314 {
1315 do_print_properties(os);
1316 }
1317
equal_to(const Shader & other) const1318 bool Shader::equal_to(const Shader& other) const
1319 {
1320 if (m_root.size() != other.m_root.size())
1321 return false;
1322 return std::inner_product(m_root.begin(), m_root.end(),
1323 other.m_root.begin(),
1324 true,
1325 [](bool lhs, bool rhs){ return lhs & rhs;},
1326 [](const Block::Pointer lhs, const Block::Pointer rhs) -> bool {
1327 return lhs->is_equal_to(*rhs);
1328 });
1329 }
1330
get_shader_info(r600_shader * sh_info)1331 void Shader::get_shader_info(r600_shader *sh_info)
1332 {
1333 sh_info->ninput = m_inputs.size();
1334 int lds_pos = 0;
1335 int input_array_array_loc = 0;
1336 for (auto& [index, info] : m_inputs) {
1337 r600_shader_io& io = sh_info->input[input_array_array_loc++];
1338
1339 io.sid = info.sid();
1340 io.gpr = info.gpr();
1341 io.spi_sid = info.spi_sid();
1342 io.ij_index = info.ij_index();
1343 io.name = info.name();
1344 io.interpolate = info.interpolator();
1345 io.interpolate_location = info.interpolate_loc();
1346 if (info.need_lds_pos())
1347 io.lds_pos = lds_pos++;
1348 else
1349 io.lds_pos = 0;
1350
1351 io.ring_offset = info.ring_offset();
1352 io.uses_interpolate_at_centroid = info.uses_interpolate_at_centroid();
1353
1354 sfn_log << SfnLog::io << "Emit Input [" << index << "] sid:" << io.sid << " spi_sid:" << io.spi_sid << "\n";
1355 assert(io.spi_sid >= 0);
1356 }
1357
1358 sh_info->nlds = lds_pos;
1359 sh_info->noutput = m_outputs.size();
1360 sh_info->num_loops = m_nloops;
1361 int output_array_array_loc = 0;
1362
1363 for (auto& [index, info] : m_outputs) {
1364 r600_shader_io& io = sh_info->output[output_array_array_loc++];
1365 io.sid = info.sid();
1366 io.gpr = info.gpr();
1367 io.spi_sid = info.spi_sid();
1368 io.name = info.name();
1369 io.write_mask = info.writemask();
1370
1371 sfn_log << SfnLog::io << "Emit output[" << index << "] sid:" << io.sid << " spi_sid:" << io.spi_sid << "\n";
1372 assert(io.spi_sid >= 0);
1373 }
1374
1375 sh_info->nhwatomic = m_nhwatomic;
1376 sh_info->atomic_base = m_atomic_base;
1377 sh_info->nhwatomic_ranges = m_atomics.size();
1378 for (unsigned i = 0; i < m_atomics.size(); ++i)
1379 sh_info->atomics[i] = m_atomics[i];
1380
1381 if (m_flags.test(sh_indirect_const_file))
1382 sh_info->indirect_files |= 1 << TGSI_FILE_CONSTANT;
1383
1384 if (m_flags.test(sh_indirect_atomic))
1385 sh_info->indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;
1386
1387 sh_info->uses_tex_buffers = m_flags.test(sh_uses_tex_buffer);
1388
1389 value_factory().get_shader_info(sh_info);
1390
1391 sh_info->needs_scratch_space = m_flags.test(sh_needs_scratch_space);
1392 sh_info->uses_images = m_flags.test(sh_uses_images);
1393 sh_info->uses_atomics = m_flags.test(sh_uses_atomics);
1394 sh_info->has_txq_cube_array_z_comp = m_flags.test(sh_txs_cube_array_comp);
1395 sh_info->indirect_files = m_indirect_files;
1396 do_get_shader_info(sh_info);
1397 }
1398
atomic_update()1399 PRegister Shader::atomic_update()
1400 {
1401 assert(m_atomic_update);
1402 return m_atomic_update;
1403 }
1404
remap_atomic_base(int base)1405 int Shader::remap_atomic_base(int base)
1406 {
1407 return m_atomic_base_map[base];
1408 }
1409
do_get_shader_info(r600_shader * sh_info)1410 void Shader::do_get_shader_info(r600_shader *sh_info)
1411 {
1412 sh_info->uses_atomics = m_nhwatomic > 0;
1413 }
1414
1415
input(int base) const1416 const ShaderInput& Shader::input(int base) const
1417 {
1418 auto io = m_inputs.find(base);
1419 assert(io != m_inputs.end());
1420 return io->second;
1421 }
1422
output(int base) const1423 const ShaderOutput& Shader::output(int base) const
1424 {
1425 auto io = m_outputs.find(base);
1426 assert(io != m_outputs.end());
1427 return io->second;
1428 }
1429
prepare_live_range_map()1430 LiveRangeMap Shader::prepare_live_range_map()
1431 {
1432 return m_instr_factory->value_factory().prepare_live_range_map();
1433
1434 }
1435
reset_function(ShaderBlocks & new_root)1436 void Shader::reset_function(ShaderBlocks& new_root)
1437 {
1438 std::swap(m_root, new_root);
1439 }
1440
finalize()1441 void Shader::finalize()
1442 {
1443 do_finalize();
1444 }
1445
do_finalize()1446 void Shader::do_finalize()
1447 {
1448
1449 }
1450
1451 }
1452