/* -*- mesa-c++ -*-
 *
 * Copyright (c) 2019 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "sfn_nir.h"
#include "nir_builder.h"

#include "../r600_pipe.h"
#include "../r600_shader.h"

#include "sfn_instruction_tex.h"

#include "sfn_shader_vertex.h"
#include "sfn_shader_fragment.h"
#include "sfn_shader_geometry.h"
#include "sfn_shader_compute.h"
#include "sfn_shader_tcs.h"
#include "sfn_shader_tess_eval.h"
#include "sfn_nir_lower_fs_out_to_vector.h"
#include "sfn_ir_to_assembly.h"
#include "sfn_nir_lower_alu.h"

#include <vector>

namespace r600 {

using std::vector;

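/* NirLowerInstruction bridges the C callback interface of
 * nir_shader_lower_instructions to a C++ object: the static filter_instr and
 * lower_instr trampolines cast the callback data back to the instance and
 * forward to its filter()/lower() methods. */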
NirLowerInstruction::NirLowerInstruction():
   b(nullptr)
{

}

bool NirLowerInstruction::filter_instr(const nir_instr *instr, const void *data)
{
   auto me = reinterpret_cast<const NirLowerInstruction*>(data);
   return me->filter(instr);
}

nir_ssa_def *NirLowerInstruction::lower_instr(nir_builder *b, nir_instr *instr, void *data)
{
   auto me = reinterpret_cast<NirLowerInstruction*>(data);
   me->set_builder(b);
   return me->lower(instr);
}

bool NirLowerInstruction::run(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader,
                                        filter_instr,
                                        lower_instr,
                                        (void *)this);
}


ShaderFromNir::ShaderFromNir():sh(nullptr),
   chip_class(CLASS_UNKNOWN),
   m_current_if_id(0),
   m_current_loop_id(0),
   scratch_size(0)
{
}

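/* Main translation entry point: select the stage-specific implementation,
 * scan the NIR shader for declarations and system values, allocate the
 * registers, and walk the control flow graph to emit r600 IR. */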
bool ShaderFromNir::lower(const nir_shader *shader, r600_pipe_shader *pipe_shader,
                          r600_pipe_shader_selector *sel, r600_shader_key& key,
                          struct r600_shader* gs_shader, enum chip_class _chip_class)
{
   sh = shader;
   chip_class = _chip_class;
   assert(sh);

   switch (shader->info.stage) {
   case MESA_SHADER_VERTEX:
      impl.reset(new VertexShaderFromNir(pipe_shader, *sel, key, gs_shader, chip_class));
      break;
   case MESA_SHADER_TESS_CTRL:
      sfn_log << SfnLog::trans << "Start TCS\n";
      impl.reset(new TcsShaderFromNir(pipe_shader, *sel, key, chip_class));
      break;
   case MESA_SHADER_TESS_EVAL:
      sfn_log << SfnLog::trans << "Start TESS_EVAL\n";
      impl.reset(new TEvalShaderFromNir(pipe_shader, *sel, key, gs_shader, chip_class));
      break;
   case MESA_SHADER_GEOMETRY:
      sfn_log << SfnLog::trans << "Start GS\n";
      impl.reset(new GeometryShaderFromNir(pipe_shader, *sel, key, chip_class));
      break;
   case MESA_SHADER_FRAGMENT:
      sfn_log << SfnLog::trans << "Start FS\n";
      impl.reset(new FragmentShaderFromNir(*shader, pipe_shader->shader, *sel, key, chip_class));
      break;
   case MESA_SHADER_COMPUTE:
      sfn_log << SfnLog::trans << "Start CS\n";
      impl.reset(new ComputeShaderFromNir(pipe_shader, *sel, key, chip_class));
      break;
   default:
      return false;
   }

   sfn_log << SfnLog::trans << "Process declarations\n";
   if (!process_declaration())
      return false;

   // at this point all functions should be inlined
   const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&sh->functions));

   sfn_log << SfnLog::trans << "Scan shader\n";

   if (sfn_log.has_debug_flag(SfnLog::instr))
      nir_print_shader(const_cast<nir_shader *>(shader), stderr);

   nir_foreach_block(block, func->impl) {
      nir_foreach_instr(instr, block) {
         if (!impl->scan_instruction(instr)) {
            fprintf(stderr, "Unhandled sysvalue access ");
            nir_print_instr(instr, stderr);
            fprintf(stderr, "\n");
            return false;
         }
      }
   }

   sfn_log << SfnLog::trans << "Reserve registers\n";
   if (!impl->allocate_reserved_registers()) {
      return false;
   }

   ValuePool::array_list arrays;
   sfn_log << SfnLog::trans << "Allocate local registers\n";
   foreach_list_typed(nir_register, reg, node, &func->impl->registers) {
      impl->allocate_local_register(*reg, arrays);
   }

   sfn_log << SfnLog::trans << "Emit shader start\n";
   impl->allocate_arrays(arrays);

   impl->emit_shader_start();

   sfn_log << SfnLog::trans << "Process shader \n";
   foreach_list_typed(nir_cf_node, node, node, &func->impl->body) {
      if (!process_cf_node(node))
         return false;
   }

   // Add optimizations here
   sfn_log << SfnLog::trans << "Finalize\n";
   impl->finalize();

   impl->get_array_info(pipe_shader->shader);

   if (!sfn_log.has_debug_flag(SfnLog::nomerge)) {
      sfn_log << SfnLog::trans << "Merge registers\n";
      impl->remap_registers();
   }

   sfn_log << SfnLog::trans << "Finished translating to R600 IR\n";
   return true;
}

Shader ShaderFromNir::shader() const
{
   return Shader{impl->m_output, impl->get_temp_registers()};
}

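/* Dispatch on the NIR control flow node type: blocks, ifs and loops each get
 * their own emit path, anything else is rejected. */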
bool ShaderFromNir::process_cf_node(nir_cf_node *node)
{
   SFN_TRACE_FUNC(SfnLog::flow, "CF");
   switch (node->type) {
   case nir_cf_node_block:
      return process_block(nir_cf_node_as_block(node));
   case nir_cf_node_if:
      return process_if(nir_cf_node_as_if(node));
   case nir_cf_node_loop:
      return process_loop(nir_cf_node_as_loop(node));
   default:
      return false;
   }
}

bool ShaderFromNir::process_if(nir_if *if_stmt)
{
   SFN_TRACE_FUNC(SfnLog::flow, "IF");

   if (!impl->emit_if_start(m_current_if_id, if_stmt))
      return false;

   int if_id = m_current_if_id++;
   m_if_stack.push(if_id);

   foreach_list_typed(nir_cf_node, n, node, &if_stmt->then_list)
      if (!process_cf_node(n)) return false;

   if (!if_stmt->then_list.is_empty()) {
      if (!impl->emit_else_start(if_id))
         return false;

      foreach_list_typed(nir_cf_node, n, node, &if_stmt->else_list)
         if (!process_cf_node(n)) return false;
   }

   if (!impl->emit_ifelse_end(if_id))
      return false;

   m_if_stack.pop();
   return true;
}

bool ShaderFromNir::process_loop(nir_loop *node)
{
   SFN_TRACE_FUNC(SfnLog::flow, "LOOP");
   int loop_id = m_current_loop_id++;

   if (!impl->emit_loop_start(loop_id))
      return false;

   foreach_list_typed(nir_cf_node, n, node, &node->body)
      if (!process_cf_node(n)) return false;

   if (!impl->emit_loop_end(loop_id))
      return false;

   return true;
}

bool ShaderFromNir::process_block(nir_block *block)
{
   SFN_TRACE_FUNC(SfnLog::flow, "BLOCK");
   nir_foreach_instr(instr, block) {
      int r = emit_instruction(instr);
      if (!r) {
         sfn_log << SfnLog::err << "R600: Unsupported instruction: "
                 << *instr << "\n";
         return false;
      }
   }
   return true;
}


ShaderFromNir::~ShaderFromNir()
{
}

pipe_shader_type ShaderFromNir::processor_type() const
{
   return impl->m_processor_type;
}

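/* Forward a single NIR instruction to the stage implementation. Load-const
 * instructions are skipped here because constants are loaded on demand where
 * they are used. */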
bool ShaderFromNir::emit_instruction(nir_instr *instr)
{
   assert(impl);

   sfn_log << SfnLog::instr << "Read instruction " << *instr << "\n";

   switch (instr->type) {
   case nir_instr_type_alu:
      return impl->emit_alu_instruction(instr);
   case nir_instr_type_deref:
      return impl->emit_deref_instruction(nir_instr_as_deref(instr));
   case nir_instr_type_intrinsic:
      return impl->emit_intrinsic_instruction(nir_instr_as_intrinsic(instr));
   case nir_instr_type_load_const: /* const values are loaded when needed */
      return true;
   case nir_instr_type_tex:
      return impl->emit_tex_instruction(instr);
   case nir_instr_type_jump:
      return impl->emit_jump_instruction(nir_instr_as_jump(instr));
   default:
      fprintf(stderr, "R600: %s: ShaderFromNir Unsupported instruction: type %d:'", __func__, instr->type);
      nir_print_instr(instr, stderr);
      fprintf(stderr, "'\n");
      return false;
   case nir_instr_type_ssa_undef:
      return impl->create_undef(nir_instr_as_ssa_undef(instr));
   }
}

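/* Process the shader-level declarations: record the shader info, collect the
 * inputs that are read, and let the stage implementation set up uniforms,
 * UBOs and SSBOs. */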
bool ShaderFromNir::process_declaration()
{
   impl->set_shader_info(sh);

   if (!impl->scan_inputs_read(sh))
      return false;

   // scan declarations
   nir_foreach_variable_with_modes(variable, sh, nir_var_uniform |
                                                 nir_var_mem_ubo |
                                                 nir_var_mem_ssbo) {
      if (!impl->process_uniforms(variable)) {
         fprintf(stderr, "R600: error parsing uniform variable %s\n", variable->name);
         return false;
      }
   }

   return true;
}

const std::vector<InstructionBlock>& ShaderFromNir::shader_ir() const
{
   assert(impl);
   return impl->m_output;
}


AssemblyFromShader::~AssemblyFromShader()
{
}

bool AssemblyFromShader::lower(const std::vector<InstructionBlock>& ir)
{
   return do_lower(ir);
}

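/* Scratch load/store addresses coming out of NIR are byte based; scale them
 * down here to the granularity the r600 backend uses for scratch accesses,
 * taking the width of the access into account. */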
static void
r600_nir_lower_scratch_address_impl(nir_builder *b, nir_intrinsic_instr *instr)
{
   b->cursor = nir_before_instr(&instr->instr);

   int address_index = 0;
   int align;

   if (instr->intrinsic == nir_intrinsic_store_scratch) {
      align = instr->src[0].ssa->num_components;
      address_index = 1;
   } else {
      align = instr->dest.ssa.num_components;
   }

   nir_ssa_def *address = instr->src[address_index].ssa;
   nir_ssa_def *new_address = nir_ishr(b, address, nir_imm_int(b, 4 * align));

   nir_instr_rewrite_src(&instr->instr, &instr->src[address_index],
                         nir_src_for_ssa(new_address));
}

bool r600_lower_scratch_addresses(nir_shader *shader)
{
   bool progress = false;
   nir_foreach_function(function, shader) {
      nir_builder build;
      nir_builder_init(&build, function->impl);

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
            if (op->intrinsic != nir_intrinsic_load_scratch &&
                op->intrinsic != nir_intrinsic_store_scratch)
               continue;
            r600_nir_lower_scratch_address_impl(&build, op);
            progress = true;
         }
      }
   }
   return progress;
}

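/* Keep the uniform variable list ordered by binding and, within a binding,
 * by offset, so that later processing sees the variables in a stable,
 * predictable order. */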
static void
insert_uniform_sorted(struct exec_list *var_list, nir_variable *new_var)
{
   nir_foreach_variable_in_list(var, var_list) {
      if (var->data.binding > new_var->data.binding ||
          (var->data.binding == new_var->data.binding &&
           var->data.offset > new_var->data.offset)) {
         exec_node_insert_node_before(&var->node, &new_var->node);
         return;
      }
   }
   exec_list_push_tail(var_list, &new_var->node);
}

void sort_uniforms(nir_shader *shader)
{
   struct exec_list new_list;
   exec_list_make_empty(&new_list);

   nir_foreach_uniform_variable_safe(var, shader) {
      exec_node_remove(&var->node);
      insert_uniform_sorted(&new_list, var);
   }
   exec_list_append(&shader->variables, &new_list);
}

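/* Sort the fragment shader outputs by location (and dual-source index) and
 * renumber their driver locations so the exports are emitted in a
 * well-defined order. */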
static void
insert_fsoutput_sorted(struct exec_list *var_list, nir_variable *new_var)
{

   nir_foreach_variable_in_list(var, var_list) {
      if (var->data.location > new_var->data.location ||
          (var->data.location == new_var->data.location &&
           var->data.index > new_var->data.index)) {
         exec_node_insert_node_before(&var->node, &new_var->node);
         return;
      }
   }

   exec_list_push_tail(var_list, &new_var->node);
}

void sort_fsoutput(nir_shader *shader)
{
   struct exec_list new_list;
   exec_list_make_empty(&new_list);

   nir_foreach_shader_out_variable_safe(var, shader) {
      exec_node_remove(&var->node);
      insert_fsoutput_sorted(&new_list, var);
   }

   unsigned driver_location = 0;
   nir_foreach_variable_in_list(var, &new_list)
      var->data.driver_location = driver_location++;

   exec_list_append(&shader->variables, &new_list);
}

}

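/* Map the deref-based atomic counter intrinsics to their offset-based
 * counterparts; returns nir_num_intrinsics for anything that is not an
 * atomic counter operation. */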
static nir_intrinsic_op
r600_map_atomic(nir_intrinsic_op op)
{
   switch (op) {
   case nir_intrinsic_atomic_counter_read_deref:
      return nir_intrinsic_atomic_counter_read;
   case nir_intrinsic_atomic_counter_inc_deref:
      return nir_intrinsic_atomic_counter_inc;
   case nir_intrinsic_atomic_counter_pre_dec_deref:
      return nir_intrinsic_atomic_counter_pre_dec;
   case nir_intrinsic_atomic_counter_post_dec_deref:
      return nir_intrinsic_atomic_counter_post_dec;
   case nir_intrinsic_atomic_counter_add_deref:
      return nir_intrinsic_atomic_counter_add;
   case nir_intrinsic_atomic_counter_min_deref:
      return nir_intrinsic_atomic_counter_min;
   case nir_intrinsic_atomic_counter_max_deref:
      return nir_intrinsic_atomic_counter_max;
   case nir_intrinsic_atomic_counter_and_deref:
      return nir_intrinsic_atomic_counter_and;
   case nir_intrinsic_atomic_counter_or_deref:
      return nir_intrinsic_atomic_counter_or;
   case nir_intrinsic_atomic_counter_xor_deref:
      return nir_intrinsic_atomic_counter_xor;
   case nir_intrinsic_atomic_counter_exchange_deref:
      return nir_intrinsic_atomic_counter_exchange;
   case nir_intrinsic_atomic_counter_comp_swap_deref:
      return nir_intrinsic_atomic_counter_comp_swap;
   default:
      return nir_num_intrinsics;
   }
}

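/* Replace a deref-based atomic counter access with the offset-based
 * intrinsic: build an offset from the variable's counter index plus any array
 * indices, swap it in as the first source, and record the binding as the
 * intrinsic base. */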
static bool
r600_lower_deref_instr(nir_builder *b, nir_instr *instr_, UNUSED void *cb_data)
{
   if (instr_->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(instr_);

   nir_intrinsic_op op = r600_map_atomic(instr->intrinsic);
   if (nir_num_intrinsics == op)
      return false;

   nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   if (var->data.mode != nir_var_uniform &&
       var->data.mode != nir_var_mem_ssbo &&
       var->data.mode != nir_var_mem_shared)
      return false; /* atomics passed as function arguments can't be lowered */

   const unsigned idx = var->data.binding;

   b->cursor = nir_before_instr(&instr->instr);

   nir_ssa_def *offset = nir_imm_int(b, var->data.index);
   for (nir_deref_instr *d = deref; d->deref_type != nir_deref_type_var;
        d = nir_deref_instr_parent(d)) {
      assert(d->deref_type == nir_deref_type_array);
      assert(d->arr.index.is_ssa);

      unsigned array_stride = 1;
      if (glsl_type_is_array(d->type))
         array_stride *= glsl_get_aoa_size(d->type);

      offset = nir_iadd(b, offset, nir_imul(b, d->arr.index.ssa,
                                            nir_imm_int(b, array_stride)));
   }

   /* Since the first source is a deref and the first source in the lowered
    * instruction is the offset, we can just swap it out and change the
    * opcode.
    */
   instr->intrinsic = op;
   nir_instr_rewrite_src(&instr->instr, &instr->src[0],
                         nir_src_for_ssa(offset));
   nir_intrinsic_set_base(instr, idx);

   nir_deref_instr_remove_if_unused(deref);

   return true;
}

static bool
r600_nir_lower_atomics(nir_shader *shader)
{
   /* First re-do the offsets: in hardware we start at zero for each new
    * binding, and we use an offset of one per counter. */
   int current_binding = -1;
   int current_offset = 0;
   nir_foreach_variable_with_modes(var, shader, nir_var_uniform) {
      if (!var->type->contains_atomic())
         continue;

      if (current_binding == (int)var->data.binding) {
         var->data.index = current_offset;
         current_offset += var->type->atomic_size() / ATOMIC_COUNTER_SIZE;
      } else {
         current_binding = var->data.binding;
         var->data.index = 0;
         current_offset = var->type->atomic_size() / ATOMIC_COUNTER_SIZE;
      }
   }

   return nir_shader_instructions_pass(shader, r600_lower_deref_instr,
                                       nir_metadata_block_index |
                                       nir_metadata_dominance,
                                       NULL);
}
using r600::r600_nir_lower_int_tg4;
using r600::r600_lower_scratch_addresses;
using r600::r600_lower_fs_out_to_vector;
using r600::r600_lower_ubo_to_align16;

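/* Type-size callback for nir_lower_io: r600 counts I/O locations in vec4
 * slots. */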
int
r600_glsl_type_size(const struct glsl_type *type, bool is_bindless)
{
   return glsl_count_vec4_slots(type, false, is_bindless);
}

void
r600_get_natural_size_align_bytes(const struct glsl_type *type,
                                  unsigned *size, unsigned *align)
{
   if (type->base_type != GLSL_TYPE_ARRAY) {
      *align = 1;
      *size = 1;
   } else {
      unsigned elem_size, elem_align;
      glsl_get_natural_size_align_bytes(type->fields.array,
                                        &elem_size, &elem_align);
      *align = 1;
      *size = type->length;
   }
}

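/* Lower load_shared/store_shared to the r600-specific LDS intrinsics: loads
 * get one address per component, and stores are split into at most two
 * two-component stores with matching write masks and addresses. */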
static bool
r600_lower_shared_io_impl(nir_function *func)
{
   nir_builder b;
   nir_builder_init(&b, func->impl);

   bool progress = false;
   nir_foreach_block(block, func->impl) {
      nir_foreach_instr_safe(instr, block) {

         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
         if (op->intrinsic != nir_intrinsic_load_shared &&
             op->intrinsic != nir_intrinsic_store_shared)
            continue;

         b.cursor = nir_before_instr(instr);

         if (op->intrinsic == nir_intrinsic_load_shared) {
            nir_ssa_def *addr = op->src[0].ssa;

            switch (nir_dest_num_components(op->dest)) {
            case 2: {
               auto addr2 = nir_iadd_imm(&b, addr, 4);
               addr = nir_vec2(&b, addr, addr2);
               break;
            }
            case 3: {
               auto addr2 = nir_iadd(&b, addr, nir_imm_ivec2(&b, 4, 8));
               addr = nir_vec3(&b, addr,
                               nir_channel(&b, addr2, 0),
                               nir_channel(&b, addr2, 1));
               break;
            }
            case 4: {
               addr = nir_iadd(&b, addr, nir_imm_ivec4(&b, 0, 4, 8, 12));
               break;
            }
            }

            auto load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_local_shared_r600);
            load->num_components = nir_dest_num_components(op->dest);
            load->src[0] = nir_src_for_ssa(addr);
            nir_ssa_dest_init(&load->instr, &load->dest,
                              load->num_components, 32, NULL);
            nir_ssa_def_rewrite_uses(&op->dest.ssa, &load->dest.ssa);
            nir_builder_instr_insert(&b, &load->instr);
         } else {
            nir_ssa_def *addr = op->src[1].ssa;
            for (int i = 0; i < 2; ++i) {
               unsigned test_mask = (0x3 << 2 * i);
               if (!(nir_intrinsic_write_mask(op) & test_mask))
                  continue;

               auto store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_local_shared_r600);
               unsigned writemask = nir_intrinsic_write_mask(op) & test_mask;
               nir_intrinsic_set_write_mask(store, writemask);
               store->src[0] = nir_src_for_ssa(op->src[0].ssa);
               store->num_components = store->src[0].ssa->num_components;
               bool start_even = (writemask & (1u << (2 * i)));

               auto addr2 = nir_iadd(&b, addr, nir_imm_int(&b, 8 * i + (start_even ? 0 : 4)));
               store->src[1] = nir_src_for_ssa(addr2);

               nir_builder_instr_insert(&b, &store->instr);
            }
         }
         nir_instr_remove(instr);
         progress = true;
      }
   }
   return progress;
}

static bool
r600_lower_shared_io(nir_shader *nir)
{
   bool progress = false;
   nir_foreach_function(function, nir) {
      if (function->impl &&
          r600_lower_shared_io_impl(function))
         progress = true;
   }
   return progress;
}


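/* Replace load_interpolated_input of gl_FragCoord with a plain load_input
 * that keeps the same location, base and component; the barycentric source
 * is simply dropped. See the filter below for what is matched. */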
static nir_ssa_def *
r600_lower_fs_pos_input_impl(nir_builder *b, nir_instr *instr, void *_options)
{
   auto old_ir = nir_instr_as_intrinsic(instr);
   auto load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
   nir_ssa_dest_init(&load->instr, &load->dest,
                     old_ir->dest.ssa.num_components, old_ir->dest.ssa.bit_size, NULL);
   nir_intrinsic_set_io_semantics(load, nir_intrinsic_io_semantics(old_ir));

   nir_intrinsic_set_base(load, nir_intrinsic_base(old_ir));
   nir_intrinsic_set_component(load, nir_intrinsic_component(old_ir));
   nir_intrinsic_set_dest_type(load, nir_type_float32);
   load->num_components = old_ir->num_components;
   load->src[0] = old_ir->src[1];
   nir_builder_instr_insert(b, &load->instr);
   return &load->dest.ssa;
}

bool r600_lower_fs_pos_input_filter(const nir_instr *instr, const void *_options)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   auto ir = nir_instr_as_intrinsic(instr);
   if (ir->intrinsic != nir_intrinsic_load_interpolated_input)
      return false;

   return nir_intrinsic_io_semantics(ir).location == VARYING_SLOT_POS;
}

/* Strip the interpolator specification, it is not needed and irritates */
bool r600_lower_fs_pos_input(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader,
                                        r600_lower_fs_pos_input_filter,
                                        r600_lower_fs_pos_input_impl,
                                        nullptr);
}

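/* One round of the generic NIR cleanup passes; returns whether any of them
 * made progress so callers can loop until the shader is stable. */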
static bool
optimize_once(nir_shader *shader, bool vectorize)
{
   bool progress = false;
   NIR_PASS(progress, shader, nir_lower_vars_to_ssa);
   NIR_PASS(progress, shader, nir_copy_prop);
   NIR_PASS(progress, shader, nir_opt_dce);
   NIR_PASS(progress, shader, nir_opt_algebraic);
   NIR_PASS(progress, shader, nir_opt_constant_folding);
   NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
   if (vectorize)
      NIR_PASS(progress, shader, nir_opt_vectorize, NULL, NULL);

   NIR_PASS(progress, shader, nir_opt_remove_phis);

   if (nir_opt_trivial_continues(shader)) {
      progress = true;
      NIR_PASS(progress, shader, nir_copy_prop);
      NIR_PASS(progress, shader, nir_opt_dce);
   }

   NIR_PASS(progress, shader, nir_opt_if, false);
   NIR_PASS(progress, shader, nir_opt_dead_cf);
   NIR_PASS(progress, shader, nir_opt_cse);
   NIR_PASS(progress, shader, nir_opt_peephole_select, 200, true, true);

   NIR_PASS(progress, shader, nir_opt_conditional_discard);
   NIR_PASS(progress, shader, nir_opt_dce);
   NIR_PASS(progress, shader, nir_opt_undef);
   return progress;
}

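/* Check whether any ALU instruction in the function uses the saturate output
 * modifier. */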
bool has_saturate(const nir_function *func)
{
   nir_foreach_block(block, func->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type == nir_instr_type_alu) {
            auto alu = nir_instr_as_alu(instr);
            if (alu->dest.saturate)
               return true;
         }
      }
   }
   return false;
}

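/* Filter for nir_lower_alu_to_scalar: keep the reductions (dot products and
 * the 3- and 4-component equal/not-equal tests) and cube_r600 in vector form;
 * the two-component compares only stay vectorized for 64-bit sources. */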
extern "C"
bool r600_lower_to_scalar_instr_filter(const nir_instr *instr, const void *)
{
   if (instr->type != nir_instr_type_alu)
      return true;

   auto alu = nir_instr_as_alu(instr);
   switch (alu->op) {
   case nir_op_bany_fnequal3:
   case nir_op_bany_fnequal4:
   case nir_op_ball_fequal3:
   case nir_op_ball_fequal4:
   case nir_op_bany_inequal3:
   case nir_op_bany_inequal4:
   case nir_op_ball_iequal3:
   case nir_op_ball_iequal4:
   case nir_op_fdot2:
   case nir_op_fdot3:
   case nir_op_fdot4:
   case nir_op_cube_r600:
      return false;
   case nir_op_bany_fnequal2:
   case nir_op_ball_fequal2:
   case nir_op_bany_inequal2:
   case nir_op_ball_iequal2:
      return nir_src_bit_size(alu->src[0].src) != 64;
   default:
      return true;
   }
}

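/* Entry point called by the driver: run the r600-specific and generic NIR
 * lowering and optimization passes on the selector's shader, translate the
 * result to r600 IR and finally hand it to the assembly backend. */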
int r600_shader_from_nir(struct r600_context *rctx,
                         struct r600_pipe_shader *pipeshader,
                         r600_shader_key *key)
{
   char filename[4000];
   struct r600_pipe_shader_selector *sel = pipeshader->selector;

   bool lower_64bit = ((sel->nir->options->lower_int64_options ||
                        sel->nir->options->lower_doubles_options) &&
                       (sel->nir->info.bit_sizes_float | sel->nir->info.bit_sizes_int) & 64);

   r600::ShaderFromNir convert;

   if (rctx->screen->b.debug_flags & DBG_PREOPT_IR) {
      fprintf(stderr, "PRE-OPT-NIR-----------.------------------------------\n");
      nir_print_shader(sel->nir, stderr);
      fprintf(stderr, "END PRE-OPT-NIR--------------------------------------\n\n");
   }

   r600::sort_uniforms(sel->nir);

   /* Cayman seems very crashy about accessing images that don't exist or are
    * accessed out of range, this lowering seems to help (but it can also be
    * another problem). */
   if (sel->nir->info.num_images > 0 && rctx->b.chip_class == CAYMAN)
      NIR_PASS_V(sel->nir, r600_legalize_image_load_store);

   NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
   nir_lower_idiv_options idiv_options = {0};
   idiv_options.imprecise_32bit_lowering = sel->nir->info.stage != MESA_SHADER_COMPUTE;
   idiv_options.allow_fp16 = true;

   NIR_PASS_V(sel->nir, nir_lower_idiv, &idiv_options);
   NIR_PASS_V(sel->nir, r600_nir_lower_trigen);
   NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar, false);

   if (lower_64bit)
      NIR_PASS_V(sel->nir, nir_lower_int64);
   while (optimize_once(sel->nir, false));

   NIR_PASS_V(sel->nir, r600_lower_shared_io);
   NIR_PASS_V(sel->nir, r600_nir_lower_atomics);

   struct nir_lower_tex_options lower_tex_options = {0};
   lower_tex_options.lower_txp = ~0u;
   lower_tex_options.lower_txf_offset = true;

   NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options);
   NIR_PASS_V(sel->nir, r600::r600_nir_lower_txl_txf_array_or_cube);
   NIR_PASS_V(sel->nir, r600::r600_nir_lower_cube_to_2darray);

   NIR_PASS_V(sel->nir, r600_nir_lower_pack_unpack_2x16);

   if (sel->nir->info.stage == MESA_SHADER_VERTEX)
      NIR_PASS_V(sel->nir, r600_vectorize_vs_inputs);

   if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS_V(sel->nir, nir_lower_fragcoord_wtrans);
      NIR_PASS_V(sel->nir, r600_lower_fs_out_to_vector);
   }

   nir_variable_mode io_modes = nir_var_uniform | nir_var_shader_in;

   //if (sel->nir->info.stage != MESA_SHADER_FRAGMENT)
   io_modes |= nir_var_shader_out;

   if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) {

      /* Lower IO to temporaries late, because otherwise we get into trouble
       * with the glsl 4.40 interpolateAt swizzle tests. There seems to be a bug
       * somewhere that results in the input always reading from the same temp
       * regardless of interpolation when the lowering is done early */
      NIR_PASS_V(sel->nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(sel->nir),
                 true, true);

      /* Since we're doing nir_lower_io_to_temporaries late, we need
       * to lower all the copy_deref's introduced by
       * lower_io_to_temporaries before calling nir_lower_io.
       */
      NIR_PASS_V(sel->nir, nir_split_var_copies);
      NIR_PASS_V(sel->nir, nir_lower_var_copies);
      NIR_PASS_V(sel->nir, nir_lower_global_vars_to_local);
   }

   NIR_PASS_V(sel->nir, nir_lower_io, io_modes, r600_glsl_type_size,
              nir_lower_io_lower_64bit_to_32);

   if (sel->nir->info.stage == MESA_SHADER_FRAGMENT)
      NIR_PASS_V(sel->nir, r600_lower_fs_pos_input);

   if (lower_64bit)
      NIR_PASS_V(sel->nir, nir_lower_indirect_derefs, nir_var_function_temp, 10);

   NIR_PASS_V(sel->nir, nir_opt_constant_folding);
   NIR_PASS_V(sel->nir, nir_io_add_const_offset_to_base, io_modes);

   NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
   NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar, false);
   if (lower_64bit)
      NIR_PASS_V(sel->nir, r600::r600_nir_split_64bit_io);
   NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
   NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar, false);
   NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
   NIR_PASS_V(sel->nir, nir_copy_prop);
   NIR_PASS_V(sel->nir, nir_opt_dce);

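   /* The remaining passes depend on the shader key (tessellation primitive
    * mode, TF emission, etc.), so run them on a clone and leave the
    * selector's NIR untouched. */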
   auto sh = nir_shader_clone(sel->nir, sel->nir);

   if (sh->info.stage == MESA_SHADER_TESS_CTRL ||
       sh->info.stage == MESA_SHADER_TESS_EVAL ||
       (sh->info.stage == MESA_SHADER_VERTEX && key->vs.as_ls)) {
      auto prim_type = sh->info.stage == MESA_SHADER_TESS_EVAL ?
                          sh->info.tess.primitive_mode : key->tcs.prim_mode;
      NIR_PASS_V(sh, r600_lower_tess_io, static_cast<pipe_prim_type>(prim_type));
   }

   if (sh->info.stage == MESA_SHADER_TESS_CTRL)
      NIR_PASS_V(sh, r600_append_tcs_TF_emission,
                 (pipe_prim_type)key->tcs.prim_mode);

   if (sh->info.stage == MESA_SHADER_TESS_EVAL)
      NIR_PASS_V(sh, r600_lower_tess_coord,
                 static_cast<pipe_prim_type>(sh->info.tess.primitive_mode));

   NIR_PASS_V(sh, nir_lower_ubo_vec4);
   if (lower_64bit)
      NIR_PASS_V(sh, r600::r600_nir_64_to_vec2);

   /* Lower to scalar to let some optimization work out better */
   while (optimize_once(sh, false));

   NIR_PASS_V(sh, r600::r600_merge_vec2_stores);

   NIR_PASS_V(sh, nir_remove_dead_variables, nir_var_shader_in, NULL);
   NIR_PASS_V(sh, nir_remove_dead_variables, nir_var_shader_out, NULL);


   NIR_PASS_V(sh, nir_lower_vars_to_scratch,
              nir_var_function_temp,
              40,
              r600_get_natural_size_align_bytes);

   while (optimize_once(sh, true));

   NIR_PASS_V(sh, nir_lower_bool_to_int32);
   NIR_PASS_V(sh, r600_nir_lower_int_tg4);
   NIR_PASS_V(sh, nir_opt_algebraic_late);

   if (sh->info.stage == MESA_SHADER_FRAGMENT)
      r600::sort_fsoutput(sh);

   NIR_PASS_V(sh, nir_lower_locals_to_regs);

   //NIR_PASS_V(sh, nir_opt_algebraic);
   //NIR_PASS_V(sh, nir_copy_prop);
   NIR_PASS_V(sh, nir_lower_to_source_mods,
              (nir_lower_to_source_mods_flags)(nir_lower_float_source_mods |
                                               nir_lower_64bit_source_mods));
   NIR_PASS_V(sh, nir_convert_from_ssa, true);
   NIR_PASS_V(sh, nir_opt_dce);

   if ((rctx->screen->b.debug_flags & DBG_NIR_PREFERRED) &&
       (rctx->screen->b.debug_flags & DBG_ALL_SHADERS)) {
      fprintf(stderr, "-- NIR --------------------------------------------------------\n");
      struct nir_function *func = (struct nir_function *)exec_list_get_head(&sh->functions);
      nir_index_ssa_defs(func->impl);
      nir_print_shader(sh, stderr);
      fprintf(stderr, "-- END --------------------------------------------------------\n");
   }

   memset(&pipeshader->shader, 0, sizeof(r600_shader));
   pipeshader->scratch_space_needed = sh->scratch_size;

   if (sh->info.stage == MESA_SHADER_TESS_EVAL ||
       sh->info.stage == MESA_SHADER_VERTEX ||
       sh->info.stage == MESA_SHADER_GEOMETRY) {
      pipeshader->shader.clip_dist_write |= ((1 << sh->info.clip_distance_array_size) - 1);
      pipeshader->shader.cull_dist_write = ((1 << sh->info.cull_distance_array_size) - 1)
                                           << sh->info.clip_distance_array_size;
      pipeshader->shader.cc_dist_mask = (1 << (sh->info.cull_distance_array_size +
                                               sh->info.clip_distance_array_size)) - 1;
   }

   struct r600_shader* gs_shader = nullptr;
   if (rctx->gs_shader)
      gs_shader = &rctx->gs_shader->current->shader;
   r600_screen *rscreen = rctx->screen;

   bool r = convert.lower(sh, pipeshader, sel, *key, gs_shader, rscreen->b.chip_class);
   if (!r || rctx->screen->b.debug_flags & DBG_ALL_SHADERS) {
      static int shnr = 0;

      snprintf(filename, 4000, "nir-%s_%d.inc", sh->info.name, shnr++);

      if (access(filename, F_OK) == -1) {
         FILE *f = fopen(filename, "w");

         if (f) {
            fprintf(f, "const char *shader_blob_%s = {\nR\"(", sh->info.name);
            nir_print_shader(sh, f);
            fprintf(f, ")\";\n");
            fclose(f);
         }
      }
      if (!r)
         return -2;
   }

   auto shader = convert.shader();

   r600_bytecode_init(&pipeshader->shader.bc, rscreen->b.chip_class, rscreen->b.family,
                      rscreen->has_compressed_msaa_texturing);

   r600::sfn_log << r600::SfnLog::shader_info
                 << "pipeshader->shader.processor_type = "
                 << pipeshader->shader.processor_type << "\n";

   pipeshader->shader.bc.type = pipeshader->shader.processor_type;
   pipeshader->shader.bc.isa = rctx->isa;

   r600::AssemblyFromShaderLegacy afs(&pipeshader->shader, key);
   if (!afs.lower(shader.m_ir)) {
      R600_ERR("%s: Lowering to assembly failed\n", __func__);
      return -1;
   }

   if (sh->info.stage == MESA_SHADER_GEOMETRY) {
      r600::sfn_log << r600::SfnLog::shader_info << "Geometry shader, create copy shader\n";
      generate_gs_copy_shader(rctx, pipeshader, &sel->so);
      assert(pipeshader->gs_copy_shader);
   } else {
      r600::sfn_log << r600::SfnLog::shader_info << "This is not a Geometry shader\n";
   }
   if (pipeshader->shader.bc.ngpr < 6)
      pipeshader->shader.bc.ngpr = 6;

   return 0;
}