1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2019 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include "sfn_nir.h"
28 #include "nir_builder.h"
29
30 #include "../r600_pipe.h"
31 #include "../r600_shader.h"
32
33 #include "sfn_instruction_tex.h"
34
35 #include "sfn_shader_vertex.h"
36 #include "sfn_shader_fragment.h"
37 #include "sfn_shader_geometry.h"
38 #include "sfn_shader_compute.h"
39 #include "sfn_shader_tcs.h"
40 #include "sfn_shader_tess_eval.h"
41 #include "sfn_nir_lower_fs_out_to_vector.h"
42 #include "sfn_ir_to_assembly.h"
43
44 #include <vector>
45
46 namespace r600 {
47
48 using std::vector;
49
ShaderFromNir()50 ShaderFromNir::ShaderFromNir():sh(nullptr),
51 m_current_if_id(0),
52 m_current_loop_id(0)
53 {
54 }
55
lower(const nir_shader * shader,r600_pipe_shader * pipe_shader,r600_pipe_shader_selector * sel,r600_shader_key & key,struct r600_shader * gs_shader,enum chip_class _chip_class)56 bool ShaderFromNir::lower(const nir_shader *shader, r600_pipe_shader *pipe_shader,
57 r600_pipe_shader_selector *sel, r600_shader_key& key,
58 struct r600_shader* gs_shader, enum chip_class _chip_class)
59 {
60 sh = shader;
61 chip_class = _chip_class;
62 assert(sh);
63
64 switch (shader->info.stage) {
65 case MESA_SHADER_VERTEX:
66 impl.reset(new VertexShaderFromNir(pipe_shader, *sel, key, gs_shader, chip_class));
67 break;
68 case MESA_SHADER_TESS_CTRL:
69 sfn_log << SfnLog::trans << "Start TCS\n";
70 impl.reset(new TcsShaderFromNir(pipe_shader, *sel, key, chip_class));
71 break;
72 case MESA_SHADER_TESS_EVAL:
73 sfn_log << SfnLog::trans << "Start TESS_EVAL\n";
74 impl.reset(new TEvalShaderFromNir(pipe_shader, *sel, key, gs_shader, chip_class));
75 break;
76 case MESA_SHADER_GEOMETRY:
77 sfn_log << SfnLog::trans << "Start GS\n";
78 impl.reset(new GeometryShaderFromNir(pipe_shader, *sel, key, chip_class));
79 break;
80 case MESA_SHADER_FRAGMENT:
81 sfn_log << SfnLog::trans << "Start FS\n";
82 impl.reset(new FragmentShaderFromNir(*shader, pipe_shader->shader, *sel, key, chip_class));
83 break;
84 case MESA_SHADER_COMPUTE:
85 sfn_log << SfnLog::trans << "Start CS\n";
86 impl.reset(new ComputeShaderFromNir(pipe_shader, *sel, key, chip_class));
87 break;
88 default:
89 return false;
90 }
91
92 sfn_log << SfnLog::trans << "Process declarations\n";
93 if (!process_declaration())
94 return false;
95
96 // at this point all functions should be inlined
97 const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&sh->functions));
98
99 sfn_log << SfnLog::trans << "Scan shader\n";
100 nir_foreach_block(block, func->impl) {
101 nir_foreach_instr(instr, block) {
102 if (!impl->scan_instruction(instr)) {
103 fprintf(stderr, "Unhandled sysvalue access ");
104 nir_print_instr(instr, stderr);
105 fprintf(stderr, "\n");
106 return false;
107 }
108 }
109 }
110
111 sfn_log << SfnLog::trans << "Reserve registers\n";
112 if (!impl->allocate_reserved_registers()) {
113 return false;
114 }
115
116 ValuePool::array_list arrays;
117 sfn_log << SfnLog::trans << "Allocate local registers\n";
118 foreach_list_typed(nir_register, reg, node, &func->impl->registers) {
119 impl->allocate_local_register(*reg, arrays);
120 }
121
122 sfn_log << SfnLog::trans << "Emit shader start\n";
123 impl->allocate_arrays(arrays);
124
125 impl->emit_shader_start();
126
127 sfn_log << SfnLog::trans << "Process shader \n";
128 foreach_list_typed(nir_cf_node, node, node, &func->impl->body) {
129 if (!process_cf_node(node))
130 return false;
131 }
132
133 // Add optimizations here
134 sfn_log << SfnLog::trans << "Finalize\n";
135 impl->finalize();
136
137 if (!sfn_log.has_debug_flag(SfnLog::nomerge)) {
138 sfn_log << SfnLog::trans << "Merge registers\n";
139 impl->remap_registers();
140 }
141 sfn_log << SfnLog::trans << "Finished translating to R600 IR\n";
142 return true;
143 }
144
shader() const145 Shader ShaderFromNir::shader() const
146 {
147 return Shader{impl->m_output, impl->get_temp_registers()};
148 }
149
150
process_cf_node(nir_cf_node * node)151 bool ShaderFromNir::process_cf_node(nir_cf_node *node)
152 {
153 SFN_TRACE_FUNC(SfnLog::flow, "CF");
154 switch (node->type) {
155 case nir_cf_node_block:
156 return process_block(nir_cf_node_as_block(node));
157 case nir_cf_node_if:
158 return process_if(nir_cf_node_as_if(node));
159 case nir_cf_node_loop:
160 return process_loop(nir_cf_node_as_loop(node));
161 default:
162 return false;
163 }
164 }
165
process_if(nir_if * if_stmt)166 bool ShaderFromNir::process_if(nir_if *if_stmt)
167 {
168 SFN_TRACE_FUNC(SfnLog::flow, "IF");
169
170 if (!impl->emit_if_start(m_current_if_id, if_stmt))
171 return false;
172
173 int if_id = m_current_if_id++;
174 m_if_stack.push(if_id);
175
176 foreach_list_typed(nir_cf_node, n, node, &if_stmt->then_list)
177 if (!process_cf_node(n)) return false;
178
179 if (!if_stmt->then_list.is_empty()) {
180 if (!impl->emit_else_start(if_id))
181 return false;
182
183 foreach_list_typed(nir_cf_node, n, node, &if_stmt->else_list)
184 if (!process_cf_node(n)) return false;
185 }
186
187 if (!impl->emit_ifelse_end(if_id))
188 return false;
189
190 m_if_stack.pop();
191 return true;
192 }
193
process_loop(nir_loop * node)194 bool ShaderFromNir::process_loop(nir_loop *node)
195 {
196 SFN_TRACE_FUNC(SfnLog::flow, "LOOP");
197 int loop_id = m_current_loop_id++;
198
199 if (!impl->emit_loop_start(loop_id))
200 return false;
201
202 foreach_list_typed(nir_cf_node, n, node, &node->body)
203 if (!process_cf_node(n)) return false;
204
205 if (!impl->emit_loop_end(loop_id))
206 return false;
207
208 return true;
209 }
210
process_block(nir_block * block)211 bool ShaderFromNir::process_block(nir_block *block)
212 {
213 SFN_TRACE_FUNC(SfnLog::flow, "BLOCK");
214 nir_foreach_instr(instr, block) {
215 int r = emit_instruction(instr);
216 if (!r) {
217 sfn_log << SfnLog::err << "R600: Unsupported instruction: "
218 << *instr << "\n";
219 return false;
220 }
221 }
222 return true;
223 }
224
225
~ShaderFromNir()226 ShaderFromNir::~ShaderFromNir()
227 {
228 }
229
processor_type() const230 pipe_shader_type ShaderFromNir::processor_type() const
231 {
232 return impl->m_processor_type;
233 }
234
235
emit_instruction(nir_instr * instr)236 bool ShaderFromNir::emit_instruction(nir_instr *instr)
237 {
238 assert(impl);
239
240 sfn_log << SfnLog::instr << "Read instruction " << *instr << "\n";
241
242 switch (instr->type) {
243 case nir_instr_type_alu:
244 return impl->emit_alu_instruction(instr);
245 case nir_instr_type_deref:
246 return impl->emit_deref_instruction(nir_instr_as_deref(instr));
247 case nir_instr_type_intrinsic:
248 return impl->emit_intrinsic_instruction(nir_instr_as_intrinsic(instr));
249 case nir_instr_type_load_const: /* const values are loaded when needed */
250 return true;
251 case nir_instr_type_tex:
252 return impl->emit_tex_instruction(instr);
253 case nir_instr_type_jump:
254 return impl->emit_jump_instruction(nir_instr_as_jump(instr));
255 default:
256 fprintf(stderr, "R600: %s: ShaderFromNir Unsupported instruction: type %d:'", __func__, instr->type);
257 nir_print_instr(instr, stderr);
258 fprintf(stderr, "'\n");
259 return false;
260 case nir_instr_type_ssa_undef:
261 return impl->create_undef(nir_instr_as_ssa_undef(instr));
262 return true;
263 }
264 }
265
process_declaration()266 bool ShaderFromNir::process_declaration()
267 {
268 // scan declarations
269 nir_foreach_shader_in_variable(variable, sh) {
270 if (!impl->process_inputs(variable)) {
271 fprintf(stderr, "R600: error parsing input varible %s\n", variable->name);
272 return false;
273 }
274 }
275
276 // scan declarations
277 nir_foreach_shader_out_variable(variable, sh) {
278 if (!impl->process_outputs(variable)) {
279 fprintf(stderr, "R600: error parsing outputs varible %s\n", variable->name);
280 return false;
281 }
282 }
283
284 // scan declarations
285 nir_foreach_variable_with_modes(variable, sh, nir_var_uniform |
286 nir_var_mem_ubo |
287 nir_var_mem_ssbo) {
288 if (!impl->process_uniforms(variable)) {
289 fprintf(stderr, "R600: error parsing outputs varible %s\n", variable->name);
290 return false;
291 }
292 }
293
294 return true;
295 }
296
shader_ir() const297 const std::vector<InstructionBlock>& ShaderFromNir::shader_ir() const
298 {
299 assert(impl);
300 return impl->m_output;
301 }
302
303
~AssemblyFromShader()304 AssemblyFromShader::~AssemblyFromShader()
305 {
306 }
307
lower(const std::vector<InstructionBlock> & ir)308 bool AssemblyFromShader::lower(const std::vector<InstructionBlock>& ir)
309 {
310 return do_lower(ir);
311 }
312
313 static nir_ssa_def *
r600_nir_lower_pack_unpack_2x16_impl(nir_builder * b,nir_instr * instr,void * _options)314 r600_nir_lower_pack_unpack_2x16_impl(nir_builder *b, nir_instr *instr, void *_options)
315 {
316 nir_alu_instr *alu = nir_instr_as_alu(instr);
317
318 switch (alu->op) {
319 case nir_op_unpack_half_2x16: {
320 nir_ssa_def *packed = nir_ssa_for_alu_src(b, alu, 0);
321 return nir_vec2(b, nir_unpack_half_2x16_split_x(b, packed),
322 nir_unpack_half_2x16_split_y(b, packed));
323
324 }
325 case nir_op_pack_half_2x16: {
326 nir_ssa_def *src_vec2 = nir_ssa_for_alu_src(b, alu, 0);
327 return nir_pack_half_2x16_split(b, nir_channel(b, src_vec2, 0),
328 nir_channel(b, src_vec2, 1));
329 }
330 default:
331 return nullptr;
332 }
333 }
334
r600_nir_lower_pack_unpack_2x16_filter(const nir_instr * instr,const void * _options)335 bool r600_nir_lower_pack_unpack_2x16_filter(const nir_instr *instr, const void *_options)
336 {
337 return instr->type == nir_instr_type_alu;
338 }
339
r600_nir_lower_pack_unpack_2x16(nir_shader * shader)340 bool r600_nir_lower_pack_unpack_2x16(nir_shader *shader)
341 {
342 return nir_shader_lower_instructions(shader,
343 r600_nir_lower_pack_unpack_2x16_filter,
344 r600_nir_lower_pack_unpack_2x16_impl,
345 nullptr);
346 };
347
348 static void
r600_nir_lower_scratch_address_impl(nir_builder * b,nir_intrinsic_instr * instr)349 r600_nir_lower_scratch_address_impl(nir_builder *b, nir_intrinsic_instr *instr)
350 {
351 b->cursor = nir_before_instr(&instr->instr);
352
353 int address_index = 0;
354 int align;
355
356 if (instr->intrinsic == nir_intrinsic_store_scratch) {
357 align = instr->src[0].ssa->num_components;
358 address_index = 1;
359 } else{
360 align = instr->dest.ssa.num_components;
361 }
362
363 nir_ssa_def *address = instr->src[address_index].ssa;
364 nir_ssa_def *new_address = nir_ishr(b, address, nir_imm_int(b, 4 * align));
365
366 nir_instr_rewrite_src(&instr->instr, &instr->src[address_index],
367 nir_src_for_ssa(new_address));
368 }
369
r600_lower_scratch_addresses(nir_shader * shader)370 bool r600_lower_scratch_addresses(nir_shader *shader)
371 {
372 bool progress = false;
373 nir_foreach_function(function, shader) {
374 nir_builder build;
375 nir_builder_init(&build, function->impl);
376
377 nir_foreach_block(block, function->impl) {
378 nir_foreach_instr(instr, block) {
379 if (instr->type != nir_instr_type_intrinsic)
380 continue;
381 nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
382 if (op->intrinsic != nir_intrinsic_load_scratch &&
383 op->intrinsic != nir_intrinsic_store_scratch)
384 continue;
385 r600_nir_lower_scratch_address_impl(&build, op);
386 progress = true;
387 }
388 }
389 }
390 return progress;
391 }
392
393 static void
insert_uniform_sorted(struct exec_list * var_list,nir_variable * new_var)394 insert_uniform_sorted(struct exec_list *var_list, nir_variable *new_var)
395 {
396 nir_foreach_variable_in_list(var, var_list) {
397 if (var->data.binding > new_var->data.binding ||
398 (var->data.binding == new_var->data.binding &&
399 var->data.offset > new_var->data.offset)) {
400 exec_node_insert_node_before(&var->node, &new_var->node);
401 return;
402 }
403 }
404 exec_list_push_tail(var_list, &new_var->node);
405 }
406
sort_uniforms(nir_shader * shader)407 void sort_uniforms(nir_shader *shader)
408 {
409 struct exec_list new_list;
410 exec_list_make_empty(&new_list);
411
412 nir_foreach_uniform_variable_safe(var, shader) {
413 exec_node_remove(&var->node);
414 insert_uniform_sorted(&new_list, var);
415 }
416 exec_list_append(&shader->variables, &new_list);
417 }
418
419 static void
insert_fsoutput_sorted(struct exec_list * var_list,nir_variable * new_var)420 insert_fsoutput_sorted(struct exec_list *var_list, nir_variable *new_var)
421 {
422
423 nir_foreach_variable_in_list(var, var_list) {
424 if (var->data.location > new_var->data.location ||
425 (var->data.location == new_var->data.location &&
426 var->data.index > new_var->data.index)) {
427 exec_node_insert_node_before(&var->node, &new_var->node);
428 return;
429 }
430 }
431
432 exec_list_push_tail(var_list, &new_var->node);
433 }
434
sort_fsoutput(nir_shader * shader)435 void sort_fsoutput(nir_shader *shader)
436 {
437 struct exec_list new_list;
438 exec_list_make_empty(&new_list);
439
440 nir_foreach_shader_out_variable_safe(var, shader) {
441 exec_node_remove(&var->node);
442 insert_fsoutput_sorted(&new_list, var);
443 }
444
445 unsigned driver_location = 0;
446 nir_foreach_variable_in_list(var, &new_list)
447 var->data.driver_location = driver_location++;
448
449 exec_list_append(&shader->variables, &new_list);
450 }
451
452 }
453
454 static nir_intrinsic_op
r600_map_atomic(nir_intrinsic_op op)455 r600_map_atomic(nir_intrinsic_op op)
456 {
457 switch (op) {
458 case nir_intrinsic_atomic_counter_read_deref:
459 return nir_intrinsic_atomic_counter_read;
460 case nir_intrinsic_atomic_counter_inc_deref:
461 return nir_intrinsic_atomic_counter_inc;
462 case nir_intrinsic_atomic_counter_pre_dec_deref:
463 return nir_intrinsic_atomic_counter_pre_dec;
464 case nir_intrinsic_atomic_counter_post_dec_deref:
465 return nir_intrinsic_atomic_counter_post_dec;
466 case nir_intrinsic_atomic_counter_add_deref:
467 return nir_intrinsic_atomic_counter_add;
468 case nir_intrinsic_atomic_counter_min_deref:
469 return nir_intrinsic_atomic_counter_min;
470 case nir_intrinsic_atomic_counter_max_deref:
471 return nir_intrinsic_atomic_counter_max;
472 case nir_intrinsic_atomic_counter_and_deref:
473 return nir_intrinsic_atomic_counter_and;
474 case nir_intrinsic_atomic_counter_or_deref:
475 return nir_intrinsic_atomic_counter_or;
476 case nir_intrinsic_atomic_counter_xor_deref:
477 return nir_intrinsic_atomic_counter_xor;
478 case nir_intrinsic_atomic_counter_exchange_deref:
479 return nir_intrinsic_atomic_counter_exchange;
480 case nir_intrinsic_atomic_counter_comp_swap_deref:
481 return nir_intrinsic_atomic_counter_comp_swap;
482 default:
483 return nir_num_intrinsics;
484 }
485 }
486
487 static bool
r600_lower_deref_instr(nir_builder * b,nir_intrinsic_instr * instr,nir_shader * shader)488 r600_lower_deref_instr(nir_builder *b, nir_intrinsic_instr *instr,
489 nir_shader *shader)
490 {
491 nir_intrinsic_op op = r600_map_atomic(instr->intrinsic);
492 if (nir_num_intrinsics == op)
493 return false;
494
495 nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
496 nir_variable *var = nir_deref_instr_get_variable(deref);
497
498 if (var->data.mode != nir_var_uniform &&
499 var->data.mode != nir_var_mem_ssbo &&
500 var->data.mode != nir_var_mem_shared)
501 return false; /* atomics passed as function arguments can't be lowered */
502
503 const unsigned idx = var->data.binding;
504
505 b->cursor = nir_before_instr(&instr->instr);
506
507 nir_ssa_def *offset = nir_imm_int(b, var->data.index);
508 for (nir_deref_instr *d = deref; d->deref_type != nir_deref_type_var;
509 d = nir_deref_instr_parent(d)) {
510 assert(d->deref_type == nir_deref_type_array);
511 assert(d->arr.index.is_ssa);
512
513 unsigned array_stride = 1;
514 if (glsl_type_is_array(d->type))
515 array_stride *= glsl_get_aoa_size(d->type);
516
517 offset = nir_iadd(b, offset, nir_imul(b, d->arr.index.ssa,
518 nir_imm_int(b, array_stride)));
519 }
520
521 /* Since the first source is a deref and the first source in the lowered
522 * instruction is the offset, we can just swap it out and change the
523 * opcode.
524 */
525 instr->intrinsic = op;
526 nir_instr_rewrite_src(&instr->instr, &instr->src[0],
527 nir_src_for_ssa(offset));
528 nir_intrinsic_set_base(instr, idx);
529
530 nir_deref_instr_remove_if_unused(deref);
531
532 return true;
533 }
534
535 static bool
r600_nir_lower_atomics(nir_shader * shader)536 r600_nir_lower_atomics(nir_shader *shader)
537 {
538 bool progress = false;
539
540 /* First re-do the offsets, in Hardware we start at zero for each new
541 * binding, and we use an offset of one per counter */
542 int current_binding = -1;
543 int current_offset = 0;
544 nir_foreach_variable_with_modes(var, shader, nir_var_uniform) {
545 if (!var->type->contains_atomic())
546 continue;
547
548 if (current_binding == (int)var->data.binding) {
549 var->data.index = current_offset;
550 current_offset += var->type->atomic_size() / ATOMIC_COUNTER_SIZE;
551 } else {
552 current_binding = var->data.binding;
553 var->data.index = 0;
554 current_offset = var->type->atomic_size() / ATOMIC_COUNTER_SIZE;
555 }
556 }
557
558 nir_foreach_function(function, shader) {
559 if (!function->impl)
560 continue;
561
562 bool impl_progress = false;
563
564 nir_builder build;
565 nir_builder_init(&build, function->impl);
566
567 nir_foreach_block(block, function->impl) {
568 nir_foreach_instr_safe(instr, block) {
569 if (instr->type != nir_instr_type_intrinsic)
570 continue;
571
572 impl_progress |= r600_lower_deref_instr(&build,
573 nir_instr_as_intrinsic(instr), shader);
574 }
575 }
576
577 if (impl_progress) {
578 nir_metadata_preserve(function->impl, nir_metadata_block_index | nir_metadata_dominance);
579 progress = true;
580 }
581 }
582
583 return progress;
584 }
585 using r600::r600_nir_lower_int_tg4;
586 using r600::r600_nir_lower_pack_unpack_2x16;
587 using r600::r600_lower_scratch_addresses;
588 using r600::r600_lower_fs_out_to_vector;
589 using r600::r600_lower_ubo_to_align16;
590
591 int
r600_glsl_type_size(const struct glsl_type * type,bool is_bindless)592 r600_glsl_type_size(const struct glsl_type *type, bool is_bindless)
593 {
594 return glsl_count_vec4_slots(type, false, is_bindless);
595 }
596
597 void
r600_get_natural_size_align_bytes(const struct glsl_type * type,unsigned * size,unsigned * align)598 r600_get_natural_size_align_bytes(const struct glsl_type *type,
599 unsigned *size, unsigned *align)
600 {
601 if (type->base_type != GLSL_TYPE_ARRAY) {
602 *align = 1;
603 *size = 1;
604 } else {
605 unsigned elem_size, elem_align;
606 glsl_get_natural_size_align_bytes(type->fields.array,
607 &elem_size, &elem_align);
608 *align = 1;
609 *size = type->length;
610 }
611 }
612
613 static bool
r600_lower_shared_io_impl(nir_function * func)614 r600_lower_shared_io_impl(nir_function *func)
615 {
616 nir_builder b;
617 nir_builder_init(&b, func->impl);
618
619 bool progress = false;
620 nir_foreach_block(block, func->impl) {
621 nir_foreach_instr_safe(instr, block) {
622
623 if (instr->type != nir_instr_type_intrinsic)
624 continue;
625
626 nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
627 if (op->intrinsic != nir_intrinsic_load_shared &&
628 op->intrinsic != nir_intrinsic_store_shared)
629 continue;
630
631 b.cursor = nir_before_instr(instr);
632
633 if (op->intrinsic == nir_intrinsic_load_shared) {
634 nir_ssa_def *addr = op->src[0].ssa;
635
636 switch (nir_dest_num_components(op->dest)) {
637 case 2: {
638 auto addr2 = nir_iadd_imm(&b, addr, 4);
639 addr = nir_vec2(&b, addr, addr2);
640 break;
641 }
642 case 3: {
643 auto addr2 = nir_iadd(&b, addr, nir_imm_ivec2(&b, 4, 8));
644 addr = nir_vec3(&b, addr,
645 nir_channel(&b, addr2, 0),
646 nir_channel(&b, addr2, 1));
647 break;
648 }
649 case 4: {
650 addr = nir_iadd(&b, addr, nir_imm_ivec4(&b, 0, 4, 8, 12));
651 break;
652 }
653 }
654
655 auto load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_local_shared_r600);
656 load->num_components = nir_dest_num_components(op->dest);
657 load->src[0] = nir_src_for_ssa(addr);
658 nir_ssa_dest_init(&load->instr, &load->dest,
659 load->num_components, 32, NULL);
660 nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(&load->dest.ssa));
661 nir_builder_instr_insert(&b, &load->instr);
662 } else {
663 nir_ssa_def *addr = op->src[1].ssa;
664 for (int i = 0; i < 2; ++i) {
665 unsigned test_mask = (0x3 << 2 * i);
666 if (!(nir_intrinsic_write_mask(op) & test_mask))
667 continue;
668
669 auto store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_local_shared_r600);
670 unsigned writemask = nir_intrinsic_write_mask(op) & test_mask;
671 nir_intrinsic_set_write_mask(store, writemask);
672 store->src[0] = nir_src_for_ssa(op->src[0].ssa);
673 store->num_components = store->src[0].ssa->num_components;
674 bool start_even = (writemask & (1u << (2 * i)));
675
676 auto addr2 = nir_iadd(&b, addr, nir_imm_int(&b, 8 * i + (start_even ? 0 : 4)));
677 store->src[1] = nir_src_for_ssa(addr2);
678
679 nir_builder_instr_insert(&b, &store->instr);
680 }
681 }
682 nir_instr_remove(instr);
683 progress = true;
684 }
685 }
686 return progress;
687 }
688
689 static bool
r600_lower_shared_io(nir_shader * nir)690 r600_lower_shared_io(nir_shader *nir)
691 {
692 bool progress=false;
693 nir_foreach_function(function, nir) {
694 if (function->impl &&
695 r600_lower_shared_io_impl(function))
696 progress = true;
697 }
698 return progress;
699 }
700
701 static bool
optimize_once(nir_shader * shader,bool vectorize)702 optimize_once(nir_shader *shader, bool vectorize)
703 {
704 bool progress = false;
705 NIR_PASS(progress, shader, nir_copy_prop);
706 NIR_PASS(progress, shader, nir_opt_dce);
707 NIR_PASS(progress, shader, nir_opt_algebraic);
708 NIR_PASS(progress, shader, nir_opt_constant_folding);
709 NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
710 if (vectorize)
711 NIR_PASS(progress, shader, nir_opt_vectorize, NULL, NULL);
712
713 NIR_PASS(progress, shader, nir_opt_remove_phis);
714
715 if (nir_opt_trivial_continues(shader)) {
716 progress = true;
717 NIR_PASS(progress, shader, nir_copy_prop);
718 NIR_PASS(progress, shader, nir_opt_dce);
719 }
720
721 NIR_PASS(progress, shader, nir_opt_if, false);
722 NIR_PASS(progress, shader, nir_opt_dead_cf);
723 NIR_PASS(progress, shader, nir_opt_cse);
724 NIR_PASS(progress, shader, nir_opt_peephole_select, 200, true, true);
725
726 NIR_PASS(progress, shader, nir_opt_conditional_discard);
727 NIR_PASS(progress, shader, nir_opt_dce);
728 NIR_PASS(progress, shader, nir_opt_undef);
729 return progress;
730 }
731
has_saturate(const nir_function * func)732 bool has_saturate(const nir_function *func)
733 {
734 nir_foreach_block(block, func->impl) {
735 nir_foreach_instr(instr, block) {
736 if (instr->type == nir_instr_type_alu) {
737 auto alu = nir_instr_as_alu(instr);
738 if (alu->dest.saturate)
739 return true;
740 }
741 }
742 }
743 return false;
744 }
745
r600_shader_from_nir(struct r600_context * rctx,struct r600_pipe_shader * pipeshader,r600_shader_key * key)746 int r600_shader_from_nir(struct r600_context *rctx,
747 struct r600_pipe_shader *pipeshader,
748 r600_shader_key *key)
749 {
750 char filename[4000];
751 struct r600_pipe_shader_selector *sel = pipeshader->selector;
752
753 r600::ShaderFromNir convert;
754
755 if (rctx->screen->b.debug_flags & DBG_PREOPT_IR) {
756 fprintf(stderr, "PRE-OPT-NIR-----------.------------------------------\n");
757 nir_print_shader(sel->nir, stderr);
758 fprintf(stderr, "END PRE-OPT-NIR--------------------------------------\n\n");
759 }
760
761 r600::sort_uniforms(sel->nir);
762
763 NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa);
764 NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
765 NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);
766
767 NIR_PASS_V(sel->nir, r600_lower_shared_io);
768 NIR_PASS_V(sel->nir, r600_nir_lower_atomics);
769
770 static const struct nir_lower_tex_options lower_tex_options = {
771 .lower_txp = ~0u,
772 };
773 NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options);
774 NIR_PASS_V(sel->nir, r600::r600_nir_lower_txl_txf_array_or_cube);
775
776 NIR_PASS_V(sel->nir, r600_nir_lower_int_tg4);
777 NIR_PASS_V(sel->nir, r600_nir_lower_pack_unpack_2x16);
778
779 NIR_PASS_V(sel->nir, nir_lower_io, nir_var_uniform, r600_glsl_type_size,
780 nir_lower_io_lower_64bit_to_32);
781
782 if (sel->nir->info.stage == MESA_SHADER_VERTEX)
783 NIR_PASS_V(sel->nir, r600_vectorize_vs_inputs);
784
785 if (sel->nir->info.stage == MESA_SHADER_FRAGMENT)
786 NIR_PASS_V(sel->nir, r600_lower_fs_out_to_vector);
787
788 if (sel->nir->info.stage == MESA_SHADER_TESS_CTRL ||
789 (sel->nir->info.stage == MESA_SHADER_VERTEX && key->vs.as_ls)) {
790 NIR_PASS_V(sel->nir, nir_lower_io, nir_var_shader_out, r600_glsl_type_size,
791 nir_lower_io_lower_64bit_to_32);
792 NIR_PASS_V(sel->nir, r600_lower_tess_io, (pipe_prim_type)key->tcs.prim_mode);
793 }
794
795 if (sel->nir->info.stage == MESA_SHADER_TESS_CTRL ||
796 sel->nir->info.stage == MESA_SHADER_TESS_EVAL) {
797 NIR_PASS_V(sel->nir, nir_lower_io, nir_var_shader_in, r600_glsl_type_size,
798 nir_lower_io_lower_64bit_to_32);
799 }
800
801 if (sel->nir->info.stage == MESA_SHADER_TESS_CTRL ||
802 sel->nir->info.stage == MESA_SHADER_TESS_EVAL ||
803 (sel->nir->info.stage == MESA_SHADER_VERTEX && key->vs.as_ls)) {
804 auto prim_type = sel->nir->info.stage == MESA_SHADER_TESS_CTRL ?
805 key->tcs.prim_mode : sel->nir->info.tess.primitive_mode;
806 NIR_PASS_V(sel->nir, r600_lower_tess_io, static_cast<pipe_prim_type>(prim_type));
807 }
808
809
810 if (sel->nir->info.stage == MESA_SHADER_TESS_CTRL)
811 NIR_PASS_V(sel->nir, r600_append_tcs_TF_emission,
812 (pipe_prim_type)key->tcs.prim_mode);
813
814
815 const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&sel->nir->functions));
816 assert(func->impl->registers.length() == 0 && !has_saturate(func));
817
818 NIR_PASS_V(sel->nir, nir_lower_ubo_vec4);
819
820 /* Lower to scalar to let some optimization work out better */
821 NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, NULL, NULL);
822 while(optimize_once(sel->nir, false));
823
824 NIR_PASS_V(sel->nir, nir_remove_dead_variables, nir_var_shader_in, NULL);
825 NIR_PASS_V(sel->nir, nir_remove_dead_variables, nir_var_shader_out, NULL);
826
827
828 NIR_PASS_V(sel->nir, nir_lower_vars_to_scratch,
829 nir_var_function_temp,
830 40,
831 r600_get_natural_size_align_bytes);
832
833 while (optimize_once(sel->nir, true));
834
835 auto sh = nir_shader_clone(sel->nir, sel->nir);
836 NIR_PASS_V(sel->nir, nir_lower_bool_to_int32);
837 NIR_PASS_V(sh, nir_opt_algebraic_late);
838
839 if (sel->nir->info.stage == MESA_SHADER_FRAGMENT)
840 r600::sort_fsoutput(sh);
841
842 NIR_PASS_V(sh, nir_lower_locals_to_regs);
843
844 //NIR_PASS_V(sel->nir, nir_opt_algebraic);
845 //NIR_PASS_V(sel->nir, nir_copy_prop);
846 NIR_PASS_V(sh, nir_lower_to_source_mods,
847 (nir_lower_to_source_mods_flags)(nir_lower_float_source_mods |
848 nir_lower_64bit_source_mods));
849 NIR_PASS_V(sh, nir_convert_from_ssa, true);
850 NIR_PASS_V(sh, nir_opt_dce);
851
852 if ((rctx->screen->b.debug_flags & DBG_NIR) &&
853 (rctx->screen->b.debug_flags & DBG_ALL_SHADERS)) {
854 fprintf(stderr, "-- NIR --------------------------------------------------------\n");
855 struct nir_function *func = (struct nir_function *)exec_list_get_head(&sh->functions);
856 nir_index_ssa_defs(func->impl);
857 nir_print_shader(sh, stderr);
858 fprintf(stderr, "-- END --------------------------------------------------------\n");
859 }
860
861 memset(&pipeshader->shader, 0, sizeof(r600_shader));
862 pipeshader->scratch_space_needed = sel->nir->scratch_size;
863
864 if (sel->nir->info.stage == MESA_SHADER_TESS_EVAL ||
865 sel->nir->info.stage == MESA_SHADER_VERTEX ||
866 sel->nir->info.stage == MESA_SHADER_GEOMETRY) {
867 pipeshader->shader.clip_dist_write |= ((1 << sel->nir->info.clip_distance_array_size) - 1);
868 pipeshader->shader.cull_dist_write = ((1 << sel->nir->info.cull_distance_array_size) - 1)
869 << sel->nir->info.clip_distance_array_size;
870 pipeshader->shader.cc_dist_mask = (1 << (sel->nir->info.cull_distance_array_size +
871 sel->nir->info.clip_distance_array_size)) - 1;
872 }
873
874 struct r600_shader* gs_shader = nullptr;
875 if (rctx->gs_shader)
876 gs_shader = &rctx->gs_shader->current->shader;
877 r600_screen *rscreen = rctx->screen;
878
879 bool r = convert.lower(sh, pipeshader, sel, *key, gs_shader, rscreen->b.chip_class);
880 if (!r || rctx->screen->b.debug_flags & DBG_ALL_SHADERS) {
881 static int shnr = 0;
882
883 snprintf(filename, 4000, "nir-%s_%d.inc", sel->nir->info.name, shnr++);
884
885 if (access(filename, F_OK) == -1) {
886 FILE *f = fopen(filename, "w");
887
888 if (f) {
889 fprintf(f, "const char *shader_blob_%s = {\nR\"(", sel->nir->info.name);
890 nir_print_shader(sh, f);
891 fprintf(f, ")\";\n");
892 fclose(f);
893 }
894 }
895 if (!r)
896 return -2;
897 }
898
899 auto shader = convert.shader();
900
901 r600_bytecode_init(&pipeshader->shader.bc, rscreen->b.chip_class, rscreen->b.family,
902 rscreen->has_compressed_msaa_texturing);
903
904 r600::sfn_log << r600::SfnLog::shader_info
905 << "pipeshader->shader.processor_type = "
906 << pipeshader->shader.processor_type << "\n";
907
908 pipeshader->shader.bc.type = pipeshader->shader.processor_type;
909 pipeshader->shader.bc.isa = rctx->isa;
910
911 r600::AssemblyFromShaderLegacy afs(&pipeshader->shader, key);
912 if (!afs.lower(shader.m_ir)) {
913 R600_ERR("%s: Lowering to assembly failed\n", __func__);
914 return -1;
915 }
916
917 if (sel->nir->info.stage == MESA_SHADER_GEOMETRY) {
918 r600::sfn_log << r600::SfnLog::shader_info << "Geometry shader, create copy shader\n";
919 generate_gs_copy_shader(rctx, pipeshader, &sel->so);
920 assert(pipeshader->gs_copy_shader);
921 } else {
922 r600::sfn_log << r600::SfnLog::shader_info << "This is not a Geometry shader\n";
923 }
924 if (pipeshader->shader.bc.ngpr < 6)
925 pipeshader->shader.bc.ngpr = 6;
926
927 return 0;
928 }
929