/* -*- mesa-c++  -*-
 *
 * Copyright (c) 2019 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "sfn_nir.h"
#include "nir_builder.h"

#include "../r600_pipe.h"
#include "../r600_shader.h"


#include "util/u_prim.h"

#include "sfn_shader.h"
#include "sfn_assembler.h"
#include "sfn_debug.h"
#include "sfn_liverangeevaluator.h"
#include "sfn_nir_lower_fs_out_to_vector.h"
#include "sfn_nir_lower_alu.h"
#include "sfn_nir_lower_tex.h"
#include "sfn_optimizer.h"
#include "sfn_ra.h"
#include "sfn_scheduler.h"

#include <vector>

namespace r600 {

using std::vector;

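/* Out-of-line parts of NirLowerInstruction: the static trampolines below
 * forward the C callbacks of nir_shader_lower_instructions() to the virtual
 * filter()/lower() interface implemented by the lowering passes in this file. */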
NirLowerInstruction::NirLowerInstruction():
   b(nullptr)
{
}

bool NirLowerInstruction::filter_instr(const nir_instr *instr, const void *data)
{
   auto me = reinterpret_cast<const NirLowerInstruction*>(data);
   return me->filter(instr);
}

nir_ssa_def *NirLowerInstruction::lower_instr(nir_builder *b, nir_instr *instr, void *data)
{
   auto me = reinterpret_cast<NirLowerInstruction*>(data);
   me->set_builder(b);
   return me->lower(instr);
}

bool NirLowerInstruction::run(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader,
                                        filter_instr,
                                        lower_instr,
                                        (void *)this);
}

AssemblyFromShader::~AssemblyFromShader()
{
}

bool AssemblyFromShader::lower(const Shader& ir)
{
   return do_lower(ir);
}

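/* Rewrite the address source of scratch load/store intrinsics; the shift
 * applied below depends on the number of components that are accessed. */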
static void
r600_nir_lower_scratch_address_impl(nir_builder *b, nir_intrinsic_instr *instr)
{
   b->cursor = nir_before_instr(&instr->instr);

   int address_index = 0;
   int align;

   if (instr->intrinsic == nir_intrinsic_store_scratch) {
      align = instr->src[0].ssa->num_components;
      address_index = 1;
   } else {
      align = instr->dest.ssa.num_components;
   }

   nir_ssa_def *address = instr->src[address_index].ssa;
   nir_ssa_def *new_address = nir_ishr(b, address, nir_imm_int(b, 4 * align));

   nir_instr_rewrite_src(&instr->instr, &instr->src[address_index],
                         nir_src_for_ssa(new_address));
}

bool r600_lower_scratch_addresses(nir_shader *shader)
{
   bool progress = false;
   nir_foreach_function(function, shader) {
      nir_builder build;
      nir_builder_init(&build, function->impl);

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
            if (op->intrinsic != nir_intrinsic_load_scratch &&
                op->intrinsic != nir_intrinsic_store_scratch)
               continue;
            r600_nir_lower_scratch_address_impl(&build, op);
            progress = true;
         }
      }
   }
   return progress;
}

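/* Keep the uniform variable list sorted by binding and, within one binding,
 * by offset. */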
static void
insert_uniform_sorted(struct exec_list *var_list, nir_variable *new_var)
{
   nir_foreach_variable_in_list(var, var_list) {
      if (var->data.binding > new_var->data.binding ||
          (var->data.binding == new_var->data.binding &&
           var->data.offset > new_var->data.offset)) {
         exec_node_insert_node_before(&var->node, &new_var->node);
         return;
      }
   }
   exec_list_push_tail(var_list, &new_var->node);
}

void sort_uniforms(nir_shader *shader)
{
   struct exec_list new_list;
   exec_list_make_empty(&new_list);

   nir_foreach_uniform_variable_safe(var, shader) {
      exec_node_remove(&var->node);
      insert_uniform_sorted(&new_list, var);
   }
   exec_list_append(&shader->variables, &new_list);
}

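/* Sort the fragment shader outputs by location (and index) and assign
 * consecutive driver locations in that order. */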
static void
insert_fsoutput_sorted(struct exec_list *var_list, nir_variable *new_var)
{
   nir_foreach_variable_in_list(var, var_list) {
      if (var->data.location > new_var->data.location ||
          (var->data.location == new_var->data.location &&
           var->data.index > new_var->data.index)) {
         exec_node_insert_node_before(&var->node, &new_var->node);
         return;
      }
   }

   exec_list_push_tail(var_list, &new_var->node);
}

void sort_fsoutput(nir_shader *shader)
{
   struct exec_list new_list;
   exec_list_make_empty(&new_list);

   nir_foreach_shader_out_variable_safe(var, shader) {
      exec_node_remove(&var->node);
      insert_fsoutput_sorted(&new_list, var);
   }

   unsigned driver_location = 0;
   nir_foreach_variable_in_list(var, &new_list)
      var->data.driver_location = driver_location++;

   exec_list_append(&shader->variables, &new_list);
}

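/* Lower writes to VARYING_SLOT_CLIP_VERTEX: the clip vertex is multiplied
 * with eight vec4 rows loaded from R600_BUFFER_INFO_CONST_BUFFER (the user
 * clip planes) and the results are emitted as CLIP_DIST0/1 outputs, while
 * the original value moves to an extra output slot so that stream output
 * references remain valid. */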
class LowerClipvertexWrite : public NirLowerInstruction {

public:
   LowerClipvertexWrite(int noutputs, pipe_stream_output_info& so_info) :
      m_clipplane1(noutputs),
      m_clipvtx(noutputs + 1),
      m_so_info(so_info){}
private:
   bool filter(const nir_instr *instr) const override {
      if (instr->type != nir_instr_type_intrinsic)
         return false;

      auto intr = nir_instr_as_intrinsic(instr);
      if (intr->intrinsic != nir_intrinsic_store_output)
         return false;

      return nir_intrinsic_io_semantics(intr).location == VARYING_SLOT_CLIP_VERTEX;
   }

   nir_ssa_def *lower(nir_instr *instr) override {

      auto intr = nir_instr_as_intrinsic(instr);
      nir_ssa_def *output[8] = {nullptr};

      // UBO load buffer IDs get corrected by adding 1 later, so subtract 1 here
      auto buf_id = nir_imm_int(b, R600_BUFFER_INFO_CONST_BUFFER - 1);

      assert(intr->src[0].is_ssa);
      auto clip_vtx = intr->src[0].ssa;

      for (int i = 0; i < 8; ++i) {
         auto sel = nir_imm_int(b, i);
         auto mrow = nir_load_ubo_vec4(b, 4, 32, buf_id, sel);
         output[i] = nir_fdot4(b, clip_vtx, mrow);
      }

      unsigned clip_vertex_index = nir_intrinsic_base(intr);

      for (int i = 0; i < 2; ++i) {
         auto clip_i = nir_vec(b, &output[4 * i], 4);
         auto store = nir_store_output(b, clip_i, intr->src[1].ssa);
         nir_intrinsic_set_write_mask(store, 0xf);
         nir_intrinsic_set_base(store, clip_vertex_index);
         nir_io_semantics semantic = nir_intrinsic_io_semantics(intr);
         semantic.location = VARYING_SLOT_CLIP_DIST0 + i;
         semantic.no_varying = 1;

         if (i > 0)
            nir_intrinsic_set_base(store, m_clipplane1);
         nir_intrinsic_set_write_mask(store, 0xf);
         nir_intrinsic_set_io_semantics(store, semantic);
      }
      nir_intrinsic_set_base(intr, m_clipvtx);

      nir_ssa_def *result = NIR_LOWER_INSTR_PROGRESS_REPLACE;
      for (unsigned i = 0; i < m_so_info.num_outputs; ++i) {
         if (m_so_info.output[i].register_index == clip_vertex_index) {
            m_so_info.output[i].register_index = m_clipvtx;
            result = NIR_LOWER_INSTR_PROGRESS;
         }
      }
      return result;
   }
   int m_clipplane1;
   int m_clipvtx;
   pipe_stream_output_info& m_so_info;
};


} // namespace r600

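/* Map the deref-based atomic counter intrinsics to their offset-based
 * counterparts; anything else maps to nir_num_intrinsics. */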
static nir_intrinsic_op
r600_map_atomic(nir_intrinsic_op op)
{
   switch (op) {
   case nir_intrinsic_atomic_counter_read_deref:
      return nir_intrinsic_atomic_counter_read;
   case nir_intrinsic_atomic_counter_inc_deref:
      return nir_intrinsic_atomic_counter_inc;
   case nir_intrinsic_atomic_counter_pre_dec_deref:
      return nir_intrinsic_atomic_counter_pre_dec;
   case nir_intrinsic_atomic_counter_post_dec_deref:
      return nir_intrinsic_atomic_counter_post_dec;
   case nir_intrinsic_atomic_counter_add_deref:
      return nir_intrinsic_atomic_counter_add;
   case nir_intrinsic_atomic_counter_min_deref:
      return nir_intrinsic_atomic_counter_min;
   case nir_intrinsic_atomic_counter_max_deref:
      return nir_intrinsic_atomic_counter_max;
   case nir_intrinsic_atomic_counter_and_deref:
      return nir_intrinsic_atomic_counter_and;
   case nir_intrinsic_atomic_counter_or_deref:
      return nir_intrinsic_atomic_counter_or;
   case nir_intrinsic_atomic_counter_xor_deref:
      return nir_intrinsic_atomic_counter_xor;
   case nir_intrinsic_atomic_counter_exchange_deref:
      return nir_intrinsic_atomic_counter_exchange;
   case nir_intrinsic_atomic_counter_comp_swap_deref:
      return nir_intrinsic_atomic_counter_comp_swap;
   default:
      return nir_num_intrinsics;
   }
}

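/* Rewrite a deref-based atomic counter access into the offset-based form:
 * compute the array offset relative to the variable, use the binding as
 * intrinsic base and drop the deref chain. */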
static bool
r600_lower_deref_instr(nir_builder *b, nir_instr *instr_, UNUSED void *cb_data)
{
   if (instr_->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(instr_);

   nir_intrinsic_op op = r600_map_atomic(instr->intrinsic);
   if (nir_num_intrinsics == op)
      return false;

   nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   if (var->data.mode != nir_var_uniform &&
       var->data.mode != nir_var_mem_ssbo &&
       var->data.mode != nir_var_mem_shared)
      return false; /* atomics passed as function arguments can't be lowered */

   const unsigned idx = var->data.binding;

   b->cursor = nir_before_instr(&instr->instr);

   nir_ssa_def *offset = nir_imm_int(b, var->data.index);
   for (nir_deref_instr *d = deref; d->deref_type != nir_deref_type_var;
        d = nir_deref_instr_parent(d)) {
      assert(d->deref_type == nir_deref_type_array);
      assert(d->arr.index.is_ssa);

      unsigned array_stride = 1;
      if (glsl_type_is_array(d->type))
         array_stride *= glsl_get_aoa_size(d->type);

      offset = nir_iadd(b, offset, nir_imul(b, d->arr.index.ssa,
                                            nir_imm_int(b, array_stride)));
   }

   /* Since the first source is a deref and the first source in the lowered
    * instruction is the offset, we can just swap it out and change the
    * opcode.
    */
   instr->intrinsic = op;
   nir_instr_rewrite_src(&instr->instr, &instr->src[0],
                         nir_src_for_ssa(offset));
   nir_intrinsic_set_base(instr, idx);

   nir_deref_instr_remove_if_unused(deref);

   return true;
}


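/* If the shader writes gl_ClipVertex, lower it to clip distance outputs
 * using LowerClipvertexWrite above. */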
static bool
r600_lower_clipvertex_to_clipdist(nir_shader *sh,
                                  pipe_stream_output_info& so_info)
{
   if (!(sh->info.outputs_written & VARYING_BIT_CLIP_VERTEX))
      return false;

   int noutputs = util_bitcount64(sh->info.outputs_written);
   bool result = r600::LowerClipvertexWrite(noutputs, so_info).run(sh);
   return result;
}

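/* Re-number the atomic counter offsets per binding and then lower the
 * deref-based counter intrinsics. */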
static bool
r600_nir_lower_atomics(nir_shader *shader)
{
   /* First redo the offsets: in hardware we start at zero for each new
    * binding, and each counter uses an offset of one. */
   int current_binding = -1;
   int current_offset = 0;
   nir_foreach_variable_with_modes(var, shader, nir_var_uniform) {
      if (!var->type->contains_atomic())
         continue;

      if (current_binding == (int)var->data.binding) {
         var->data.index = current_offset;
         current_offset += var->type->atomic_size() / ATOMIC_COUNTER_SIZE;
      } else {
         current_binding = var->data.binding;
         var->data.index = 0;
         current_offset = var->type->atomic_size() / ATOMIC_COUNTER_SIZE;
      }
   }

   return nir_shader_instructions_pass(shader, r600_lower_deref_instr,
                                       nir_metadata_block_index |
                                       nir_metadata_dominance,
                                       NULL);
}

using r600::r600_lower_scratch_addresses;
using r600::r600_lower_fs_out_to_vector;
using r600::r600_lower_ubo_to_align16;

int
r600_glsl_type_size(const struct glsl_type *type, bool is_bindless)
{
   return glsl_count_vec4_slots(type, false, is_bindless);
}

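/* Size/align callback used for nir_lower_vars_to_scratch below: non-array
 * types count as one unit, arrays as their length. */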
void
r600_get_natural_size_align_bytes(const struct glsl_type *type,
                                  unsigned *size, unsigned *align)
{
   if (type->base_type != GLSL_TYPE_ARRAY) {
      *align = 1;
      *size = 1;
   } else {
      unsigned elem_size, elem_align;
      glsl_get_natural_size_align_bytes(type->fields.array,
                                        &elem_size, &elem_align);
      *align = 1;
      *size = type->length;
   }
}

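/* Lower load_shared/store_shared to the r600 LDS intrinsics: loads get one
 * address per component, stores are split into at most two writes covering
 * components 0-1 and 2-3. */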
static bool
r600_lower_shared_io_impl(nir_function *func)
{
   nir_builder b;
   nir_builder_init(&b, func->impl);

   bool progress = false;
   nir_foreach_block(block, func->impl) {
      nir_foreach_instr_safe(instr, block) {

         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
         if (op->intrinsic != nir_intrinsic_load_shared &&
             op->intrinsic != nir_intrinsic_store_shared)
            continue;

         b.cursor = nir_before_instr(instr);

         if (op->intrinsic == nir_intrinsic_load_shared) {
            nir_ssa_def *addr = op->src[0].ssa;

            switch (nir_dest_num_components(op->dest)) {
            case 2: {
               auto addr2 = nir_iadd_imm(&b, addr, 4);
               addr = nir_vec2(&b, addr, addr2);
               break;
            }
            case 3: {
               auto addr2 = nir_iadd(&b, addr, nir_imm_ivec2(&b, 4, 8));
               addr = nir_vec3(&b, addr,
                               nir_channel(&b, addr2, 0),
                               nir_channel(&b, addr2, 1));
               break;
            }
            case 4: {
               addr = nir_iadd(&b, addr, nir_imm_ivec4(&b, 0, 4, 8, 12));
               break;
            }
            }

            auto load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_local_shared_r600);
            load->num_components = nir_dest_num_components(op->dest);
            load->src[0] = nir_src_for_ssa(addr);
            nir_ssa_dest_init(&load->instr, &load->dest,
                              load->num_components, 32, NULL);
            nir_ssa_def_rewrite_uses(&op->dest.ssa, &load->dest.ssa);
            nir_builder_instr_insert(&b, &load->instr);
         } else {
            nir_ssa_def *addr = op->src[1].ssa;
            for (int i = 0; i < 2; ++i) {
               unsigned test_mask = (0x3 << 2 * i);
               if (!(nir_intrinsic_write_mask(op) & test_mask))
                  continue;

               auto store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_local_shared_r600);
               unsigned writemask = nir_intrinsic_write_mask(op) & test_mask;
               nir_intrinsic_set_write_mask(store, writemask);
               store->src[0] = nir_src_for_ssa(op->src[0].ssa);
               store->num_components = store->src[0].ssa->num_components;
               bool start_even = (writemask & (1u << (2 * i)));

               auto addr2 = nir_iadd(&b, addr, nir_imm_int(&b, 8 * i + (start_even ? 0 : 4)));
               store->src[1] = nir_src_for_ssa(addr2);

               nir_builder_instr_insert(&b, &store->instr);
            }
         }
         nir_instr_remove(instr);
         progress = true;
      }
   }
   return progress;
}

static bool
r600_lower_shared_io(nir_shader *nir)
{
   bool progress = false;
   nir_foreach_function(function, nir) {
      if (function->impl &&
          r600_lower_shared_io_impl(function))
         progress = true;
   }
   return progress;
}

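/* Replace load_interpolated_input of VARYING_SLOT_POS with a plain
 * load_input (see the filter below); the position needs no interpolator. */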
static nir_ssa_def *
r600_lower_fs_pos_input_impl(nir_builder *b, nir_instr *instr, void *_options)
{
   (void)_options;
   auto old_ir = nir_instr_as_intrinsic(instr);
   auto load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
   nir_ssa_dest_init(&load->instr, &load->dest,
                     old_ir->dest.ssa.num_components, old_ir->dest.ssa.bit_size, NULL);
   nir_intrinsic_set_io_semantics(load, nir_intrinsic_io_semantics(old_ir));

   nir_intrinsic_set_base(load, nir_intrinsic_base(old_ir));
   nir_intrinsic_set_component(load, nir_intrinsic_component(old_ir));
   nir_intrinsic_set_dest_type(load, nir_type_float32);
   load->num_components = old_ir->num_components;
   load->src[0] = old_ir->src[1];
   nir_builder_instr_insert(b, &load->instr);
   return &load->dest.ssa;
}

bool r600_lower_fs_pos_input_filter(const nir_instr *instr, const void *_options)
{
   (void)_options;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   auto ir = nir_instr_as_intrinsic(instr);
   if (ir->intrinsic != nir_intrinsic_load_interpolated_input)
      return false;

   return nir_intrinsic_io_semantics(ir).location == VARYING_SLOT_POS;
}

/* Strip the interpolator specification; it is not needed and only gets in the way. */
bool r600_lower_fs_pos_input(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader,
                                        r600_lower_fs_pos_input_filter,
                                        r600_lower_fs_pos_input_impl,
                                        nullptr);
}

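/* One round of the generic NIR optimization loop; callers iterate this
 * until no pass reports progress. */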
static bool
optimize_once(nir_shader *shader)
{
   bool progress = false;
   NIR_PASS(progress, shader, nir_lower_vars_to_ssa);
   NIR_PASS(progress, shader, nir_copy_prop);
   NIR_PASS(progress, shader, nir_opt_dce);
   NIR_PASS(progress, shader, nir_opt_algebraic);
   NIR_PASS(progress, shader, nir_opt_constant_folding);
   NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
   NIR_PASS(progress, shader, nir_opt_remove_phis);

   if (nir_opt_trivial_continues(shader)) {
      progress = true;
      NIR_PASS(progress, shader, nir_copy_prop);
      NIR_PASS(progress, shader, nir_opt_dce);
   }

   NIR_PASS(progress, shader, nir_opt_if, nir_opt_if_optimize_phi_true_false);
   NIR_PASS(progress, shader, nir_opt_dead_cf);
   NIR_PASS(progress, shader, nir_opt_cse);
   NIR_PASS(progress, shader, nir_opt_peephole_select, 200, true, true);

   NIR_PASS(progress, shader, nir_opt_conditional_discard);
   NIR_PASS(progress, shader, nir_opt_dce);
   NIR_PASS(progress, shader, nir_opt_undef);
   return progress;
}

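/* Check whether any ALU instruction in the function uses the saturate
 * modifier. */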
bool has_saturate(const nir_function *func)
{
   nir_foreach_block(block, func->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type == nir_instr_type_alu) {
            auto alu = nir_instr_as_alu(instr);
            if (alu->dest.saturate)
               return true;
         }
      }
   }
   return false;
}

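/* Identify the last vertex processing stage for the current pipeline
 * configuration; used below to decide whether gl_ClipVertex has to be
 * lowered to clip distances. */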
static bool r600_is_last_vertex_stage(nir_shader *nir, const r600_shader_key& key)
{
   if (nir->info.stage == MESA_SHADER_GEOMETRY)
      return true;

   if (nir->info.stage == MESA_SHADER_TESS_EVAL &&
       !key.tes.as_es)
      return true;

   if (nir->info.stage == MESA_SHADER_VERTEX &&
       !key.vs.as_es && !key.vs.as_ls)
      return true;

   return false;
}

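/* Filter for nir_lower_alu_to_scalar: dot products and vector compares stay
 * vectorized unless they operate on 64-bit sources, cube_r600 is never
 * split, and everything else is scalarized. */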
extern "C"
bool r600_lower_to_scalar_instr_filter(const nir_instr *instr, const void *)
{
   if (instr->type != nir_instr_type_alu)
      return true;

   auto alu = nir_instr_as_alu(instr);
   switch (alu->op) {
   case nir_op_bany_fnequal3:
   case nir_op_bany_fnequal4:
   case nir_op_ball_fequal3:
   case nir_op_ball_fequal4:
   case nir_op_bany_inequal3:
   case nir_op_bany_inequal4:
   case nir_op_ball_iequal3:
   case nir_op_ball_iequal4:
   case nir_op_fdot2:
   case nir_op_fdot3:
   case nir_op_fdot4:
      return nir_src_bit_size(alu->src[0].src) == 64;
   case nir_op_cube_r600:
      return false;
   default:
      return true;
   }
}


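/* RAII helper that releases the r600 memory pool when shader translation
 * leaves the scope below. */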
class MallocPoolRelease {
public:
   ~MallocPoolRelease() {
      r600::release_pool();
   }
};

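/* Main entry point: run the NIR lowering and optimization pipeline,
 * translate the result into the r600 backend IR, optimize, schedule,
 * allocate registers and finally emit the bytecode. */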
int r600_shader_from_nir(struct r600_context *rctx,
                         struct r600_pipe_shader *pipeshader,
                         r600_shader_key *key)
{
   MallocPoolRelease pool_release;

   struct r600_pipe_shader_selector *sel = pipeshader->selector;

   bool lower_64bit = (rctx->b.gfx_level < CAYMAN &&
                       (sel->nir->options->lower_int64_options ||
                        sel->nir->options->lower_doubles_options) &&
                       (sel->nir->info.bit_sizes_float | sel->nir->info.bit_sizes_int) & 64);

   if (rctx->screen->b.debug_flags & DBG_PREOPT_IR) {
      fprintf(stderr, "PRE-OPT-NIR------------------------------------------\n");
      nir_print_shader(sel->nir, stderr);
      fprintf(stderr, "END PRE-OPT-NIR--------------------------------------\n\n");
   }

   r600::sort_uniforms(sel->nir);

   /* Cayman seems very crashy about accessing images that don't exist or are
    * accessed out of range; this lowering seems to help (but it could also be
    * masking another problem). */

   NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
   nir_lower_idiv_options idiv_options = {0};
   idiv_options.imprecise_32bit_lowering = sel->nir->info.stage != MESA_SHADER_COMPUTE;
   idiv_options.allow_fp16 = true;

   NIR_PASS_V(sel->nir, nir_lower_idiv, &idiv_options);
   NIR_PASS_V(sel->nir, r600_nir_lower_trigen, rctx->b.gfx_level);
   NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar, false);
   NIR_PASS_V(sel->nir, nir_lower_undef_to_zero);

   if (lower_64bit)
      NIR_PASS_V(sel->nir, nir_lower_int64);
   while (optimize_once(sel->nir));

   NIR_PASS_V(sel->nir, r600_lower_shared_io);
   NIR_PASS_V(sel->nir, r600_nir_lower_atomics);

   struct nir_lower_tex_options lower_tex_options = {0};
   lower_tex_options.lower_txp = ~0u;
   lower_tex_options.lower_txf_offset = true;
   lower_tex_options.lower_invalid_implicit_lod = true;
   lower_tex_options.lower_tg4_offsets = true;

   NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options);
   NIR_PASS_V(sel->nir, r600_nir_lower_txl_txf_array_or_cube);
   NIR_PASS_V(sel->nir, r600_nir_lower_cube_to_2darray);

   NIR_PASS_V(sel->nir, r600_nir_lower_pack_unpack_2x16);

   if (sel->nir->info.stage == MESA_SHADER_VERTEX)
      NIR_PASS_V(sel->nir, r600_vectorize_vs_inputs);

   if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS_V(sel->nir, nir_lower_fragcoord_wtrans);
      NIR_PASS_V(sel->nir, r600_lower_fs_out_to_vector);
   }

   nir_variable_mode io_modes = nir_var_uniform |
                                nir_var_shader_in |
                                nir_var_shader_out;

   NIR_PASS_V(sel->nir, nir_opt_combine_stores, nir_var_shader_out);
   NIR_PASS_V(sel->nir, nir_lower_io, io_modes, r600_glsl_type_size,
              nir_lower_io_lower_64bit_to_32);

   if (sel->nir->info.stage == MESA_SHADER_FRAGMENT)
      NIR_PASS_V(sel->nir, r600_lower_fs_pos_input);

   if (lower_64bit)
      NIR_PASS_V(sel->nir, nir_lower_indirect_derefs, nir_var_function_temp, 10);

   NIR_PASS_V(sel->nir, nir_opt_constant_folding);
   NIR_PASS_V(sel->nir, nir_io_add_const_offset_to_base, io_modes);

   NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
   NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar, false);
   if (lower_64bit)
      NIR_PASS_V(sel->nir, r600::r600_nir_split_64bit_io);
   NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
   NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar, false);
   NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
   NIR_PASS_V(sel->nir, nir_copy_prop);
   NIR_PASS_V(sel->nir, nir_opt_dce);

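   /* Work on a clone from here on; the selector keeps its own copy of the
    * NIR shader. */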
   auto sh = nir_shader_clone(sel->nir, sel->nir);

   if (r600_is_last_vertex_stage(sh, *key))
      r600_lower_clipvertex_to_clipdist(sh, sel->so);

   if (sh->info.stage == MESA_SHADER_TESS_CTRL ||
       sh->info.stage == MESA_SHADER_TESS_EVAL ||
       (sh->info.stage == MESA_SHADER_VERTEX && key->vs.as_ls)) {
      auto prim_type = sh->info.stage == MESA_SHADER_TESS_EVAL ?
         u_tess_prim_from_shader(sh->info.tess._primitive_mode) : key->tcs.prim_mode;
      NIR_PASS_V(sh, r600_lower_tess_io, static_cast<pipe_prim_type>(prim_type));
   }

   if (sh->info.stage == MESA_SHADER_TESS_CTRL)
      NIR_PASS_V(sh, r600_append_tcs_TF_emission,
                 (pipe_prim_type)key->tcs.prim_mode);

   if (sh->info.stage == MESA_SHADER_TESS_EVAL) {
      NIR_PASS_V(sh, r600_lower_tess_coord, u_tess_prim_from_shader(sh->info.tess._primitive_mode));
   }

   NIR_PASS_V(sh, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
   NIR_PASS_V(sh, nir_lower_phis_to_scalar, false);
   NIR_PASS_V(sh, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);

   NIR_PASS_V(sh, r600::r600_nir_split_64bit_io);
   NIR_PASS_V(sh, r600::r600_split_64bit_alu_and_phi);
   NIR_PASS_V(sh, nir_split_64bit_vec3_and_vec4);
   NIR_PASS_V(sh, nir_lower_int64);

   NIR_PASS_V(sh, nir_lower_ubo_vec4);

   if (lower_64bit)
      NIR_PASS_V(sh, r600::r600_nir_64_to_vec2);

   NIR_PASS_V(sh, r600::r600_split_64bit_uniforms_and_ubo);

   /* Lowering to scalar above lets these optimizations work out better. */
   while (optimize_once(sh));

   if (lower_64bit)
      NIR_PASS_V(sh, r600::r600_merge_vec2_stores);

   NIR_PASS_V(sh, nir_remove_dead_variables, nir_var_shader_in, NULL);
   NIR_PASS_V(sh, nir_remove_dead_variables, nir_var_shader_out, NULL);

   NIR_PASS_V(sh, nir_lower_vars_to_scratch,
              nir_var_function_temp,
              40,
              r600_get_natural_size_align_bytes);

   while (optimize_once(sh));

   NIR_PASS_V(sh, nir_lower_bool_to_int32);
   NIR_PASS_V(sh, r600_nir_lower_int_tg4);
   NIR_PASS_V(sh, nir_opt_algebraic_late);

   if (sh->info.stage == MESA_SHADER_FRAGMENT)
      r600::sort_fsoutput(sh);

   NIR_PASS_V(sh, nir_lower_locals_to_regs);

   NIR_PASS_V(sh, nir_lower_to_source_mods,
              (nir_lower_to_source_mods_flags)(nir_lower_float_source_mods |
                                               nir_lower_64bit_source_mods));
   NIR_PASS_V(sh, nir_convert_from_ssa, true);
   NIR_PASS_V(sh, nir_opt_dce);

   if ((rctx->screen->b.debug_flags & DBG_NIR_PREFERRED) &&
       (rctx->screen->b.debug_flags & DBG_ALL_SHADERS)) {
      fprintf(stderr, "-- NIR --------------------------------------------------------\n");
      struct nir_function *func = (struct nir_function *)exec_list_get_head(&sh->functions);
      nir_index_ssa_defs(func->impl);
      nir_print_shader(sh, stderr);
      fprintf(stderr, "-- END --------------------------------------------------------\n");
   }

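   /* NIR processing is done; fill in the r600_shader state and translate
    * the shader into the backend IR. */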
   memset(&pipeshader->shader, 0, sizeof(r600_shader));
   pipeshader->scratch_space_needed = sh->scratch_size;

   if (sh->info.stage == MESA_SHADER_TESS_EVAL ||
       sh->info.stage == MESA_SHADER_VERTEX ||
       sh->info.stage == MESA_SHADER_GEOMETRY) {
      pipeshader->shader.clip_dist_write |= ((1 << sh->info.clip_distance_array_size) - 1);
      pipeshader->shader.cull_dist_write = ((1 << sh->info.cull_distance_array_size) - 1)
                                           << sh->info.clip_distance_array_size;
      pipeshader->shader.cc_dist_mask = (1 << (sh->info.cull_distance_array_size +
                                               sh->info.clip_distance_array_size)) - 1;
   }

   struct r600_shader* gs_shader = nullptr;
   if (rctx->gs_shader)
      gs_shader = &rctx->gs_shader->current->shader;
   r600_screen *rscreen = rctx->screen;

   r600::Shader *shader = r600::Shader::translate_from_nir(sh, &sel->so, gs_shader,
                                                           *key, rctx->isa->hw_class);

   assert(shader);
   if (!shader)
      return -2;

   pipeshader->enabled_stream_buffers_mask = shader->enabled_stream_buffers_mask();
   pipeshader->selector->info.file_count[TGSI_FILE_HW_ATOMIC] += shader->atomic_file_count();
   pipeshader->selector->info.writes_memory = shader->has_flag(r600::Shader::sh_writes_memory);

   if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
      std::cerr << "Shader after conversion from nir\n";
      shader->print(std::cerr);
   }

   if (!r600::sfn_log.has_debug_flag(r600::SfnLog::noopt)) {
      optimize(*shader);

      if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
         std::cerr << "Shader after optimization\n";
         shader->print(std::cerr);
      }
   }

   auto scheduled_shader = r600::schedule(shader);
   if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
      std::cerr << "Shader after scheduling\n";
      shader->print(std::cerr);
   }

   if (!r600::sfn_log.has_debug_flag(r600::SfnLog::nomerge)) {

      if (r600::sfn_log.has_debug_flag(r600::SfnLog::merge)) {
         r600::sfn_log << r600::SfnLog::merge << "Shader before RA\n";
         scheduled_shader->print(std::cerr);
      }

      r600::sfn_log << r600::SfnLog::trans << "Merge registers\n";
      auto lrm = r600::LiveRangeEvaluator().run(*scheduled_shader);

      if (!r600::register_allocation(lrm)) {
         R600_ERR("%s: Register allocation failed\n", __func__);
         /* For now crash if the shader could not be generated */
         assert(0);
         return -1;
      } else if (r600::sfn_log.has_debug_flag(r600::SfnLog::merge) ||
                 r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
         r600::sfn_log << "Shader after RA\n";
         scheduled_shader->print(std::cerr);
      }
   }

   scheduled_shader->get_shader_info(&pipeshader->shader);
   pipeshader->shader.uses_doubles = sh->info.bit_sizes_float & 64 ? 1 : 0;

   r600_bytecode_init(&pipeshader->shader.bc, rscreen->b.gfx_level, rscreen->b.family,
                      rscreen->has_compressed_msaa_texturing);

   r600::sfn_log << r600::SfnLog::shader_info
                 << "pipeshader->shader.processor_type = "
                 << pipeshader->shader.processor_type << "\n";

   pipeshader->shader.bc.type = pipeshader->shader.processor_type;
   pipeshader->shader.bc.isa = rctx->isa;

   r600::Assembler afs(&pipeshader->shader, *key);
   if (!afs.lower(scheduled_shader)) {
      R600_ERR("%s: Lowering to assembly failed\n", __func__);

      scheduled_shader->print(std::cerr);
      /* For now crash if the shader could not be generated */
      assert(0);
      return -1;
   }

   if (sh->info.stage == MESA_SHADER_GEOMETRY) {
      r600::sfn_log << r600::SfnLog::shader_info << "Geometry shader, create copy shader\n";
      generate_gs_copy_shader(rctx, pipeshader, &sel->so);
      assert(pipeshader->gs_copy_shader);
   } else {
      r600::sfn_log << r600::SfnLog::shader_info << "This is not a Geometry shader\n";
   }
   return 0;
}