/* -*- mesa-c++  -*-
 *
 * Copyright (c) 2019 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "sfn_nir.h"

#include "../r600_asm.h"
#include "../r600_pipe.h"
#include "../r600_shader.h"

#include "nir.h"
#include "nir_builder.h"
#include "nir_intrinsics.h"
#include "sfn_assembler.h"
#include "sfn_debug.h"
#include "sfn_instr_tex.h"
#include "sfn_liverangeevaluator.h"
#include "sfn_nir_lower_alu.h"
#include "sfn_nir_lower_fs_out_to_vector.h"
#include "sfn_nir_lower_tex.h"
#include "sfn_optimizer.h"
#include "sfn_ra.h"
#include "sfn_scheduler.h"
#include "sfn_shader.h"
#include "sfn_split_address_loads.h"
#include "util/u_debug.h"
#include "util/u_prim.h"

#include <vector>

namespace r600 {

using std::vector;

NirLowerInstruction::NirLowerInstruction():
    b(nullptr)
{
}

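/* Static trampolines handed to nir_shader_lower_instructions(); they forward
 * to the virtual filter()/lower() methods through the opaque data pointer. */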
bool
NirLowerInstruction::filter_instr(const nir_instr *instr, const void *data)
{
   auto me = reinterpret_cast<const NirLowerInstruction *>(data);
   return me->filter(instr);
}

nir_def *
NirLowerInstruction::lower_instr(nir_builder *b, nir_instr *instr, void *data)
{
   auto me = reinterpret_cast<NirLowerInstruction *>(data);
   me->set_builder(b);
   return me->lower(instr);
}

bool
NirLowerInstruction::run(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader, filter_instr, lower_instr, (void *)this);
}

AssemblyFromShader::~AssemblyFromShader() {}

bool
AssemblyFromShader::lower(const Shader& ir)
{
   return do_lower(ir);
}

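/* Scale scratch load/store addresses according to the access width before
 * they reach the backend. */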
static void
r600_nir_lower_scratch_address_impl(nir_builder *b, nir_intrinsic_instr *instr)
{
   b->cursor = nir_before_instr(&instr->instr);

   int address_index = 0;
   int align;

   if (instr->intrinsic == nir_intrinsic_store_scratch) {
      align = instr->src[0].ssa->num_components;
      address_index = 1;
   } else {
      align = instr->def.num_components;
   }

   nir_def *address = instr->src[address_index].ssa;
   nir_def *new_address = nir_ishr_imm(b, address, 4 * align);

   nir_src_rewrite(&instr->src[address_index], new_address);
}

bool
r600_lower_scratch_addresses(nir_shader *shader)
{
   bool progress = false;
   nir_foreach_function_impl(impl, shader)
   {
      nir_builder build = nir_builder_create(impl);

      nir_foreach_block(block, impl)
      {
         nir_foreach_instr(instr, block)
         {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
            if (op->intrinsic != nir_intrinsic_load_scratch &&
                op->intrinsic != nir_intrinsic_store_scratch)
               continue;
            r600_nir_lower_scratch_address_impl(&build, op);
            progress = true;
         }
      }
   }
   return progress;
}

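/* Insert new_var into var_list so that the list stays ordered by
 * (binding, offset). */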
static void
insert_uniform_sorted(struct exec_list *var_list, nir_variable *new_var)
{
   nir_foreach_variable_in_list(var, var_list)
   {
      if (var->data.binding > new_var->data.binding ||
          (var->data.binding == new_var->data.binding &&
           var->data.offset > new_var->data.offset)) {
         exec_node_insert_node_before(&var->node, &new_var->node);
         return;
      }
   }
   exec_list_push_tail(var_list, &new_var->node);
}

void
sort_uniforms(nir_shader *shader)
{
   struct exec_list new_list;
   exec_list_make_empty(&new_list);

   nir_foreach_uniform_variable_safe(var, shader)
   {
      exec_node_remove(&var->node);
      insert_uniform_sorted(&new_list, var);
   }
   exec_list_append(&shader->variables, &new_list);
}

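/* Insert new_var so that color and data outputs come before the
 * depth/stencil/sample-mask outputs; within each group order by
 * location and index. */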
static void
insert_fsoutput_sorted(struct exec_list *var_list, nir_variable *new_var)
{
   nir_foreach_variable_in_list(var, var_list)
   {
      if ((var->data.location >= FRAG_RESULT_DATA0 ||
           var->data.location == FRAG_RESULT_COLOR) &&
          (new_var->data.location < FRAG_RESULT_COLOR ||
           new_var->data.location == FRAG_RESULT_SAMPLE_MASK)) {
         exec_node_insert_after(&var->node, &new_var->node);
         return;
      } else if ((new_var->data.location >= FRAG_RESULT_DATA0 ||
                  new_var->data.location == FRAG_RESULT_COLOR) &&
                 (var->data.location < FRAG_RESULT_COLOR ||
                  var->data.location == FRAG_RESULT_SAMPLE_MASK)) {
         exec_node_insert_node_before(&var->node, &new_var->node);
         return;
      } else if (var->data.location > new_var->data.location ||
                 (var->data.location == new_var->data.location &&
                  var->data.index > new_var->data.index)) {
         exec_node_insert_node_before(&var->node, &new_var->node);
         return;
      }
   }

   exec_list_push_tail(var_list, &new_var->node);
}

void
sort_fsoutput(nir_shader *shader)
{
   struct exec_list new_list;
   exec_list_make_empty(&new_list);

   nir_foreach_shader_out_variable_safe(var, shader)
   {
      exec_node_remove(&var->node);
      insert_fsoutput_sorted(&new_list, var);
   }

   unsigned driver_location = 0;
   nir_foreach_variable_in_list(var, &new_list) var->data.driver_location =
      driver_location++;

   exec_list_append(&shader->variables, &new_list);
}

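/* Replace the store to VARYING_SLOT_CLIP_VERTEX by two clip-distance stores
 * computed as dot products of the clip vertex with the rows read from the
 * R600_BUFFER_INFO constant buffer, and retarget any stream output that
 * referenced the clip vertex to the extra output slot. */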
class LowerClipvertexWrite : public NirLowerInstruction {

public:
   LowerClipvertexWrite(int noutputs, pipe_stream_output_info& so_info):
       m_clipplane1(noutputs),
       m_clipvtx(noutputs + 1),
       m_so_info(so_info)
   {
   }

private:
   bool filter(const nir_instr *instr) const override
   {
      if (instr->type != nir_instr_type_intrinsic)
         return false;

      auto intr = nir_instr_as_intrinsic(instr);
      if (intr->intrinsic != nir_intrinsic_store_output)
         return false;

      return nir_intrinsic_io_semantics(intr).location == VARYING_SLOT_CLIP_VERTEX;
   }

   nir_def *lower(nir_instr *instr) override
   {
      auto intr = nir_instr_as_intrinsic(instr);
      nir_def *output[8] = {nullptr};

      auto buf_id = nir_imm_int(b, R600_BUFFER_INFO_CONST_BUFFER);

      auto clip_vtx = intr->src[0].ssa;

      for (int i = 0; i < 8; ++i) {
         auto sel = nir_imm_int(b, i);
         auto mrow = nir_load_ubo_vec4(b, 4, 32, buf_id, sel);
         output[i] = nir_fdot4(b, clip_vtx, mrow);
      }

      unsigned clip_vertex_index = nir_intrinsic_base(intr);

      for (int i = 0; i < 2; ++i) {
         auto clip_i = nir_vec(b, &output[4 * i], 4);
         auto store = nir_store_output(b, clip_i, intr->src[1].ssa);
         nir_intrinsic_set_write_mask(store, 0xf);
         nir_intrinsic_set_base(store, clip_vertex_index);
         nir_io_semantics semantic = nir_intrinsic_io_semantics(intr);
         semantic.location = VARYING_SLOT_CLIP_DIST0 + i;
         semantic.no_varying = 1;

         if (i > 0)
            nir_intrinsic_set_base(store, m_clipplane1);
         nir_intrinsic_set_write_mask(store, 0xf);
         nir_intrinsic_set_io_semantics(store, semantic);
      }
      nir_intrinsic_set_base(intr, m_clipvtx);

      nir_def *result = NIR_LOWER_INSTR_PROGRESS_REPLACE;
      for (unsigned i = 0; i < m_so_info.num_outputs; ++i) {
         if (m_so_info.output[i].register_index == clip_vertex_index) {
            m_so_info.output[i].register_index = m_clipvtx;
            result = NIR_LOWER_INSTR_PROGRESS;
         }
      }
      return result;
   }
   int m_clipplane1;
   int m_clipvtx;
   pipe_stream_output_info& m_so_info;
};

/* lower_uniforms_to_ubo adds 1 to the UBO buffer ID.
 * If the buffer ID is a non-constant value we end up
 * with "iadd bufid, 1", but on r600 we can put that constant
 * "1" as constant cache ID into the CF instruction and don't need
 * to execute that extra ADD op, so eliminate the addition here
 * again and move the buffer base ID into the base value of
 * the intrinsic, which is not used otherwise. */
class OptIndirectUBOLoads : public NirLowerInstruction {
private:
   bool filter(const nir_instr *instr) const override
   {
      if (instr->type != nir_instr_type_intrinsic)
         return false;

      auto intr = nir_instr_as_intrinsic(instr);
      if (intr->intrinsic != nir_intrinsic_load_ubo_vec4)
         return false;

      if (nir_src_as_const_value(intr->src[0]) != nullptr)
         return false;

      return nir_intrinsic_base(intr) == 0;
   }

   nir_def *lower(nir_instr *instr) override
   {
      auto intr = nir_instr_as_intrinsic(instr);
      assert(intr->intrinsic == nir_intrinsic_load_ubo_vec4);

      auto parent = intr->src[0].ssa->parent_instr;

      if (parent->type != nir_instr_type_alu)
         return nullptr;

      auto alu = nir_instr_as_alu(parent);

      if (alu->op != nir_op_iadd)
         return nullptr;

      int new_base = 0;
      nir_src *new_bufid = nullptr;
      auto src0 = nir_src_as_const_value(alu->src[0].src);
      if (src0) {
         new_bufid = &alu->src[1].src;
         new_base = src0->i32;
      } else if (auto src1 = nir_src_as_const_value(alu->src[1].src)) {
         new_bufid = &alu->src[0].src;
         new_base = src1->i32;
      } else {
         return nullptr;
      }

      nir_intrinsic_set_base(intr, new_base);
      nir_src_rewrite(&intr->src[0], new_bufid->ssa);
      return &intr->def;
   }
};

} // namespace r600

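/* Map deref-based atomic counter intrinsics to their non-deref counterparts;
 * returns nir_num_intrinsics for any other intrinsic. */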
static nir_intrinsic_op
r600_map_atomic(nir_intrinsic_op op)
{
   switch (op) {
   case nir_intrinsic_atomic_counter_read_deref:
      return nir_intrinsic_atomic_counter_read;
   case nir_intrinsic_atomic_counter_inc_deref:
      return nir_intrinsic_atomic_counter_inc;
   case nir_intrinsic_atomic_counter_pre_dec_deref:
      return nir_intrinsic_atomic_counter_pre_dec;
   case nir_intrinsic_atomic_counter_post_dec_deref:
      return nir_intrinsic_atomic_counter_post_dec;
   case nir_intrinsic_atomic_counter_add_deref:
      return nir_intrinsic_atomic_counter_add;
   case nir_intrinsic_atomic_counter_min_deref:
      return nir_intrinsic_atomic_counter_min;
   case nir_intrinsic_atomic_counter_max_deref:
      return nir_intrinsic_atomic_counter_max;
   case nir_intrinsic_atomic_counter_and_deref:
      return nir_intrinsic_atomic_counter_and;
   case nir_intrinsic_atomic_counter_or_deref:
      return nir_intrinsic_atomic_counter_or;
   case nir_intrinsic_atomic_counter_xor_deref:
      return nir_intrinsic_atomic_counter_xor;
   case nir_intrinsic_atomic_counter_exchange_deref:
      return nir_intrinsic_atomic_counter_exchange;
   case nir_intrinsic_atomic_counter_comp_swap_deref:
      return nir_intrinsic_atomic_counter_comp_swap;
   default:
      return nir_num_intrinsics;
   }
}

static bool
r600_lower_deref_instr(nir_builder *b, nir_intrinsic_instr *instr,
                       UNUSED void *cb_data)
{
   nir_intrinsic_op op = r600_map_atomic(instr->intrinsic);
   if (nir_num_intrinsics == op)
      return false;

   nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   if (var->data.mode != nir_var_uniform && var->data.mode != nir_var_mem_ssbo &&
       var->data.mode != nir_var_mem_shared)
      return false; /* atomics passed as function arguments can't be lowered */

   const unsigned idx = var->data.binding;

   b->cursor = nir_before_instr(&instr->instr);

   nir_def *offset = nir_imm_int(b, 0);
   for (nir_deref_instr *d = deref; d->deref_type != nir_deref_type_var;
        d = nir_deref_instr_parent(d)) {
      assert(d->deref_type == nir_deref_type_array);

      unsigned array_stride = 1;
      if (glsl_type_is_array(d->type))
         array_stride *= glsl_get_aoa_size(d->type);

      offset =
         nir_iadd(b, offset, nir_imul_imm(b, d->arr.index.ssa, array_stride));
   }

   /* Since the first source is a deref and the first source in the lowered
    * instruction is the offset, we can just swap it out and change the
    * opcode.
    */
   instr->intrinsic = op;
   nir_src_rewrite(&instr->src[0], offset);
   nir_intrinsic_set_base(instr, idx);
   nir_intrinsic_set_range_base(instr, var->data.index);

   nir_deref_instr_remove_if_unused(deref);

   return true;
}

static bool
r600_lower_clipvertex_to_clipdist(nir_shader *sh, pipe_stream_output_info& so_info)
{
   if (!(sh->info.outputs_written & VARYING_BIT_CLIP_VERTEX))
      return false;

   int noutputs = util_bitcount64(sh->info.outputs_written);
   bool result = r600::LowerClipvertexWrite(noutputs, so_info).run(sh);
   return result;
}

static bool
r600_nir_lower_atomics(nir_shader *shader)
{
   /* In hardware we start at a zero index for each new
    * binding, and we use an offset of one per counter. We also
    * need to sort the atomics according to binding and offset. */
   std::map<unsigned, unsigned> binding_offset;
   std::map<unsigned, nir_variable *> sorted_var;

   nir_foreach_variable_with_modes_safe(var, shader, nir_var_uniform) {
      if (glsl_contains_atomic(var->type)) {
         sorted_var[(var->data.binding << 16) | var->data.offset] = var;
         exec_node_remove(&var->node);
      }
   }

   for (auto& [dummy, var] : sorted_var) {
      auto iindex = binding_offset.find(var->data.binding);
      unsigned offset_update = glsl_atomic_size(var->type) / 4; /* ATOMIC_COUNTER_SIZE */
      if (iindex == binding_offset.end()) {
         var->data.index = 0;
         binding_offset[var->data.binding] = offset_update;
      } else {
         var->data.index = iindex->second;
         iindex->second += offset_update;
      }
      shader->variables.push_tail(&var->node);
   }

   return nir_shader_intrinsics_pass(shader, r600_lower_deref_instr,
                                     nir_metadata_block_index | nir_metadata_dominance,
                                     NULL);
}

using r600::r600_lower_fs_out_to_vector;
using r600::r600_lower_scratch_addresses;
using r600::r600_lower_ubo_to_align16;

int
r600_glsl_type_size(const struct glsl_type *type, bool is_bindless)
{
   return glsl_count_vec4_slots(type, false, is_bindless);
}

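/* Size/align callback used with nir_lower_vars_to_scratch: every non-array
 * type occupies one slot, arrays occupy one slot per element. */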
void
r600_get_natural_size_align_bytes(const struct glsl_type *type,
                                  unsigned *size,
                                  unsigned *align)
{
   if (type->base_type != GLSL_TYPE_ARRAY) {
      *align = 1;
      *size = 1;
   } else {
      unsigned elem_size, elem_align;
      glsl_get_natural_size_align_bytes(type->fields.array, &elem_size, &elem_align);
      *align = 1;
      *size = type->length;
   }
}

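/* Replace generic shared-memory load/store intrinsics with the r600 LDS
 * intrinsics: loads get one address lane per channel, stores are split into
 * at most two writes of up to two channels each. */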
static bool
r600_lower_shared_io_impl(nir_function_impl *impl)
{
   nir_builder b = nir_builder_create(impl);

   bool progress = false;
   nir_foreach_block(block, impl)
   {
      nir_foreach_instr_safe(instr, block)
      {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
         if (op->intrinsic != nir_intrinsic_load_shared &&
             op->intrinsic != nir_intrinsic_store_shared)
            continue;

         b.cursor = nir_before_instr(instr);

         if (op->intrinsic == nir_intrinsic_load_shared) {
            nir_def *addr = op->src[0].ssa;

            switch (op->def.num_components) {
            case 2: {
               auto addr2 = nir_iadd_imm(&b, addr, 4);
               addr = nir_vec2(&b, addr, addr2);
               break;
            }
            case 3: {
               auto addr2 = nir_iadd(&b, addr, nir_imm_ivec2(&b, 4, 8));
               addr =
                  nir_vec3(&b, addr, nir_channel(&b, addr2, 0), nir_channel(&b, addr2, 1));
               break;
            }
            case 4: {
               addr = nir_iadd(&b, addr, nir_imm_ivec4(&b, 0, 4, 8, 12));
               break;
            }
            }

            auto load =
               nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_local_shared_r600);
            load->num_components = op->def.num_components;
            load->src[0] = nir_src_for_ssa(addr);
            nir_def_init(&load->instr, &load->def, load->num_components, 32);
            nir_def_rewrite_uses(&op->def, &load->def);
            nir_builder_instr_insert(&b, &load->instr);
         } else {
            nir_def *addr = op->src[1].ssa;
            for (int i = 0; i < 2; ++i) {
               unsigned test_mask = (0x3 << 2 * i);
               if (!(nir_intrinsic_write_mask(op) & test_mask))
                  continue;

               auto store =
                  nir_intrinsic_instr_create(b.shader,
                                             nir_intrinsic_store_local_shared_r600);
               unsigned writemask = nir_intrinsic_write_mask(op) & test_mask;
               nir_intrinsic_set_write_mask(store, writemask);
               store->src[0] = nir_src_for_ssa(op->src[0].ssa);
               store->num_components = store->src[0].ssa->num_components;
               bool start_even = (writemask & (1u << (2 * i)));

               auto addr2 =
                  nir_iadd_imm(&b, addr, 8 * i + (start_even ? 0 : 4));
               store->src[1] = nir_src_for_ssa(addr2);

               nir_builder_instr_insert(&b, &store->instr);
            }
         }
         nir_instr_remove(instr);
         progress = true;
      }
   }
   return progress;
}

static bool
r600_lower_shared_io(nir_shader *nir)
{
   bool progress = false;
   nir_foreach_function_impl(impl, nir)
   {
      if (r600_lower_shared_io_impl(impl))
         progress = true;
   }
   return progress;
}

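/* Replace a load_interpolated_input of VARYING_SLOT_POS with a plain
 * load_input that carries the same semantics, base and component. */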
static nir_def *
r600_lower_fs_pos_input_impl(nir_builder *b, nir_instr *instr, void *_options)
{
   (void)_options;
   auto old_ir = nir_instr_as_intrinsic(instr);
   auto load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
   nir_def_init(&load->instr, &load->def,
                old_ir->def.num_components, old_ir->def.bit_size);
   nir_intrinsic_set_io_semantics(load, nir_intrinsic_io_semantics(old_ir));

   nir_intrinsic_set_base(load, nir_intrinsic_base(old_ir));
   nir_intrinsic_set_component(load, nir_intrinsic_component(old_ir));
   nir_intrinsic_set_dest_type(load, nir_type_float32);
   load->num_components = old_ir->num_components;
   load->src[0] = old_ir->src[1];
   nir_builder_instr_insert(b, &load->instr);
   return &load->def;
}

bool
r600_lower_fs_pos_input_filter(const nir_instr *instr, const void *_options)
{
   (void)_options;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   auto ir = nir_instr_as_intrinsic(instr);
   if (ir->intrinsic != nir_intrinsic_load_interpolated_input)
      return false;

   return nir_intrinsic_io_semantics(ir).location == VARYING_SLOT_POS;
}

/* Strip the interpolator specification; it is not needed and only gets in
 * the way of later passes. */
bool
r600_lower_fs_pos_input(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader,
                                        r600_lower_fs_pos_input_filter,
                                        r600_lower_fs_pos_input_impl,
                                        nullptr);
}

bool
r600_opt_indirect_fbo_loads(nir_shader *shader)
{
   return r600::OptIndirectUBOLoads().run(shader);
}

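/* One round of the common NIR cleanup passes; returns true if any pass made
 * progress so callers can iterate until a fixed point is reached. */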
static bool
optimize_once(nir_shader *shader)
{
   bool progress = false;
   NIR_PASS(progress, shader, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
   NIR_PASS(progress, shader, nir_lower_vars_to_ssa);
   NIR_PASS(progress, shader, nir_copy_prop);
   NIR_PASS(progress, shader, nir_opt_dce);
   NIR_PASS(progress, shader, nir_opt_algebraic);
   NIR_PASS(progress, shader, nir_opt_constant_folding);
   NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
   NIR_PASS(progress, shader, nir_opt_remove_phis);

   if (nir_opt_loop(shader)) {
      progress = true;
      NIR_PASS(progress, shader, nir_copy_prop);
      NIR_PASS(progress, shader, nir_opt_dce);
   }

   NIR_PASS(progress, shader, nir_opt_if, nir_opt_if_optimize_phi_true_false);
   NIR_PASS(progress, shader, nir_opt_dead_cf);
   NIR_PASS(progress, shader, nir_opt_cse);
   NIR_PASS(progress, shader, nir_opt_peephole_select, 200, true, true);

   NIR_PASS(progress, shader, nir_opt_conditional_discard);
   NIR_PASS(progress, shader, nir_opt_dce);
   NIR_PASS(progress, shader, nir_opt_undef);
   NIR_PASS(progress, shader, nir_opt_loop_unroll);
   return progress;
}

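/* True if this stage is the last one before rasterization: GS, or a TES/VS
 * that doesn't feed another geometry-processing stage. */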
static bool
r600_is_last_vertex_stage(nir_shader *nir, const r600_shader_key& key)
{
   if (nir->info.stage == MESA_SHADER_GEOMETRY)
      return true;

   if (nir->info.stage == MESA_SHADER_TESS_EVAL && !key.tes.as_es)
      return true;

   if (nir->info.stage == MESA_SHADER_VERTEX && !key.vs.as_es && !key.vs.as_ls)
      return true;

   return false;
}

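/* Scalarization filter: keep vector ALU ops the backend handles natively
 * (reductions, dot products, derivatives) unless they operate on 64-bit
 * sources; everything else is scalarized. */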
extern "C" bool
r600_lower_to_scalar_instr_filter(const nir_instr *instr, const void *)
{
   if (instr->type != nir_instr_type_alu)
      return true;

   auto alu = nir_instr_as_alu(instr);
   switch (alu->op) {
   case nir_op_bany_fnequal3:
   case nir_op_bany_fnequal4:
   case nir_op_ball_fequal3:
   case nir_op_ball_fequal4:
   case nir_op_bany_inequal3:
   case nir_op_bany_inequal4:
   case nir_op_ball_iequal3:
   case nir_op_ball_iequal4:
   case nir_op_fdot2:
   case nir_op_fdot3:
   case nir_op_fdot4:
   case nir_op_fddx:
   case nir_op_fddx_coarse:
   case nir_op_fddx_fine:
   case nir_op_fddy:
   case nir_op_fddy_coarse:
   case nir_op_fddy_fine:
      return nir_src_bit_size(alu->src[0].src) == 64;
   default:
      return true;
   }
}

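/* Lowering that is independent of the shader key: flrp, integer division and
 * trig lowering, texture lowering, LDS and atomic-counter lowering, followed
 * by the generic optimization loop. */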
void
r600_finalize_nir_common(nir_shader *nir, enum amd_gfx_level gfx_level)
{
   const int nir_lower_flrp_mask = 16 | 32 | 64;

   NIR_PASS_V(nir, nir_lower_flrp, nir_lower_flrp_mask, false);

   nir_lower_idiv_options idiv_options = {0};
   NIR_PASS_V(nir, nir_lower_idiv, &idiv_options);

   NIR_PASS_V(nir, r600_nir_lower_trigen, gfx_level);
   NIR_PASS_V(nir, nir_lower_phis_to_scalar, false);
   NIR_PASS_V(nir, nir_lower_undef_to_zero);

   struct nir_lower_tex_options lower_tex_options = {0};
   lower_tex_options.lower_txp = ~0u;
   lower_tex_options.lower_txf_offset = true;
   lower_tex_options.lower_invalid_implicit_lod = true;
   lower_tex_options.lower_tg4_offsets = true;

   NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
   NIR_PASS_V(nir, r600_nir_lower_txl_txf_array_or_cube);
   NIR_PASS_V(nir, r600_nir_lower_cube_to_2darray);

   NIR_PASS_V(nir, r600_nir_lower_pack_unpack_2x16);

   NIR_PASS_V(nir, r600_lower_shared_io);
   NIR_PASS_V(nir, r600_nir_lower_atomics);

   if (gfx_level == CAYMAN)
      NIR_PASS_V(nir, r600_legalize_image_load_store);

   while (optimize_once(nir))
      ;
}

DEBUG_GET_ONCE_NUM_OPTION(skip_opt_start, "R600_SFN_SKIP_OPT_START", -1);
DEBUG_GET_ONCE_NUM_OPTION(skip_opt_end, "R600_SFN_SKIP_OPT_END", -1);

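/* Key-dependent lowering and optimization that brings the NIR into the form
 * expected by the backend translator: IO lowering, scalarization, stage
 * specific passes, 64-bit splitting and the final conversion out of SSA. */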
void
r600_lower_and_optimize_nir(nir_shader *sh,
                            const union r600_shader_key *key,
                            enum amd_gfx_level gfx_level,
                            struct pipe_stream_output_info *so_info)
{
   bool lower_64bit =
      gfx_level < CAYMAN &&
      (sh->options->lower_int64_options || sh->options->lower_doubles_options) &&
      ((sh->info.bit_sizes_float | sh->info.bit_sizes_int) & 64);

   r600::sort_uniforms(sh);
   NIR_PASS_V(sh, r600_nir_fix_kcache_indirect_access);

   while (optimize_once(sh))
      ;

   if (sh->info.stage == MESA_SHADER_VERTEX)
      NIR_PASS_V(sh, r600_vectorize_vs_inputs);

   if (sh->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS_V(sh, nir_lower_fragcoord_wtrans);
      NIR_PASS_V(sh, r600_lower_fs_out_to_vector);
      NIR_PASS_V(sh, nir_opt_dce);
      NIR_PASS_V(sh, nir_remove_dead_variables, nir_var_shader_out, 0);
      r600::sort_fsoutput(sh);
   }
   nir_variable_mode io_modes = nir_var_uniform | nir_var_shader_in | nir_var_shader_out;

   NIR_PASS_V(sh, nir_opt_combine_stores, nir_var_shader_out);
   NIR_PASS_V(sh,
              nir_lower_io,
              io_modes,
              r600_glsl_type_size,
              nir_lower_io_lower_64bit_to_32);

   if (sh->info.stage == MESA_SHADER_FRAGMENT)
      NIR_PASS_V(sh, r600_lower_fs_pos_input);

   if (lower_64bit)
      NIR_PASS_V(sh, nir_lower_indirect_derefs, nir_var_function_temp, 10);

   NIR_PASS_V(sh, nir_opt_constant_folding);
   NIR_PASS_V(sh, nir_io_add_const_offset_to_base, io_modes);

   NIR_PASS_V(sh, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
   NIR_PASS_V(sh, nir_lower_phis_to_scalar, false);
   if (lower_64bit)
      NIR_PASS_V(sh, r600::r600_nir_split_64bit_io);
   NIR_PASS_V(sh, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
   NIR_PASS_V(sh, nir_lower_phis_to_scalar, false);
   NIR_PASS_V(sh, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
   NIR_PASS_V(sh, nir_copy_prop);
   NIR_PASS_V(sh, nir_opt_dce);

   if (r600_is_last_vertex_stage(sh, *key))
      r600_lower_clipvertex_to_clipdist(sh, *so_info);

   if (sh->info.stage == MESA_SHADER_TESS_CTRL ||
       sh->info.stage == MESA_SHADER_TESS_EVAL ||
       (sh->info.stage == MESA_SHADER_VERTEX && key->vs.as_ls)) {
      auto prim_type = sh->info.stage == MESA_SHADER_TESS_EVAL
                          ? u_tess_prim_from_shader(sh->info.tess._primitive_mode)
                          : (mesa_prim)key->tcs.prim_mode;
      NIR_PASS_V(sh, r600_lower_tess_io, static_cast<mesa_prim>(prim_type));
   }

   if (sh->info.stage == MESA_SHADER_TESS_CTRL)
      NIR_PASS_V(sh, r600_append_tcs_TF_emission, (mesa_prim)key->tcs.prim_mode);

   if (sh->info.stage == MESA_SHADER_TESS_EVAL) {
      NIR_PASS_V(sh, nir_lower_tess_coord_z,
                 sh->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES);
   }

   NIR_PASS_V(sh, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
   NIR_PASS_V(sh, nir_lower_phis_to_scalar, false);
   NIR_PASS_V(sh, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
   NIR_PASS_V(sh, r600_nir_lower_int_tg4);
   NIR_PASS_V(sh, r600::r600_nir_lower_tex_to_backend, gfx_level);

   if ((sh->info.bit_sizes_float | sh->info.bit_sizes_int) & 64) {
      NIR_PASS_V(sh, r600::r600_nir_split_64bit_io);
      NIR_PASS_V(sh, r600::r600_split_64bit_alu_and_phi);
      NIR_PASS_V(sh, nir_split_64bit_vec3_and_vec4);
      NIR_PASS_V(sh, nir_lower_int64);
   }

   NIR_PASS_V(sh, nir_lower_ubo_vec4);
   NIR_PASS_V(sh, r600_opt_indirect_fbo_loads);

   if (lower_64bit)
      NIR_PASS_V(sh, r600::r600_nir_64_to_vec2);

   if ((sh->info.bit_sizes_float | sh->info.bit_sizes_int) & 64)
      NIR_PASS_V(sh, r600::r600_split_64bit_uniforms_and_ubo);

   /* Lower to scalar to let some optimizations work out better */
   while (optimize_once(sh))
      ;

   if (lower_64bit)
      NIR_PASS_V(sh, r600::r600_merge_vec2_stores);

   NIR_PASS_V(sh, nir_remove_dead_variables, nir_var_shader_in, NULL);
   NIR_PASS_V(sh, nir_remove_dead_variables, nir_var_shader_out, NULL);

   NIR_PASS_V(sh,
              nir_lower_vars_to_scratch,
              nir_var_function_temp,
              40,
              r600_get_natural_size_align_bytes);

   while (optimize_once(sh))
      ;

   if ((sh->info.bit_sizes_float | sh->info.bit_sizes_int) & 64)
      NIR_PASS_V(sh, r600::r600_split_64bit_alu_and_phi);

   bool late_algebraic_progress;
   do {
      late_algebraic_progress = false;
      NIR_PASS(late_algebraic_progress, sh, nir_opt_algebraic_late);
      NIR_PASS(late_algebraic_progress, sh, nir_opt_constant_folding);
      NIR_PASS(late_algebraic_progress, sh, nir_copy_prop);
      NIR_PASS(late_algebraic_progress, sh, nir_opt_dce);
      NIR_PASS(late_algebraic_progress, sh, nir_opt_cse);
   } while (late_algebraic_progress);

   NIR_PASS_V(sh, nir_lower_bool_to_int32);

   NIR_PASS_V(sh, nir_lower_locals_to_regs, 32);
   NIR_PASS_V(sh, nir_convert_from_ssa, true);
   NIR_PASS_V(sh, nir_opt_dce);
}

void
r600_finalize_and_optimize_shader(r600::Shader *shader)
{
   if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
      std::cerr << "Shader after conversion from nir\n";
      shader->print(std::cerr);
   }

   auto sfn_skip_opt_start = debug_get_option_skip_opt_start();
   auto sfn_skip_opt_end = debug_get_option_skip_opt_end();
   bool skip_shader_opt_per_id = sfn_skip_opt_start >= 0 &&
                                 sfn_skip_opt_start <= shader->shader_id() &&
                                 sfn_skip_opt_end >= shader->shader_id();

   bool skip_shader_opt = r600::sfn_log.has_debug_flag(r600::SfnLog::noopt) ||
                          skip_shader_opt_per_id;

   if (!skip_shader_opt) {
      optimize(*shader);
      if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
         std::cerr << "Shader after optimization\n";
         shader->print(std::cerr);
      }
   }

   split_address_loads(*shader);

   if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
      std::cerr << "Shader after splitting address loads\n";
      shader->print(std::cerr);
   }

   if (!skip_shader_opt) {
      optimize(*shader);
      if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
         std::cerr << "Shader after optimization\n";
         shader->print(std::cerr);
      }
   }
}

r600::Shader *
r600_schedule_shader(r600::Shader *shader)
{
   auto scheduled_shader = r600::schedule(shader);
   if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
      std::cerr << "Shader after scheduling\n";
      scheduled_shader->print(std::cerr);
   }

   if (!r600::sfn_log.has_debug_flag(r600::SfnLog::nomerge)) {
      if (r600::sfn_log.has_debug_flag(r600::SfnLog::merge)) {
         r600::sfn_log << r600::SfnLog::merge << "Shader before RA\n";
         scheduled_shader->print(std::cerr);
      }

      r600::sfn_log << r600::SfnLog::trans << "Merge registers\n";
      auto lrm = r600::LiveRangeEvaluator().run(*scheduled_shader);

      if (!r600::register_allocation(lrm)) {
         R600_ERR("%s: Register allocation failed\n", __func__);
         /* For now crash if the shader could not be generated */
         assert(0);
         return nullptr;
      } else if (r600::sfn_log.has_debug_flag(r600::SfnLog::merge) ||
                 r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
         r600::sfn_log << "Shader after RA\n";
         scheduled_shader->print(std::cerr);
      }
   }

   return scheduled_shader;
}