• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2019 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "sfn_nir_lower_fs_out_to_vector.h"
28 
29 #include "nir_builder.h"
30 #include "nir_deref.h"
31 #include "util/u_math.h"
32 
33 #include <algorithm>
34 #include <array>
35 #include <set>
36 #include <vector>
37 
38 namespace r600 {
39 
40 using std::array;
41 using std::multiset;
42 using std::vector;
43 
44 struct nir_intrinsic_instr_less {
operator ()r600::nir_intrinsic_instr_less45    bool operator()(const nir_intrinsic_instr *lhs, const nir_intrinsic_instr *rhs) const
46    {
47       nir_variable *vlhs = nir_intrinsic_get_var(lhs, 0);
48       nir_variable *vrhs = nir_intrinsic_get_var(rhs, 0);
49 
50       auto ltype = glsl_get_base_type(vlhs->type);
51       auto rtype = glsl_get_base_type(vrhs->type);
52 
53       if (ltype != rtype)
54          return ltype < rtype;
55       return vlhs->data.location < vrhs->data.location;
56    }
57 };
58 
59 class NirLowerIOToVector {
60 public:
61    NirLowerIOToVector(int base_slot);
62    bool run(nir_function_impl *shader);
63 
64 protected:
65    bool var_can_merge(const nir_variable *lhs, const nir_variable *rhs);
66    bool var_can_rewrite(nir_variable *var) const;
67    void create_new_io_vars(nir_shader *shader);
68    void create_new_io_var(nir_shader *shader, unsigned location, unsigned comps);
69 
70    nir_deref_instr *clone_deref_array(nir_builder *b,
71                                       nir_deref_instr *dst_tail,
72                                       const nir_deref_instr *src_head);
73 
74    bool vectorize_block(nir_builder *b, nir_block *block);
75    bool instr_can_rewrite(nir_instr *instr);
76    bool vec_instr_set_remove(nir_builder *b, nir_instr *instr);
77 
78    using InstrSet = multiset<nir_intrinsic_instr *, nir_intrinsic_instr_less>;
79    using InstrSubSet = std::pair<InstrSet::iterator, InstrSet::iterator>;
80 
81    bool
82    vec_instr_stack_pop(nir_builder *b, InstrSubSet& ir_set, nir_intrinsic_instr *instr);
83 
84    array<array<nir_variable *, 4>, 16> m_vars;
85    InstrSet m_block_io;
86    int m_next_index;
87 
88 private:
89    virtual nir_variable_mode get_io_mode(nir_shader *shader) const = 0;
90    virtual bool instr_can_rewrite_type(nir_intrinsic_instr *intr) const = 0;
91    virtual bool var_can_rewrite_slot(nir_variable *var) const = 0;
92    virtual void create_new_io(nir_builder *b,
93                               nir_intrinsic_instr *intr,
94                               nir_variable *var,
95                               nir_def **srcs,
96                               unsigned first_comp,
97                               unsigned num_comps) = 0;
98 
99    int m_base_slot;
100 };
101 
102 class NirLowerFSOutToVector : public NirLowerIOToVector {
103 public:
104    NirLowerFSOutToVector();
105 
106 private:
107    nir_variable_mode get_io_mode(nir_shader *shader) const override;
108    bool var_can_rewrite_slot(nir_variable *var) const override;
109    void create_new_io(nir_builder *b,
110                       nir_intrinsic_instr *intr,
111                       nir_variable *var,
112                       nir_def **srcs,
113                       unsigned first_comp,
114                       unsigned num_comps) override;
115    bool instr_can_rewrite_type(nir_intrinsic_instr *intr) const override;
116 
117    nir_def *create_combined_vector(nir_builder *b,
118                                        nir_def **srcs,
119                                        int first_comp,
120                                        int num_comp);
121 };
122 
123 bool
r600_lower_fs_out_to_vector(nir_shader * shader)124 r600_lower_fs_out_to_vector(nir_shader *shader)
125 {
126    NirLowerFSOutToVector processor;
127 
128    assert(shader->info.stage == MESA_SHADER_FRAGMENT);
129    bool progress = false;
130 
131    nir_foreach_function_impl(impl, shader) {
132       progress |= processor.run(impl);
133    }
134    return progress;
135 }
136 
NirLowerIOToVector(int base_slot)137 NirLowerIOToVector::NirLowerIOToVector(int base_slot):
138     m_next_index(0),
139     m_base_slot(base_slot)
140 {
141    for (auto& a : m_vars)
142       for (auto& aa : a)
143          aa = nullptr;
144 }
145 
146 bool
run(nir_function_impl * impl)147 NirLowerIOToVector::run(nir_function_impl *impl)
148 {
149    nir_builder b = nir_builder_create(impl);
150 
151    nir_metadata_require(impl, nir_metadata_dominance);
152    create_new_io_vars(impl->function->shader);
153 
154    bool progress = vectorize_block(&b, nir_start_block(impl));
155    if (progress) {
156       nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance);
157    } else {
158       nir_metadata_preserve(impl, nir_metadata_all);
159    }
160    return progress;
161 }
162 
163 void
create_new_io_vars(nir_shader * shader)164 NirLowerIOToVector::create_new_io_vars(nir_shader *shader)
165 {
166    nir_variable_mode mode = get_io_mode(shader);
167 
168    bool can_rewrite_vars = false;
169    nir_foreach_variable_with_modes(var, shader, mode)
170    {
171       if (var_can_rewrite(var)) {
172          can_rewrite_vars = true;
173          unsigned loc = var->data.location - m_base_slot;
174          m_vars[loc][var->data.location_frac] = var;
175       }
176    }
177 
178    if (!can_rewrite_vars)
179       return;
180 
181    /* We don't handle combining vars of different type e.g. different array
182     * lengths.
183     */
184    for (unsigned i = 0; i < 16; i++) {
185       unsigned comps = 0;
186 
187       for (unsigned j = 0; j < 3; j++) {
188          if (!m_vars[i][j])
189             continue;
190 
191          for (unsigned k = j + 1; k < 4; k++) {
192             if (!m_vars[i][k])
193                continue;
194 
195             if (!var_can_merge(m_vars[i][j], m_vars[i][k]))
196                continue;
197 
198             /* Set comps */
199             for (unsigned n = 0; n < glsl_get_components(m_vars[i][j]->type); ++n)
200                comps |= 1 << (m_vars[i][j]->data.location_frac + n);
201 
202             for (unsigned n = 0; n < glsl_get_components(m_vars[i][k]->type); ++n)
203                comps |= 1 << (m_vars[i][k]->data.location_frac + n);
204          }
205       }
206       if (comps)
207          create_new_io_var(shader, i, comps);
208    }
209 }
210 
211 bool
var_can_merge(const nir_variable * lhs,const nir_variable * rhs)212 NirLowerIOToVector::var_can_merge(const nir_variable *lhs, const nir_variable *rhs)
213 {
214    return (glsl_get_base_type(lhs->type) == glsl_get_base_type(rhs->type));
215 }
216 
217 void
create_new_io_var(nir_shader * shader,unsigned location,unsigned comps)218 NirLowerIOToVector::create_new_io_var(nir_shader *shader,
219                                       unsigned location,
220                                       unsigned comps)
221 {
222    unsigned num_comps = util_bitcount(comps);
223    assert(num_comps > 1);
224 
225    /* Note: u_bit_scan() strips a component of the comps bitfield here */
226    unsigned first_comp = u_bit_scan(&comps);
227 
228    nir_variable *var = nir_variable_clone(m_vars[location][first_comp], shader);
229    var->data.location_frac = first_comp;
230    var->type = glsl_replace_vector_type(var->type, num_comps);
231 
232    nir_shader_add_variable(shader, var);
233 
234    m_vars[location][first_comp] = var;
235 
236    while (comps) {
237       const int comp = u_bit_scan(&comps);
238       if (m_vars[location][comp]) {
239          m_vars[location][comp] = var;
240       }
241    }
242 }
243 
244 bool
var_can_rewrite(nir_variable * var) const245 NirLowerIOToVector::var_can_rewrite(nir_variable *var) const
246 {
247    /* Skip complex types we don't split in the first place */
248    if (!glsl_type_is_vector_or_scalar(glsl_without_array(var->type)))
249       return false;
250 
251    if (glsl_get_bit_size(glsl_without_array(var->type)) != 32)
252       return false;
253 
254    return var_can_rewrite_slot(var);
255 }
256 
257 bool
vectorize_block(nir_builder * b,nir_block * block)258 NirLowerIOToVector::vectorize_block(nir_builder *b, nir_block *block)
259 {
260    bool progress = false;
261 
262    nir_foreach_instr_safe(instr, block)
263    {
264       if (instr_can_rewrite(instr)) {
265          instr->index = m_next_index++;
266          nir_intrinsic_instr *ir = nir_instr_as_intrinsic(instr);
267          m_block_io.insert(ir);
268       }
269    }
270 
271    for (unsigned i = 0; i < block->num_dom_children; i++) {
272       nir_block *child = block->dom_children[i];
273       progress |= vectorize_block(b, child);
274    }
275 
276    nir_foreach_instr_reverse_safe(instr, block)
277    {
278       progress |= vec_instr_set_remove(b, instr);
279    }
280    m_block_io.clear();
281 
282    return progress;
283 }
284 
285 bool
instr_can_rewrite(nir_instr * instr)286 NirLowerIOToVector::instr_can_rewrite(nir_instr *instr)
287 {
288    if (instr->type != nir_instr_type_intrinsic)
289       return false;
290 
291    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
292 
293    if (intr->num_components > 3)
294       return false;
295 
296    return instr_can_rewrite_type(intr);
297 }
298 
299 bool
vec_instr_set_remove(nir_builder * b,nir_instr * instr)300 NirLowerIOToVector::vec_instr_set_remove(nir_builder *b, nir_instr *instr)
301 {
302    if (!instr_can_rewrite(instr))
303       return false;
304 
305    nir_intrinsic_instr *ir = nir_instr_as_intrinsic(instr);
306    auto entry = m_block_io.equal_range(ir);
307    if (entry.first != m_block_io.end()) {
308       vec_instr_stack_pop(b, entry, ir);
309    }
310    return true;
311 }
312 
313 nir_deref_instr *
clone_deref_array(nir_builder * b,nir_deref_instr * dst_tail,const nir_deref_instr * src_head)314 NirLowerIOToVector::clone_deref_array(nir_builder *b,
315                                       nir_deref_instr *dst_tail,
316                                       const nir_deref_instr *src_head)
317 {
318    const nir_deref_instr *parent = nir_deref_instr_parent(src_head);
319 
320    if (!parent)
321       return dst_tail;
322 
323    assert(src_head->deref_type == nir_deref_type_array);
324 
325    dst_tail = clone_deref_array(b, dst_tail, parent);
326 
327    return nir_build_deref_array(b, dst_tail, src_head->arr.index.ssa);
328 }
329 
NirLowerFSOutToVector()330 NirLowerFSOutToVector::NirLowerFSOutToVector():
331     NirLowerIOToVector(FRAG_RESULT_COLOR)
332 {
333 }
334 
335 bool
var_can_rewrite_slot(nir_variable * var) const336 NirLowerFSOutToVector::var_can_rewrite_slot(nir_variable *var) const
337 {
338    return ((var->data.mode == nir_var_shader_out) &&
339            ((var->data.location == FRAG_RESULT_COLOR) ||
340             ((var->data.location >= FRAG_RESULT_DATA0) &&
341              (var->data.location <= FRAG_RESULT_DATA7))));
342 }
343 
344 bool
vec_instr_stack_pop(nir_builder * b,InstrSubSet & ir_set,nir_intrinsic_instr * instr)345 NirLowerIOToVector::vec_instr_stack_pop(nir_builder *b,
346                                         InstrSubSet& ir_set,
347                                         nir_intrinsic_instr *instr)
348 {
349    vector<nir_intrinsic_instr *> ir_sorted_set(ir_set.first, ir_set.second);
350    std::sort(ir_sorted_set.begin(),
351              ir_sorted_set.end(),
352              [](const nir_intrinsic_instr *lhs, const nir_intrinsic_instr *rhs) {
353                 return lhs->instr.index > rhs->instr.index;
354              });
355 
356    nir_intrinsic_instr *intr = *ir_sorted_set.begin();
357    nir_variable *var = nir_intrinsic_get_var(intr, 0);
358 
359    unsigned loc = var->data.location - m_base_slot;
360 
361    nir_variable *new_var = m_vars[loc][var->data.location_frac];
362    unsigned num_comps = glsl_get_vector_elements(glsl_without_array(new_var->type));
363    unsigned old_num_comps = glsl_get_vector_elements(glsl_without_array(var->type));
364 
365    /* Don't bother walking the stack if this component can't be vectorised. */
366    if (old_num_comps > 3) {
367       return false;
368    }
369 
370    if (new_var == var) {
371       return false;
372    }
373 
374    b->cursor = nir_after_instr(&intr->instr);
375    nir_undef_instr *instr_undef = nir_undef_instr_create(b->shader, 1, 32);
376    nir_builder_instr_insert(b, &instr_undef->instr);
377 
378    nir_def *srcs[4];
379    for (int i = 0; i < 4; i++) {
380       srcs[i] = &instr_undef->def;
381    }
382    srcs[var->data.location_frac] = intr->src[1].ssa;
383 
384    for (auto k = ir_sorted_set.begin() + 1; k != ir_sorted_set.end(); ++k) {
385       nir_intrinsic_instr *intr2 = *k;
386       nir_variable *var2 = nir_intrinsic_get_var(intr2, 0);
387       unsigned loc2 = var->data.location - m_base_slot;
388 
389       if (m_vars[loc][var->data.location_frac] !=
390           m_vars[loc2][var2->data.location_frac]) {
391          continue;
392       }
393 
394       assert(glsl_get_vector_elements(glsl_without_array(var2->type)) < 4);
395 
396       if (srcs[var2->data.location_frac] == &instr_undef->def) {
397          assert(intr2->src[1].ssa);
398          srcs[var2->data.location_frac] = intr2->src[1].ssa;
399       }
400       nir_instr_remove(&intr2->instr);
401    }
402 
403    create_new_io(b, intr, new_var, srcs, new_var->data.location_frac, num_comps);
404    return true;
405 }
406 
407 nir_variable_mode
get_io_mode(nir_shader * shader) const408 NirLowerFSOutToVector::get_io_mode(nir_shader *shader) const
409 {
410    return nir_var_shader_out;
411 }
412 
413 void
create_new_io(nir_builder * b,nir_intrinsic_instr * intr,nir_variable * var,nir_def ** srcs,unsigned first_comp,unsigned num_comps)414 NirLowerFSOutToVector::create_new_io(nir_builder *b,
415                                      nir_intrinsic_instr *intr,
416                                      nir_variable *var,
417                                      nir_def **srcs,
418                                      unsigned first_comp,
419                                      unsigned num_comps)
420 {
421    b->cursor = nir_before_instr(&intr->instr);
422 
423    nir_intrinsic_instr *new_intr = nir_intrinsic_instr_create(b->shader, intr->intrinsic);
424    new_intr->num_components = num_comps;
425 
426    nir_intrinsic_set_write_mask(new_intr, (1 << num_comps) - 1);
427 
428    nir_deref_instr *deref = nir_build_deref_var(b, var);
429    deref = clone_deref_array(b, deref, nir_src_as_deref(intr->src[0]));
430 
431    new_intr->src[0] = nir_src_for_ssa(&deref->def);
432    new_intr->src[1] =
433       nir_src_for_ssa(create_combined_vector(b, srcs, first_comp, num_comps));
434 
435    nir_builder_instr_insert(b, &new_intr->instr);
436 
437    /* Remove the old store intrinsic */
438    nir_instr_remove(&intr->instr);
439 }
440 
441 bool
instr_can_rewrite_type(nir_intrinsic_instr * intr) const442 NirLowerFSOutToVector::instr_can_rewrite_type(nir_intrinsic_instr *intr) const
443 {
444    if (intr->intrinsic != nir_intrinsic_store_deref)
445       return false;
446 
447    nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
448    if (!nir_deref_mode_is(deref, nir_var_shader_out))
449       return false;
450 
451    return var_can_rewrite(nir_deref_instr_get_variable(deref));
452 }
453 
454 nir_def *
create_combined_vector(nir_builder * b,nir_def ** srcs,int first_comp,int num_comp)455 NirLowerFSOutToVector::create_combined_vector(nir_builder *b,
456                                               nir_def **srcs,
457                                               int first_comp,
458                                               int num_comp)
459 {
460    nir_op op;
461    switch (num_comp) {
462    case 2:
463       op = nir_op_vec2;
464       break;
465    case 3:
466       op = nir_op_vec3;
467       break;
468    case 4:
469       op = nir_op_vec4;
470       break;
471    default:
472       unreachable("combined vector must have 2 to 4 components");
473    }
474    nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
475    instr->exact = b->exact;
476 
477    int i = 0;
478    unsigned k = 0;
479    while (i < num_comp) {
480       nir_def *s = srcs[first_comp + k];
481       for (uint8_t kk = 0; kk < s->num_components && i < num_comp; ++kk) {
482          instr->src[i].src = nir_src_for_ssa(s);
483          instr->src[i].swizzle[0] = kk;
484          ++i;
485       }
486       k += s->num_components;
487    }
488 
489    nir_def_init(&instr->instr, &instr->def, num_comp, 32);
490    nir_builder_instr_insert(b, &instr->instr);
491    return &instr->def;
492 }
493 
494 } // namespace r600
495