• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2020 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /* These passes enable converting uniforms to literals when it's profitable,
25  * effectively inlining uniform values in the IR. The main benefit is register
26  * usage decrease leading to better SMT (hyperthreading). It's accomplished
27  * by targeting uniforms that determine whether a conditional branch is
28  * taken.
29  *
30  * Only uniforms used in if conditions are analyzed.
31  *
32  * nir_find_inlinable_uniforms finds uniforms that can be inlined and stores
33  * that information in shader_info.
34  *
35  * nir_inline_uniforms inlines uniform values.
36  *
37  * (uniforms must be lowered to load_ubo before calling this)
38  */
39 
40 #include "compiler/nir/nir_builder.h"
41 
42 /* Largest UBO byte offset whose dword offset (byte offset / 4) still fits in shader_info::inlinable_uniform_dw_offsets[] */
43 #define MAX_OFFSET (UINT16_MAX * 4)
44 
45 static bool
src_only_uses_uniforms(const nir_src * src,struct set ** uni_offsets)46 src_only_uses_uniforms(const nir_src *src, struct set **uni_offsets)
47 {
48    if (!src->is_ssa)
49       return false;
50 
51    nir_instr *instr = src->ssa->parent_instr;
52 
53    switch (instr->type) {
54    case nir_instr_type_alu: {
55       /* Return true if all sources return true. */
56       /* TODO: Swizzles are ignored, so vectors can prevent inlining. */
57       nir_alu_instr *alu = nir_instr_as_alu(instr);
58       for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
59          if (!src_only_uses_uniforms(&alu->src[i].src, uni_offsets))
60              return false;
61       }
62       return true;
63    }
64 
65    case nir_instr_type_intrinsic: {
66       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
67       /* Return true if the intrinsic loads from UBO 0 with a constant
68        * offset.
69        */
70       if (intr->intrinsic == nir_intrinsic_load_ubo &&
71           nir_src_is_const(intr->src[0]) &&
72           nir_src_as_uint(intr->src[0]) == 0 &&
73           nir_src_is_const(intr->src[1]) &&
74           nir_src_as_uint(intr->src[1]) <= MAX_OFFSET &&
75           /* TODO: Can't handle vectors and other bit sizes for now. */
76           /* UBO loads should be scalarized. */
77           intr->dest.ssa.num_components == 1 &&
78           intr->dest.ssa.bit_size == 32) {
79          /* Record the uniform offset. */
80          if (!*uni_offsets)
81             *uni_offsets = _mesa_set_create_u32_keys(NULL);
82 
83          /* Add 1 because the set doesn't allow NULL keys. */
84          _mesa_set_add(*uni_offsets,
85                        (void*)(uintptr_t)(nir_src_as_uint(intr->src[1]) + 1));
86          return true;
87       }
88       return false;
89    }
90 
91    case nir_instr_type_load_const:
92       /* Always return true for constants. */
93       return true;
94 
95    default:
96       return false;
97    }
98 }
99 
100 void
nir_find_inlinable_uniforms(nir_shader * shader)101 nir_find_inlinable_uniforms(nir_shader *shader)
102 {
103    struct set *uni_offsets = NULL;
104 
105    nir_foreach_function(function, shader) {
106       if (function->impl) {
107          foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
108             switch (node->type) {
109             case nir_cf_node_if: {
110                const nir_src *cond = &nir_cf_node_as_if(node)->condition;
111                struct set *found_offsets = NULL;
112 
113                if (src_only_uses_uniforms(cond, &found_offsets) &&
114                    found_offsets) {
115                   /* All uniforms are lowerable. Save uniform offsets. */
116                   set_foreach(found_offsets, entry) {
117                      if (!uni_offsets)
118                         uni_offsets = _mesa_set_create_u32_keys(NULL);
119 
120                      _mesa_set_add(uni_offsets, entry->key);
121                   }
122                }
123                if (found_offsets)
124                   _mesa_set_destroy(found_offsets, NULL);
125                break;
126             }
127 
128             case nir_cf_node_loop:
129                /* TODO: handle loops if we want to unroll them at draw time */
130                break;
131 
132             default:
133                break;
134             }
135          }
136       }
137    }
138 
139    if (uni_offsets) {
140       unsigned num = 0;
141 
142       set_foreach(uni_offsets, entry) {
143          /* Subtract 1 because all keys are + 1. */
144          uint32_t offset = (uintptr_t)entry->key - 1;
145          assert(offset < MAX_OFFSET);
146 
147          if (num < MAX_INLINABLE_UNIFORMS)
148             shader->info.inlinable_uniform_dw_offsets[num++] = offset / 4;
149       }
150       shader->info.num_inlinable_uniforms = num;
151       _mesa_set_destroy(uni_offsets, NULL);
152    }
153 }
154 
155 void
nir_inline_uniforms(nir_shader * shader,unsigned num_uniforms,const uint32_t * uniform_values,const uint16_t * uniform_dw_offsets)156 nir_inline_uniforms(nir_shader *shader, unsigned num_uniforms,
157                     const uint32_t *uniform_values,
158                     const uint16_t *uniform_dw_offsets)
159 {
160    if (!num_uniforms)
161       return;
162 
163    nir_foreach_function(function, shader) {
164       if (function->impl) {
165          nir_builder b;
166          nir_builder_init(&b, function->impl);
167          nir_foreach_block(block, function->impl) {
168             nir_foreach_instr_safe(instr, block) {
169                if (instr->type != nir_instr_type_intrinsic)
170                   continue;
171 
172                nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
173 
174                /* Only replace UBO 0 with constant offsets. */
175                if (intr->intrinsic == nir_intrinsic_load_ubo &&
176                    nir_src_is_const(intr->src[0]) &&
177                    nir_src_as_uint(intr->src[0]) == 0 &&
178                    nir_src_is_const(intr->src[1]) &&
179                    /* TODO: Can't handle vectors and other bit sizes for now. */
180                    /* UBO loads should be scalarized. */
181                    intr->dest.ssa.num_components == 1 &&
182                    intr->dest.ssa.bit_size == 32) {
183                   uint64_t offset = nir_src_as_uint(intr->src[1]);
184 
185                   for (unsigned i = 0; i < num_uniforms; i++) {
186                      if (offset == uniform_dw_offsets[i] * 4) {
187                         b.cursor = nir_before_instr(&intr->instr);
188                         nir_ssa_def *def = nir_imm_int(&b, uniform_values[i]);
189                         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(def));
190                         nir_instr_remove(&intr->instr);
191                         break;
192                      }
193                   }
194                }
195             }
196          }
197 
198          nir_metadata_preserve(function->impl, nir_metadata_block_index |
199                                                nir_metadata_dominance);
200       }
201    }
202 }
203