1 /*
2 * Copyright © 2020 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
/* These passes enable converting uniforms to literals when it's profitable,
 * effectively inlining uniform values in the IR. The main benefit is register
 * usage decrease leading to better SMT (hyperthreading). It's accomplished
 * by targeting uniforms that determine whether a conditional branch is
 * taken.
 *
 * Only uniforms used in if conditions are analyzed.
 *
 * nir_find_inlinable_uniforms finds uniforms that can be inlined and stores
 * that information in shader_info.
 *
 * nir_inline_uniforms inlines uniform values.
 *
 * (uniforms must be lowered to load_ubo before calling this)
 */
39
40 #include "compiler/nir/nir_builder.h"
41
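/* Largest UBO byte offset that can be recorded for inlining. Offsets are
 * stored in shader_info::inlinable_uniform_dw_offsets[] in dword units
 * (byte offset / 4), so this corresponds to a stored value of UINT16_MAX.
 */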
43 #define MAX_OFFSET (UINT16_MAX * 4)
44
45 static bool
src_only_uses_uniforms(const nir_src * src,struct set ** uni_offsets)46 src_only_uses_uniforms(const nir_src *src, struct set **uni_offsets)
47 {
48 if (!src->is_ssa)
49 return false;
50
51 nir_instr *instr = src->ssa->parent_instr;
52
53 switch (instr->type) {
54 case nir_instr_type_alu: {
55 /* Return true if all sources return true. */
56 /* TODO: Swizzles are ignored, so vectors can prevent inlining. */
57 nir_alu_instr *alu = nir_instr_as_alu(instr);
58 for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
59 if (!src_only_uses_uniforms(&alu->src[i].src, uni_offsets))
60 return false;
61 }
62 return true;
63 }
64
65 case nir_instr_type_intrinsic: {
66 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
67 /* Return true if the intrinsic loads from UBO 0 with a constant
68 * offset.
69 */
70 if (intr->intrinsic == nir_intrinsic_load_ubo &&
71 nir_src_is_const(intr->src[0]) &&
72 nir_src_as_uint(intr->src[0]) == 0 &&
73 nir_src_is_const(intr->src[1]) &&
74 nir_src_as_uint(intr->src[1]) <= MAX_OFFSET &&
75 /* TODO: Can't handle vectors and other bit sizes for now. */
76 /* UBO loads should be scalarized. */
77 intr->dest.ssa.num_components == 1 &&
78 intr->dest.ssa.bit_size == 32) {
79 /* Record the uniform offset. */
80 if (!*uni_offsets)
81 *uni_offsets = _mesa_set_create_u32_keys(NULL);
82
83 /* Add 1 because the set doesn't allow NULL keys. */
84 _mesa_set_add(*uni_offsets,
85 (void*)(uintptr_t)(nir_src_as_uint(intr->src[1]) + 1));
86 return true;
87 }
88 return false;
89 }
90
91 case nir_instr_type_load_const:
92 /* Always return true for constants. */
93 return true;
94
95 default:
96 return false;
97 }
98 }
99
100 void
nir_find_inlinable_uniforms(nir_shader * shader)101 nir_find_inlinable_uniforms(nir_shader *shader)
102 {
103 struct set *uni_offsets = NULL;
104
105 nir_foreach_function(function, shader) {
106 if (function->impl) {
107 foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
108 switch (node->type) {
109 case nir_cf_node_if: {
110 const nir_src *cond = &nir_cf_node_as_if(node)->condition;
111 struct set *found_offsets = NULL;
112
113 if (src_only_uses_uniforms(cond, &found_offsets) &&
114 found_offsets) {
115 /* All uniforms are lowerable. Save uniform offsets. */
116 set_foreach(found_offsets, entry) {
117 if (!uni_offsets)
118 uni_offsets = _mesa_set_create_u32_keys(NULL);
119
120 _mesa_set_add(uni_offsets, entry->key);
121 }
122 }
123 if (found_offsets)
124 _mesa_set_destroy(found_offsets, NULL);
125 break;
126 }
127
128 case nir_cf_node_loop:
129 /* TODO: handle loops if we want to unroll them at draw time */
130 break;
131
132 default:
133 break;
134 }
135 }
136 }
137 }
138
139 if (uni_offsets) {
140 unsigned num = 0;
141
142 set_foreach(uni_offsets, entry) {
143 /* Subtract 1 because all keys are + 1. */
144 uint32_t offset = (uintptr_t)entry->key - 1;
145 assert(offset < MAX_OFFSET);
146
147 if (num < MAX_INLINABLE_UNIFORMS)
148 shader->info.inlinable_uniform_dw_offsets[num++] = offset / 4;
149 }
150 shader->info.num_inlinable_uniforms = num;
151 _mesa_set_destroy(uni_offsets, NULL);
152 }
153 }
154
155 void
nir_inline_uniforms(nir_shader * shader,unsigned num_uniforms,const uint32_t * uniform_values,const uint16_t * uniform_dw_offsets)156 nir_inline_uniforms(nir_shader *shader, unsigned num_uniforms,
157 const uint32_t *uniform_values,
158 const uint16_t *uniform_dw_offsets)
159 {
160 if (!num_uniforms)
161 return;
162
163 nir_foreach_function(function, shader) {
164 if (function->impl) {
165 nir_builder b;
166 nir_builder_init(&b, function->impl);
167 nir_foreach_block(block, function->impl) {
168 nir_foreach_instr_safe(instr, block) {
169 if (instr->type != nir_instr_type_intrinsic)
170 continue;
171
172 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
173
174 /* Only replace UBO 0 with constant offsets. */
175 if (intr->intrinsic == nir_intrinsic_load_ubo &&
176 nir_src_is_const(intr->src[0]) &&
177 nir_src_as_uint(intr->src[0]) == 0 &&
178 nir_src_is_const(intr->src[1]) &&
179 /* TODO: Can't handle vectors and other bit sizes for now. */
180 /* UBO loads should be scalarized. */
181 intr->dest.ssa.num_components == 1 &&
182 intr->dest.ssa.bit_size == 32) {
183 uint64_t offset = nir_src_as_uint(intr->src[1]);
184
185 for (unsigned i = 0; i < num_uniforms; i++) {
186 if (offset == uniform_dw_offsets[i] * 4) {
187 b.cursor = nir_before_instr(&intr->instr);
188 nir_ssa_def *def = nir_imm_int(&b, uniform_values[i]);
189 nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(def));
190 nir_instr_remove(&intr->instr);
191 break;
192 }
193 }
194 }
195 }
196 }
197
198 nir_metadata_preserve(function->impl, nir_metadata_block_index |
199 nir_metadata_dominance);
200 }
201 }
202 }
203