/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <math.h>
#include "nir.h"
#include "nir_builder.h"
#include "nir_constant_expressions.h"
#include "nir_deref.h"

/*
 * Implements SSA-based constant folding.
 */

struct constant_fold_state {
   bool has_load_constant;
   bool has_indirect_load_const;
};

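/* Try to fold an ALU instruction whose sources are all load_const into a
 * single immediate.  Returns true if the instruction was replaced.
 */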
static bool
try_fold_alu(nir_builder *b, nir_alu_instr *alu)
{
   nir_const_value src[NIR_MAX_VEC_COMPONENTS][NIR_MAX_VEC_COMPONENTS];

   /* If any outputs/inputs have unsized types, we need to guess the
    * bit-size.  In this case, the validator ensures that all bit-sizes
    * match, so we can just take the bit-size from the first output/input
    * with an unsized type.  If all the outputs/inputs are sized, then we
    * don't need to guess the bit-size at all because the code we generate
    * for constant opcodes in this case already knows the sizes of the
    * types involved and does not need the provided bit-size for anything
    * (although it still requires a valid bit-size to be passed in).
    */
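   /* For example, nir_op_iadd uses unsized integer types, so the bit-size
    * is guessed from the destination, whereas an opcode such as
    * nir_op_unpack_64_2x32 has fully sized types and needs no guess.
    */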
   unsigned bit_size = 0;
   if (!nir_alu_type_get_type_size(nir_op_infos[alu->op].output_type))
      bit_size = alu->def.bit_size;

   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      if (bit_size == 0 &&
          !nir_alu_type_get_type_size(nir_op_infos[alu->op].input_types[i]))
         bit_size = alu->src[i].src.ssa->bit_size;

      nir_instr *src_instr = alu->src[i].src.ssa->parent_instr;

      if (src_instr->type != nir_instr_type_load_const)
         return false;
      nir_load_const_instr *load_const = nir_instr_as_load_const(src_instr);

      for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(alu, i);
           j++) {
         src[i][j] = load_const->value[alu->src[i].swizzle[j]];
      }
   }

   if (bit_size == 0)
      bit_size = 32;

   nir_const_value dest[NIR_MAX_VEC_COMPONENTS];
   nir_const_value *srcs[NIR_MAX_VEC_COMPONENTS];
   memset(dest, 0, sizeof(dest));
   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; ++i)
      srcs[i] = src[i];
   nir_eval_const_opcode(alu->op, dest, alu->def.num_components,
                         bit_size, srcs,
                         b->shader->info.float_controls_execution_mode);

   b->cursor = nir_before_instr(&alu->instr);
   nir_def *imm = nir_build_imm(b, alu->def.num_components,
                                alu->def.bit_size,
                                dest);
   nir_def_rewrite_uses(&alu->def, imm);
   nir_instr_remove(&alu->instr);
   nir_instr_free(&alu->instr);

   return true;
}

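/* Resolve a deref of a nir_var_mem_constant variable to the nir_const_value
 * vector it points at by walking the variable's constant_initializer.
 * Returns NULL if the deref cannot be resolved to a constant.
 */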
static nir_const_value *
const_value_for_deref(nir_deref_instr *deref)
{
   if (!nir_deref_mode_is(deref, nir_var_mem_constant))
      return NULL;

   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);
   if (path.path[0]->deref_type != nir_deref_type_var)
      goto fail;

   nir_variable *var = path.path[0]->var;
   assert(var->data.mode == nir_var_mem_constant);
   if (var->constant_initializer == NULL)
      goto fail;

   if (var->constant_initializer->is_null_constant) {
      /* Doesn't matter what casts are in the way, it's all zeros */
      nir_deref_path_finish(&path);
      return var->constant_initializer->values;
   }

   nir_constant *c = var->constant_initializer;
   nir_const_value *v = NULL; /* Vector value for array-deref-of-vec */

   for (unsigned i = 1; path.path[i] != NULL; i++) {
      nir_deref_instr *p = path.path[i];
      switch (p->deref_type) {
      case nir_deref_type_var:
         unreachable("Deref paths can only start with a var deref");

      case nir_deref_type_array: {
         assert(v == NULL);
         if (!nir_src_is_const(p->arr.index))
            goto fail;

         uint64_t idx = nir_src_as_uint(p->arr.index);
         if (c->num_elements > 0) {
            assert(glsl_type_is_array(path.path[i - 1]->type));
            if (idx >= c->num_elements)
               goto fail;
            c = c->elements[idx];
         } else {
            assert(glsl_type_is_vector(path.path[i - 1]->type));
            assert(glsl_type_is_scalar(p->type));
            if (idx >= NIR_MAX_VEC_COMPONENTS)
               goto fail;
            v = &c->values[idx];
         }
         break;
      }

      case nir_deref_type_struct:
         assert(glsl_type_is_struct(path.path[i - 1]->type));
         assert(v == NULL && c->num_elements > 0);
         if (p->strct.index >= c->num_elements)
            goto fail;
         c = c->elements[p->strct.index];
         break;

      default:
         goto fail;
      }
   }

   /* We have to have ended at a vector */
   assert(c->num_elements == 0);
   nir_deref_path_finish(&path);
   return v ? v : c->values;

fail:
   nir_deref_path_finish(&path);
   return NULL;
}

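/* Fold intrinsics with constant operands: conditional demote/discard/
 * terminate with a known condition, loads through constant-data derefs,
 * load_constant with a constant offset, and subgroup operations whose data
 * operand is already constant.
 */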
static bool
try_fold_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
                   struct constant_fold_state *state)
{
   switch (intrin->intrinsic) {
   case nir_intrinsic_demote_if:
   case nir_intrinsic_discard_if:
   case nir_intrinsic_terminate_if:
      if (nir_src_is_const(intrin->src[0])) {
         if (nir_src_as_bool(intrin->src[0])) {
            b->cursor = nir_before_instr(&intrin->instr);
            nir_intrinsic_op op;
            switch (intrin->intrinsic) {
            case nir_intrinsic_discard_if:
               op = nir_intrinsic_discard;
               break;
            case nir_intrinsic_demote_if:
               op = nir_intrinsic_demote;
               break;
            case nir_intrinsic_terminate_if:
               op = nir_intrinsic_terminate;
               break;
            default:
               unreachable("invalid intrinsic");
            }
            nir_intrinsic_instr *new_instr =
               nir_intrinsic_instr_create(b->shader, op);
            nir_builder_instr_insert(b, &new_instr->instr);
         }
         nir_instr_remove(&intrin->instr);
         return true;
      }
      return false;

   case nir_intrinsic_load_deref: {
      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
      nir_const_value *v = const_value_for_deref(deref);
      if (v) {
         b->cursor = nir_before_instr(&intrin->instr);
         nir_def *val = nir_build_imm(b, intrin->def.num_components,
                                      intrin->def.bit_size, v);
         nir_def_rewrite_uses(&intrin->def, val);
         nir_instr_remove(&intrin->instr);
         return true;
      }
      return false;
   }

   case nir_intrinsic_load_constant: {
      state->has_load_constant = true;

      if (!nir_src_is_const(intrin->src[0])) {
         state->has_indirect_load_const = true;
         return false;
      }

      unsigned offset = nir_src_as_uint(intrin->src[0]);
      unsigned base = nir_intrinsic_base(intrin);
      unsigned range = nir_intrinsic_range(intrin);
      assert(base + range <= b->shader->constant_data_size);

      b->cursor = nir_before_instr(&intrin->instr);
      nir_def *val;
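      /* An offset at or beyond the declared range is out of bounds, so any
       * value is allowed; fold the load to an undef.
       */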
      if (offset >= range) {
         val = nir_undef(b, intrin->def.num_components,
                         intrin->def.bit_size);
      } else {
         nir_const_value imm[NIR_MAX_VEC_COMPONENTS];
         memset(imm, 0, sizeof(imm));
         uint8_t *data = (uint8_t *)b->shader->constant_data + base;
         for (unsigned i = 0; i < intrin->num_components; i++) {
            unsigned bytes = intrin->def.bit_size / 8;
            bytes = MIN2(bytes, range - offset);

            memcpy(&imm[i].u64, data + offset, bytes);
            offset += bytes;
         }
         val = nir_build_imm(b, intrin->def.num_components,
                             intrin->def.bit_size, imm);
      }
      nir_def_rewrite_uses(&intrin->def, val);
      nir_instr_remove(&intrin->instr);
      return true;
   }

   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation:
   case nir_intrinsic_as_uniform:
   case nir_intrinsic_shuffle:
   case nir_intrinsic_shuffle_xor:
   case nir_intrinsic_shuffle_up:
   case nir_intrinsic_shuffle_down:
   case nir_intrinsic_quad_broadcast:
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
   case nir_intrinsic_quad_swizzle_amd:
   case nir_intrinsic_masked_swizzle_amd:
      /* All of these have the data payload in the first source.  They may
       * have a second source with a shuffle index, but that doesn't matter
       * if the data is constant.
       */
      if (nir_src_is_const(intrin->src[0])) {
         nir_def_rewrite_uses(&intrin->def,
                              intrin->src[0].ssa);
         nir_instr_remove(&intrin->instr);
         return true;
      }
      return false;

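   /* A constant value is identical in every invocation, so an equality vote
    * on it trivially passes.
    */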
   case nir_intrinsic_vote_feq:
   case nir_intrinsic_vote_ieq:
      if (nir_src_is_const(intrin->src[0])) {
         b->cursor = nir_before_instr(&intrin->instr);
         nir_def_rewrite_uses(&intrin->def,
                              nir_imm_true(b));
         nir_instr_remove(&intrin->instr);
         return true;
      }
      return false;

   default:
      return false;
   }
}

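/* A txb whose bias source is a constant zero samples exactly like a plain
 * tex, so drop the bias source and change the opcode.
 */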
static bool
try_fold_txb_to_tex(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_txb);

   const int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);

   /* nir_to_tgsi_lower_tex mangles many kinds of texture instructions,
    * including txb, into invalid states.  It removes the special
    * parameters and appends the values to the texture coordinate.
    */
   if (bias_idx < 0)
      return false;

   if (nir_src_is_const(tex->src[bias_idx].src) &&
       nir_src_as_float(tex->src[bias_idx].src) == 0.0) {
      nir_tex_instr_remove_src(tex, bias_idx);
      tex->op = nir_texop_tex;
      return true;
   }

   return false;
}

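/* Fold a constant texture/sampler offset source into the instruction's
 * immediate index and remove the now-redundant source.
 */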
static bool
try_fold_tex_offset(nir_tex_instr *tex, unsigned *index,
                    nir_tex_src_type src_type)
{
   const int src_idx = nir_tex_instr_src_index(tex, src_type);
   if (src_idx < 0)
      return false;

   if (!nir_src_is_const(tex->src[src_idx].src))
      return false;

   *index += nir_src_as_uint(tex->src[src_idx].src);
   nir_tex_instr_remove_src(tex, src_idx);

   return true;
}

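/* Remove a texel offset source whose components are all constant zero; it
 * has no effect on the sampled result.
 */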
static bool
try_fold_texel_offset_src(nir_tex_instr *tex)
{
   int offset_src = nir_tex_instr_src_index(tex, nir_tex_src_offset);
   if (offset_src < 0)
      return false;

   unsigned size = nir_tex_instr_src_size(tex, offset_src);
   nir_tex_src *src = &tex->src[offset_src];

   for (unsigned i = 0; i < size; i++) {
      nir_scalar comp = nir_scalar_resolved(src->src.ssa, i);
      if (!nir_scalar_is_const(comp) || nir_scalar_as_uint(comp) != 0)
         return false;
   }

   nir_tex_instr_remove_src(tex, offset_src);

   return true;
}

static bool
try_fold_tex(nir_builder *b, nir_tex_instr *tex)
{
   bool progress = false;

   progress |= try_fold_tex_offset(tex, &tex->texture_index,
                                   nir_tex_src_texture_offset);
   progress |= try_fold_tex_offset(tex, &tex->sampler_index,
                                   nir_tex_src_sampler_offset);

   /* txb with a bias of constant zero is just tex. */
   if (tex->op == nir_texop_txb)
      progress |= try_fold_txb_to_tex(b, tex);

   /* tex with a zero offset is just tex. */
   progress |= try_fold_texel_offset_src(tex);

   return progress;
}

static bool
try_fold_instr(nir_builder *b, nir_instr *instr, void *_state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return try_fold_alu(b, nir_instr_as_alu(instr));
   case nir_instr_type_intrinsic:
      return try_fold_intrinsic(b, nir_instr_as_intrinsic(instr), _state);
   case nir_instr_type_tex:
      return try_fold_tex(b, nir_instr_as_tex(instr));
   default:
      /* Don't know how to constant fold */
      return false;
   }
}

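/* Entry point: run constant folding over every instruction in the shader.
 * If at least one load_constant was seen and none were indirect, they have
 * all been folded away, so the shader's constant data can be freed too.
 */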
bool
nir_opt_constant_folding(nir_shader *shader)
{
   struct constant_fold_state state;
   state.has_load_constant = false;
   state.has_indirect_load_const = false;

   bool progress = nir_shader_instructions_pass(shader, try_fold_instr,
                                                nir_metadata_block_index |
                                                   nir_metadata_dominance,
                                                &state);

   /* This doesn't free the constant data if there are no constant loads,
    * because the data might still be in use; the loads may already have
    * been lowered to load_ubo.
    */
   if (state.has_load_constant && !state.has_indirect_load_const &&
       shader->constant_data_size) {
      ralloc_free(shader->constant_data);
      shader->constant_data = NULL;
      shader->constant_data_size = 0;
   }

   return progress;
}