/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <math.h>
#include "nir.h"
#include "nir_builder.h"
#include "nir_constant_expressions.h"
#include "nir_deref.h"

/*
 * Implements SSA-based constant folding.
 */

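/* Per-shader state gathered while folding.  We track whether any
 * load_constant intrinsics were seen and whether any of them used a
 * non-constant (indirect) offset; this determines whether the shader's
 * constant_data can be freed at the end of the pass.
 */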
struct constant_fold_state {
   bool has_load_constant;
   bool has_indirect_load_const;
};

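/* Fold an ALU instruction whose sources are all load_const instructions into
 * a single immediate holding the evaluated result.
 */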
static bool
try_fold_alu(nir_builder *b, nir_alu_instr *alu)
{
   nir_const_value src[NIR_MAX_VEC_COMPONENTS][NIR_MAX_VEC_COMPONENTS];

   /* If any outputs/inputs have unsized types, we need to guess the
    * bit-size.  In this case, the validator ensures that all bit-sizes
    * match so we can just take the bit-size from the first output/input
    * with an unsized type.  If all the outputs/inputs are sized then we
    * don't need to guess the bit-size at all because the code we generate
    * for constant opcodes in this case already knows the sizes of the
    * types involved and does not need the provided bit-size for anything
    * (although it still requires a valid bit-size to be passed in).
    */
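   /* For example, an opcode like mov has an unsized destination, so the
    * destination's bit size is used; a comparison like flt has a sized
    * (1-bit boolean) destination, so the bit size comes from the first
    * unsized source instead.
    */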
   unsigned bit_size = 0;
   if (!nir_alu_type_get_type_size(nir_op_infos[alu->op].output_type))
      bit_size = alu->def.bit_size;

   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      if (bit_size == 0 &&
          !nir_alu_type_get_type_size(nir_op_infos[alu->op].input_types[i]))
         bit_size = alu->src[i].src.ssa->bit_size;

      nir_instr *src_instr = alu->src[i].src.ssa->parent_instr;

      if (src_instr->type != nir_instr_type_load_const)
         return false;
      nir_load_const_instr *load_const = nir_instr_as_load_const(src_instr);

      for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(alu, i);
           j++) {
         src[i][j] = load_const->value[alu->src[i].swizzle[j]];
      }
   }

   if (bit_size == 0)
      bit_size = 32;

   nir_const_value dest[NIR_MAX_VEC_COMPONENTS];
   nir_const_value *srcs[NIR_MAX_VEC_COMPONENTS];
   memset(dest, 0, sizeof(dest));
   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; ++i)
      srcs[i] = src[i];
   nir_eval_const_opcode(alu->op, dest, alu->def.num_components,
                         bit_size, srcs,
                         b->shader->info.float_controls_execution_mode);

   b->cursor = nir_before_instr(&alu->instr);
   nir_def *imm = nir_build_imm(b, alu->def.num_components,
                                alu->def.bit_size,
                                dest);
   nir_def_rewrite_uses(&alu->def, imm);
   nir_instr_remove(&alu->instr);
   nir_instr_free(&alu->instr);

   return true;
}

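/* Walk a deref chain rooted in a nir_var_mem_constant variable's constant
 * initializer and return the constant vector it ends at, or NULL if the
 * deref cannot be resolved (indirect index, missing initializer, etc.).
 */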
static nir_const_value *
const_value_for_deref(nir_deref_instr *deref)
{
   if (!nir_deref_mode_is(deref, nir_var_mem_constant))
      return NULL;

   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);
   if (path.path[0]->deref_type != nir_deref_type_var)
      goto fail;

   nir_variable *var = path.path[0]->var;
   assert(var->data.mode == nir_var_mem_constant);
   if (var->constant_initializer == NULL)
      goto fail;

   if (var->constant_initializer->is_null_constant) {
      /* Doesn't matter what casts are in the way, it's all zeros */
      nir_deref_path_finish(&path);
      return var->constant_initializer->values;
   }

   nir_constant *c = var->constant_initializer;
   nir_const_value *v = NULL; /* Vector value for array-deref-of-vec */

   for (unsigned i = 1; path.path[i] != NULL; i++) {
      nir_deref_instr *p = path.path[i];
      switch (p->deref_type) {
      case nir_deref_type_var:
         unreachable("Deref paths can only start with a var deref");

      case nir_deref_type_array: {
         assert(v == NULL);
         if (!nir_src_is_const(p->arr.index))
            goto fail;

         uint64_t idx = nir_src_as_uint(p->arr.index);
         if (c->num_elements > 0) {
            assert(glsl_type_is_array(path.path[i - 1]->type));
            if (idx >= c->num_elements)
               goto fail;
            c = c->elements[idx];
         } else {
            assert(glsl_type_is_vector(path.path[i - 1]->type));
            assert(glsl_type_is_scalar(p->type));
            if (idx >= NIR_MAX_VEC_COMPONENTS)
               goto fail;
            v = &c->values[idx];
         }
         break;
      }

      case nir_deref_type_struct:
         assert(glsl_type_is_struct(path.path[i - 1]->type));
         assert(v == NULL && c->num_elements > 0);
         if (p->strct.index >= c->num_elements)
            goto fail;
         c = c->elements[p->strct.index];
         break;

      default:
         goto fail;
      }
   }

   /* We have to have ended at a vector */
   assert(c->num_elements == 0);
   nir_deref_path_finish(&path);
   return v ? v : c->values;

fail:
   nir_deref_path_finish(&path);
   return NULL;
}

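/* Fold intrinsics whose relevant sources are constant: conditional
 * discard/demote/terminate, loads from constant-initialized memory,
 * load_constant with a constant offset, and cross-invocation ops whose data
 * operand is already constant (and therefore uniform).
 */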
static bool
try_fold_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
                   struct constant_fold_state *state)
{
   switch (intrin->intrinsic) {
   case nir_intrinsic_demote_if:
   case nir_intrinsic_discard_if:
   case nir_intrinsic_terminate_if:
      if (nir_src_is_const(intrin->src[0])) {
         if (nir_src_as_bool(intrin->src[0])) {
            b->cursor = nir_before_instr(&intrin->instr);
            nir_intrinsic_op op;
            switch (intrin->intrinsic) {
            case nir_intrinsic_discard_if:
               op = nir_intrinsic_discard;
               break;
            case nir_intrinsic_demote_if:
               op = nir_intrinsic_demote;
               break;
            case nir_intrinsic_terminate_if:
               op = nir_intrinsic_terminate;
               break;
            default:
               unreachable("invalid intrinsic");
            }
            nir_intrinsic_instr *new_instr =
               nir_intrinsic_instr_create(b->shader, op);
            nir_builder_instr_insert(b, &new_instr->instr);
         }
         nir_instr_remove(&intrin->instr);
         return true;
      }
      return false;

   case nir_intrinsic_load_deref: {
      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
      nir_const_value *v = const_value_for_deref(deref);
      if (v) {
         b->cursor = nir_before_instr(&intrin->instr);
         nir_def *val = nir_build_imm(b, intrin->def.num_components,
                                      intrin->def.bit_size, v);
         nir_def_rewrite_uses(&intrin->def, val);
         nir_instr_remove(&intrin->instr);
         return true;
      }
      return false;
   }

   case nir_intrinsic_load_constant: {
      state->has_load_constant = true;

      if (!nir_src_is_const(intrin->src[0])) {
         state->has_indirect_load_const = true;
         return false;
      }

      unsigned offset = nir_src_as_uint(intrin->src[0]);
      unsigned base = nir_intrinsic_base(intrin);
      unsigned range = nir_intrinsic_range(intrin);
      assert(base + range <= b->shader->constant_data_size);

      b->cursor = nir_before_instr(&intrin->instr);
      nir_def *val;
      if (offset >= range) {
         val = nir_undef(b, intrin->def.num_components,
                         intrin->def.bit_size);
      } else {
         nir_const_value imm[NIR_MAX_VEC_COMPONENTS];
         memset(imm, 0, sizeof(imm));
         uint8_t *data = (uint8_t *)b->shader->constant_data + base;
         for (unsigned i = 0; i < intrin->num_components; i++) {
            unsigned bytes = intrin->def.bit_size / 8;
            bytes = MIN2(bytes, range - offset);

            memcpy(&imm[i].u64, data + offset, bytes);
            offset += bytes;
         }
         val = nir_build_imm(b, intrin->def.num_components,
                             intrin->def.bit_size, imm);
      }
      nir_def_rewrite_uses(&intrin->def, val);
      nir_instr_remove(&intrin->instr);
      return true;
   }

   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation:
   case nir_intrinsic_as_uniform:
   case nir_intrinsic_shuffle:
   case nir_intrinsic_shuffle_xor:
   case nir_intrinsic_shuffle_up:
   case nir_intrinsic_shuffle_down:
   case nir_intrinsic_quad_broadcast:
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
   case nir_intrinsic_quad_swizzle_amd:
   case nir_intrinsic_masked_swizzle_amd:
      /* All of these have the data payload in the first source.  They may
       * have a second source with a shuffle index but that doesn't matter if
       * the data is constant.
       */
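      /* e.g. read_invocation(c, i) is just c for any constant value c,
       * regardless of the invocation index i.
       */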
      if (nir_src_is_const(intrin->src[0])) {
         nir_def_rewrite_uses(&intrin->def,
                              intrin->src[0].ssa);
         nir_instr_remove(&intrin->instr);
         return true;
      }
      return false;

   case nir_intrinsic_vote_feq:
   case nir_intrinsic_vote_ieq:
      if (nir_src_is_const(intrin->src[0])) {
         b->cursor = nir_before_instr(&intrin->instr);
         nir_def_rewrite_uses(&intrin->def,
                              nir_imm_true(b));
         nir_instr_remove(&intrin->instr);
         return true;
      }
      return false;

   default:
      return false;
   }
}

static bool
try_fold_txb_to_tex(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_txb);

   const int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);

   /* nir_to_tgsi_lower_tex mangles many kinds of texture instructions,
    * including txb, into invalid states.  It removes the special
    * parameters and appends the values to the texture coordinate.
    */
   if (bias_idx < 0)
      return false;

   if (nir_src_is_const(tex->src[bias_idx].src) &&
       nir_src_as_float(tex->src[bias_idx].src) == 0.0) {
      nir_tex_instr_remove_src(tex, bias_idx);
      tex->op = nir_texop_tex;
      return true;
   }

   return false;
}

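/* Fold a constant texture/sampler offset source into the instruction's base
 * texture_index or sampler_index and remove the now-redundant source.
 */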
static bool
try_fold_tex_offset(nir_tex_instr *tex, unsigned *index,
                    nir_tex_src_type src_type)
{
   const int src_idx = nir_tex_instr_src_index(tex, src_type);
   if (src_idx < 0)
      return false;

   if (!nir_src_is_const(tex->src[src_idx].src))
      return false;

   *index += nir_src_as_uint(tex->src[src_idx].src);
   nir_tex_instr_remove_src(tex, src_idx);

   return true;
}

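/* Drop an nir_tex_src_offset source whose components are all constant zero;
 * such an offset has no effect on the texel fetched.
 */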
static bool
try_fold_texel_offset_src(nir_tex_instr *tex)
{
   int offset_src = nir_tex_instr_src_index(tex, nir_tex_src_offset);
   if (offset_src < 0)
      return false;

   unsigned size = nir_tex_instr_src_size(tex, offset_src);
   nir_tex_src *src = &tex->src[offset_src];

   for (unsigned i = 0; i < size; i++) {
      nir_scalar comp = nir_scalar_resolved(src->src.ssa, i);
      if (!nir_scalar_is_const(comp) || nir_scalar_as_uint(comp) != 0)
         return false;
   }

   nir_tex_instr_remove_src(tex, offset_src);

   return true;
}

static bool
try_fold_tex(nir_builder *b, nir_tex_instr *tex)
{
   bool progress = false;

   progress |= try_fold_tex_offset(tex, &tex->texture_index,
                                   nir_tex_src_texture_offset);
   progress |= try_fold_tex_offset(tex, &tex->sampler_index,
                                   nir_tex_src_sampler_offset);

   /* txb with a bias of constant zero is just tex. */
   if (tex->op == nir_texop_txb)
      progress |= try_fold_txb_to_tex(b, tex);

   /* tex with a zero offset is just tex. */
   progress |= try_fold_texel_offset_src(tex);

   return progress;
}

static bool
try_fold_instr(nir_builder *b, nir_instr *instr, void *_state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return try_fold_alu(b, nir_instr_as_alu(instr));
   case nir_instr_type_intrinsic:
      return try_fold_intrinsic(b, nir_instr_as_intrinsic(instr), _state);
   case nir_instr_type_tex:
      return try_fold_tex(b, nir_instr_as_tex(instr));
   default:
      /* Don't know how to constant fold */
      return false;
   }
}

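/* A typical use is inside a driver's NIR optimization loop, run until no
 * pass reports progress.  A minimal sketch (the surrounding passes are
 * illustrative, not required by this pass):
 *
 *    bool progress;
 *    do {
 *       progress = false;
 *       progress |= nir_opt_constant_folding(shader);
 *       progress |= nir_opt_algebraic(shader);
 *       progress |= nir_opt_dce(shader);
 *    } while (progress);
 */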
bool
nir_opt_constant_folding(nir_shader *shader)
{
   struct constant_fold_state state;
   state.has_load_constant = false;
   state.has_indirect_load_const = false;

   bool progress = nir_shader_instructions_pass(shader, try_fold_instr,
                                                nir_metadata_block_index |
                                                   nir_metadata_dominance,
                                                &state);

   /* Don't free the constant data if no load_constant intrinsics were seen:
    * the data might still be used, with the loads already lowered to
    * load_ubo.
    */
   if (state.has_load_constant && !state.has_indirect_load_const &&
       shader->constant_data_size) {
      ralloc_free(shader->constant_data);
      shader->constant_data = NULL;
      shader->constant_data_size = 0;
   }

   return progress;
}