/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */

#include "nir.h"
#include <assert.h>
#include <limits.h>
#include <math.h>
#include "util/half_float.h"
#include "util/macros.h"
#include "util/u_math.h"
#include "util/u_qsort.h"
#include "nir_builder.h"
#include "nir_control_flow_private.h"
#include "nir_worklist.h"

#ifndef NDEBUG
uint32_t nir_debug = 0;
bool nir_debug_print_shader[MESA_SHADER_KERNEL + 1] = { 0 };

static const struct debug_named_value nir_debug_control[] = {
   { "clone", NIR_DEBUG_CLONE,
     "Test cloning a shader at each successful lowering/optimization call" },
   { "serialize", NIR_DEBUG_SERIALIZE,
     "Test serialize and deserialize shader at each successful lowering/optimization call" },
   { "novalidate", NIR_DEBUG_NOVALIDATE,
     "Disable shader validation at each successful lowering/optimization call" },
   { "validate_ssa_dominance", NIR_DEBUG_VALIDATE_SSA_DOMINANCE,
     "Validate SSA dominance in shader at each successful lowering/optimization call" },
   { "tgsi", NIR_DEBUG_TGSI,
     "Dump NIR/TGSI shaders when doing a NIR<->TGSI translation" },
   { "print", NIR_DEBUG_PRINT,
     "Dump resulting shader after each successful lowering/optimization call" },
   { "print_vs", NIR_DEBUG_PRINT_VS,
     "Dump resulting vertex shader after each successful lowering/optimization call" },
   { "print_tcs", NIR_DEBUG_PRINT_TCS,
     "Dump resulting tessellation control shader after each successful lowering/optimization call" },
   { "print_tes", NIR_DEBUG_PRINT_TES,
     "Dump resulting tessellation evaluation shader after each successful lowering/optimization call" },
   { "print_gs", NIR_DEBUG_PRINT_GS,
     "Dump resulting geometry shader after each successful lowering/optimization call" },
   { "print_fs", NIR_DEBUG_PRINT_FS,
     "Dump resulting fragment shader after each successful lowering/optimization call" },
   { "print_cs", NIR_DEBUG_PRINT_CS,
     "Dump resulting compute shader after each successful lowering/optimization call" },
   { "print_ts", NIR_DEBUG_PRINT_TS,
     "Dump resulting task shader after each successful lowering/optimization call" },
   { "print_ms", NIR_DEBUG_PRINT_MS,
     "Dump resulting mesh shader after each successful lowering/optimization call" },
   { "print_rgs", NIR_DEBUG_PRINT_RGS,
     "Dump resulting raygen shader after each successful lowering/optimization call" },
   { "print_ahs", NIR_DEBUG_PRINT_AHS,
     "Dump resulting any-hit shader after each successful lowering/optimization call" },
   { "print_chs", NIR_DEBUG_PRINT_CHS,
     "Dump resulting closest-hit shader after each successful lowering/optimization call" },
   { "print_mhs", NIR_DEBUG_PRINT_MHS,
     "Dump resulting miss shader after each successful lowering/optimization call" },
   { "print_is", NIR_DEBUG_PRINT_IS,
     "Dump resulting intersection shader after each successful lowering/optimization call" },
   { "print_cbs", NIR_DEBUG_PRINT_CBS,
     "Dump resulting callable shader after each successful lowering/optimization call" },
   { "print_ks", NIR_DEBUG_PRINT_KS,
     "Dump resulting kernel shader after each successful lowering/optimization call" },
   { "print_no_inline_consts", NIR_DEBUG_PRINT_NO_INLINE_CONSTS,
     "Do not print const value near each use of const SSA variable" },
   { "print_internal", NIR_DEBUG_PRINT_INTERNAL,
     "Print shaders even if they are marked as internal" },
   { "print_pass_flags", NIR_DEBUG_PRINT_PASS_FLAGS,
     "Print pass_flags for every instruction when pass_flags are non-zero" },
   DEBUG_NAMED_VALUE_END
};

DEBUG_GET_ONCE_FLAGS_OPTION(nir_debug, "NIR_DEBUG", nir_debug_control, 0)

static void
nir_process_debug_variable_once(void)
{
   nir_debug = debug_get_option_nir_debug();

   /* clang-format off */
   nir_debug_print_shader[MESA_SHADER_VERTEX]       = NIR_DEBUG(PRINT_VS);
   nir_debug_print_shader[MESA_SHADER_TESS_CTRL]    = NIR_DEBUG(PRINT_TCS);
   nir_debug_print_shader[MESA_SHADER_TESS_EVAL]    = NIR_DEBUG(PRINT_TES);
   nir_debug_print_shader[MESA_SHADER_GEOMETRY]     = NIR_DEBUG(PRINT_GS);
   nir_debug_print_shader[MESA_SHADER_FRAGMENT]     = NIR_DEBUG(PRINT_FS);
   nir_debug_print_shader[MESA_SHADER_COMPUTE]      = NIR_DEBUG(PRINT_CS);
   nir_debug_print_shader[MESA_SHADER_TASK]         = NIR_DEBUG(PRINT_TS);
   nir_debug_print_shader[MESA_SHADER_MESH]         = NIR_DEBUG(PRINT_MS);
   nir_debug_print_shader[MESA_SHADER_RAYGEN]       = NIR_DEBUG(PRINT_RGS);
   nir_debug_print_shader[MESA_SHADER_ANY_HIT]      = NIR_DEBUG(PRINT_AHS);
   nir_debug_print_shader[MESA_SHADER_CLOSEST_HIT]  = NIR_DEBUG(PRINT_CHS);
   nir_debug_print_shader[MESA_SHADER_MISS]         = NIR_DEBUG(PRINT_MHS);
   nir_debug_print_shader[MESA_SHADER_INTERSECTION] = NIR_DEBUG(PRINT_IS);
   nir_debug_print_shader[MESA_SHADER_CALLABLE]     = NIR_DEBUG(PRINT_CBS);
   nir_debug_print_shader[MESA_SHADER_KERNEL]       = NIR_DEBUG(PRINT_KS);
   /* clang-format on */
}

void
nir_process_debug_variable(void)
{
   static once_flag flag = ONCE_FLAG_INIT;
   call_once(&flag, nir_process_debug_variable_once);
}
#endif
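
/* The flags above are consumed from the NIR_DEBUG environment variable as a
 * comma-separated list.  A hypothetical invocation (the application name and
 * flag choice are illustrative only):
 *
 *    NIR_DEBUG=print_fs,validate_ssa_dominance ./my_gl_app
 *
 * dumps every fragment shader after each successful pass and additionally
 * validates SSA dominance along the way.
 */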

/** Return true if the component mask "mask" with bit size "old_bit_size" can
 * be re-interpreted to be used with "new_bit_size".
 */
bool
nir_component_mask_can_reinterpret(nir_component_mask_t mask,
                                   unsigned old_bit_size,
                                   unsigned new_bit_size)
{
   assert(util_is_power_of_two_nonzero(old_bit_size));
   assert(util_is_power_of_two_nonzero(new_bit_size));

   if (old_bit_size == new_bit_size)
      return true;

   if (old_bit_size == 1 || new_bit_size == 1)
      return false;

   if (old_bit_size > new_bit_size) {
      unsigned ratio = old_bit_size / new_bit_size;
      return util_last_bit(mask) * ratio <= NIR_MAX_VEC_COMPONENTS;
   }

   unsigned iter = mask;
   while (iter) {
      int start, count;
      u_bit_scan_consecutive_range(&iter, &start, &count);
      start *= old_bit_size;
      count *= old_bit_size;
      if (start % new_bit_size != 0)
         return false;
      if (count % new_bit_size != 0)
         return false;
   }
   return true;
}

/** Re-interprets a component mask "mask" with bit size "old_bit_size" so that
 * it can be used with "new_bit_size".
 */
nir_component_mask_t
nir_component_mask_reinterpret(nir_component_mask_t mask,
                               unsigned old_bit_size,
                               unsigned new_bit_size)
{
   assert(nir_component_mask_can_reinterpret(mask, old_bit_size, new_bit_size));

   if (old_bit_size == new_bit_size)
      return mask;

   nir_component_mask_t new_mask = 0;
   unsigned iter = mask;
   while (iter) {
      int start, count;
      u_bit_scan_consecutive_range(&iter, &start, &count);
      start = start * old_bit_size / new_bit_size;
      count = count * old_bit_size / new_bit_size;
      new_mask |= BITFIELD_RANGE(start, count);
   }
   return new_mask;
}
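
/* A worked example of the two helpers above (values computed by hand, not
 * taken from the original source): a mask of 0x3 (components x and y) at
 * 32 bits covers bytes 0-7.  Viewed at 16 bits those bytes are components
 * 0-3, so the mask becomes 0xf:
 *
 *    assert(nir_component_mask_can_reinterpret(0x3, 32, 16));
 *    assert(nir_component_mask_reinterpret(0x3, 32, 16) == 0xf);
 *
 * Going the other way, a 16-bit mask of 0x2 (component y, bytes 2-3) cannot
 * be expressed at 32 bits because it starts mid-component, so
 * nir_component_mask_can_reinterpret(0x2, 16, 32) returns false.
 */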

nir_shader *
nir_shader_create(void *mem_ctx,
                  gl_shader_stage stage,
                  const nir_shader_compiler_options *options,
                  shader_info *si)
{
   nir_shader *shader = rzalloc(mem_ctx, nir_shader);

   shader->gctx = gc_context(shader);

#ifndef NDEBUG
   nir_process_debug_variable();
#endif

   exec_list_make_empty(&shader->variables);

   shader->options = options;

   if (si) {
      assert(si->stage == stage);
      shader->info = *si;
   } else {
      shader->info.stage = stage;
   }

   exec_list_make_empty(&shader->functions);

   shader->num_inputs = 0;
   shader->num_outputs = 0;
   shader->num_uniforms = 0;

   return shader;
}

void
nir_shader_add_variable(nir_shader *shader, nir_variable *var)
{
   switch (var->data.mode) {
   case nir_var_function_temp:
      assert(!"nir_shader_add_variable cannot be used for local variables");
      return;

   case nir_var_shader_temp:
   case nir_var_shader_in:
   case nir_var_shader_out:
   case nir_var_uniform:
   case nir_var_mem_ubo:
   case nir_var_mem_ssbo:
   case nir_var_image:
   case nir_var_mem_shared:
   case nir_var_system_value:
   case nir_var_mem_push_const:
   case nir_var_mem_constant:
   case nir_var_shader_call_data:
   case nir_var_ray_hit_attrib:
   case nir_var_mem_task_payload:
   case nir_var_mem_node_payload:
   case nir_var_mem_node_payload_in:
   case nir_var_mem_global:
      break;

   default:
      assert(!"invalid mode");
      return;
   }

   exec_list_push_tail(&shader->variables, &var->node);
}

nir_variable *
nir_variable_create(nir_shader *shader, nir_variable_mode mode,
                    const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = mode;
   var->data.how_declared = nir_var_declared_normally;

   if ((mode == nir_var_shader_in &&
        shader->info.stage != MESA_SHADER_VERTEX &&
        shader->info.stage != MESA_SHADER_KERNEL) ||
       (mode == nir_var_shader_out &&
        shader->info.stage != MESA_SHADER_FRAGMENT))
      var->data.interpolation = INTERP_MODE_SMOOTH;

   if (mode == nir_var_shader_in || mode == nir_var_uniform)
      var->data.read_only = true;

   nir_shader_add_variable(shader, var);

   return var;
}

nir_variable *
nir_local_variable_create(nir_function_impl *impl,
                          const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(impl->function->shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = nir_var_function_temp;

   nir_function_impl_add_variable(impl, var);

   return var;
}

nir_variable *
nir_state_variable_create(nir_shader *shader,
                          const struct glsl_type *type,
                          const char *name,
                          const gl_state_index16 tokens[STATE_LENGTH])
{
   nir_variable *var = nir_variable_create(shader, nir_var_uniform, type, name);
   var->num_state_slots = 1;
   var->state_slots = rzalloc_array(var, nir_state_slot, 1);
   memcpy(var->state_slots[0].tokens, tokens,
          sizeof(var->state_slots[0].tokens));
   shader->num_uniforms++;
   return var;
}

nir_variable *
nir_create_variable_with_location(nir_shader *shader, nir_variable_mode mode, int location,
                                  const struct glsl_type *type)
{
   /* Only vector/scalar and unsized-array (arrayed-io) types are supported,
    * because otherwise we don't know how much to increment
    * num_inputs/outputs.
    */
   assert(glsl_type_is_vector_or_scalar(type) || glsl_type_is_unsized_array(type));

   const char *name;
   switch (mode) {
   case nir_var_shader_in:
      if (shader->info.stage == MESA_SHADER_VERTEX)
         name = gl_vert_attrib_name(location);
      else
         name = gl_varying_slot_name_for_stage(location, shader->info.stage);
      break;

   case nir_var_shader_out:
      if (shader->info.stage == MESA_SHADER_FRAGMENT)
         name = gl_frag_result_name(location);
      else
         name = gl_varying_slot_name_for_stage(location, shader->info.stage);
      break;

   case nir_var_system_value:
      name = gl_system_value_name(location);
      break;

   default:
      unreachable("Unsupported variable mode");
   }

   nir_variable *var = nir_variable_create(shader, mode, type, name);
   var->data.location = location;

   switch (mode) {
   case nir_var_shader_in:
      var->data.driver_location = shader->num_inputs++;
      break;

   case nir_var_shader_out:
      var->data.driver_location = shader->num_outputs++;
      break;

   case nir_var_system_value:
      break;

   default:
      unreachable("Unsupported variable mode");
   }

   return var;
}
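
/* A minimal usage sketch (the stage and location are illustrative): in a
 * fragment shader, creating the color output for MRT slot 0 looks like
 *
 *    nir_variable *color =
 *       nir_create_variable_with_location(shader, nir_var_shader_out,
 *                                         FRAG_RESULT_DATA0,
 *                                         glsl_vec4_type());
 *
 * which names the variable from the location, bumps shader->num_outputs and
 * stores the old count in color->data.driver_location.
 */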

nir_variable *
nir_get_variable_with_location(nir_shader *shader, nir_variable_mode mode, int location,
                               const struct glsl_type *type)
{
   nir_variable *var = nir_find_variable_with_location(shader, mode, location);
   if (var) {
      /* If this shader has location_fracs, this builder function is not suitable. */
      assert(var->data.location_frac == 0);

      /* The variable for the slot should match what we expected. */
      assert(type == var->type);
      return var;
   }

   return nir_create_variable_with_location(shader, mode, location, type);
}

nir_variable *
nir_find_variable_with_location(nir_shader *shader,
                                nir_variable_mode mode,
                                unsigned location)
{
   assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp);
   nir_foreach_variable_with_modes(var, shader, mode) {
      if (var->data.location == location)
         return var;
   }
   return NULL;
}

nir_variable *
nir_find_variable_with_driver_location(nir_shader *shader,
                                       nir_variable_mode mode,
                                       unsigned location)
{
   assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp);
   nir_foreach_variable_with_modes(var, shader, mode) {
      if (var->data.driver_location == location)
         return var;
   }
   return NULL;
}

nir_variable *
nir_find_state_variable(nir_shader *s,
                        gl_state_index16 tokens[STATE_LENGTH])
{
   nir_foreach_variable_with_modes(var, s, nir_var_uniform) {
      if (var->num_state_slots == 1 &&
          !memcmp(var->state_slots[0].tokens, tokens,
                  sizeof(var->state_slots[0].tokens)))
         return var;
   }
   return NULL;
}

nir_variable *nir_find_sampler_variable_with_tex_index(nir_shader *shader,
                                                       unsigned texture_index)
{
   nir_foreach_variable_with_modes(var, shader, nir_var_uniform) {
      unsigned size =
         glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
      if ((glsl_type_is_texture(glsl_without_array(var->type)) ||
           glsl_type_is_sampler(glsl_without_array(var->type))) &&
          (var->data.binding == texture_index ||
           (var->data.binding < texture_index &&
            var->data.binding + size > texture_index)))
         return var;
   }
   return NULL;
}

/* Annoyingly, qsort_r is not in the C standard library and, in particular, we
 * can't count on it on MSVC and Android.  So we stuff the cmp function into
 * each array element.  It's a bit messy and burns more memory, but the list
 * of variables should never be all that long.
 */
struct var_cmp {
   nir_variable *var;
   int (*cmp)(const nir_variable *, const nir_variable *);
};

static int
var_sort_cmp(const void *_a, const void *_b, void *_cmp)
{
   const struct var_cmp *a = _a;
   const struct var_cmp *b = _b;
   assert(a->cmp == b->cmp);
   return a->cmp(a->var, b->var);
}

void
nir_sort_variables_with_modes(nir_shader *shader,
                              int (*cmp)(const nir_variable *,
                                         const nir_variable *),
                              nir_variable_mode modes)
{
   unsigned num_vars = 0;
   nir_foreach_variable_with_modes(var, shader, modes) {
      ++num_vars;
   }
   struct var_cmp *vars = ralloc_array(shader, struct var_cmp, num_vars);
   unsigned i = 0;
   nir_foreach_variable_with_modes_safe(var, shader, modes) {
      exec_node_remove(&var->node);
      vars[i++] = (struct var_cmp){
         .var = var,
         .cmp = cmp,
      };
   }
   assert(i == num_vars);

   util_qsort_r(vars, num_vars, sizeof(*vars), var_sort_cmp, cmp);

   for (i = 0; i < num_vars; i++)
      exec_list_push_tail(&shader->variables, &vars[i].var->node);

   ralloc_free(vars);
}
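
/* A minimal sketch of a caller (the comparator is hypothetical): sorting all
 * shader inputs by location so that later passes see them in slot order.
 *
 *    static int
 *    cmp_by_location(const nir_variable *a, const nir_variable *b)
 *    {
 *       return a->data.location - b->data.location;
 *    }
 *
 *    nir_sort_variables_with_modes(shader, cmp_by_location,
 *                                  nir_var_shader_in);
 */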

nir_function *
nir_function_create(nir_shader *shader, const char *name)
{
   nir_function *func = ralloc(shader, nir_function);

   exec_list_push_tail(&shader->functions, &func->node);

   func->name = ralloc_strdup(func, name);
   func->shader = shader;
   func->num_params = 0;
   func->params = NULL;
   func->impl = NULL;
   func->is_entrypoint = false;
   func->is_preamble = false;
   func->dont_inline = false;
   func->should_inline = false;

   /* Only meaningful for shader libraries, so don't export by default. */
   func->is_exported = false;

   return func;
}

void
nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src)
{
   dest->src = nir_src_for_ssa(src->src.ssa);
   for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
      dest->swizzle[i] = src->swizzle[i];
}

bool
nir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, unsigned srcn)
{
   static uint8_t trivial_swizzle[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
   STATIC_ASSERT(ARRAY_SIZE(trivial_swizzle) == NIR_MAX_VEC_COMPONENTS);

   const nir_alu_src *src = &alu->src[srcn];
   unsigned num_components = nir_ssa_alu_instr_src_components(alu, srcn);

   return (src->src.ssa->num_components == num_components) &&
          (memcmp(src->swizzle, trivial_swizzle, num_components) == 0);
}

static void
cf_init(nir_cf_node *node, nir_cf_node_type type)
{
   exec_node_init(&node->node);
   node->parent = NULL;
   node->type = type;
}

nir_function_impl *
nir_function_impl_create_bare(nir_shader *shader)
{
   nir_function_impl *impl = ralloc(shader, nir_function_impl);

   impl->function = NULL;
   impl->preamble = NULL;

   cf_init(&impl->cf_node, nir_cf_node_function);

   exec_list_make_empty(&impl->body);
   exec_list_make_empty(&impl->locals);
   impl->ssa_alloc = 0;
   impl->num_blocks = 0;
   impl->valid_metadata = nir_metadata_none;
   impl->structured = true;

   /* create start & end blocks */
   nir_block *start_block = nir_block_create(shader);
   nir_block *end_block = nir_block_create(shader);
   start_block->cf_node.parent = &impl->cf_node;
   end_block->cf_node.parent = &impl->cf_node;
   impl->end_block = end_block;

   exec_list_push_tail(&impl->body, &start_block->cf_node.node);

   start_block->successors[0] = end_block;
   _mesa_set_add(end_block->predecessors, start_block);
   return impl;
}

nir_function_impl *
nir_function_impl_create(nir_function *function)
{
   assert(function->impl == NULL);

   nir_function_impl *impl = nir_function_impl_create_bare(function->shader);
   nir_function_set_impl(function, impl);
   return impl;
}

nir_block *
nir_block_create(nir_shader *shader)
{
   nir_block *block = rzalloc(shader, nir_block);

   cf_init(&block->cf_node, nir_cf_node_block);

   block->successors[0] = block->successors[1] = NULL;
   block->predecessors = _mesa_pointer_set_create(block);
   block->imm_dom = NULL;
   /* XXX maybe it would be worth it to defer allocation?  This
    * way it doesn't get allocated for shader refs that never run
    * nir_calc_dominance?  For example, state-tracker creates an
    * initial IR, clones that, runs appropriate lowering pass, passes
    * to driver which does common lowering/opt, and then stores ref
    * which is later used to do state-specific lowering and further
    * opt.  Do any of the references not need dominance metadata?
    */
   block->dom_frontier = _mesa_pointer_set_create(block);

   exec_list_make_empty(&block->instr_list);

   return block;
}

static inline void
src_init(nir_src *src)
{
   src->ssa = NULL;
}

nir_if *
nir_if_create(nir_shader *shader)
{
   nir_if *if_stmt = ralloc(shader, nir_if);

   if_stmt->control = nir_selection_control_none;

   cf_init(&if_stmt->cf_node, nir_cf_node_if);
   src_init(&if_stmt->condition);

   nir_block *then = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->then_list);
   exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
   then->cf_node.parent = &if_stmt->cf_node;

   nir_block *else_stmt = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->else_list);
   exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
   else_stmt->cf_node.parent = &if_stmt->cf_node;

   return if_stmt;
}

nir_loop *
nir_loop_create(nir_shader *shader)
{
   nir_loop *loop = rzalloc(shader, nir_loop);

   cf_init(&loop->cf_node, nir_cf_node_loop);
   /* Assume that loops are divergent until proven otherwise */
   loop->divergent = true;

   nir_block *body = nir_block_create(shader);
   exec_list_make_empty(&loop->body);
   exec_list_push_tail(&loop->body, &body->cf_node.node);
   body->cf_node.parent = &loop->cf_node;

   body->successors[0] = body;
   _mesa_set_add(body->predecessors, body);

   exec_list_make_empty(&loop->continue_list);

   return loop;
}

static void
instr_init(nir_instr *instr, nir_instr_type type)
{
   instr->type = type;
   instr->block = NULL;
   exec_node_init(&instr->node);
}

static void
alu_src_init(nir_alu_src *src)
{
   src_init(&src->src);
   for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
      src->swizzle[i] = i;
}

nir_alu_instr *
nir_alu_instr_create(nir_shader *shader, nir_op op)
{
   unsigned num_srcs = nir_op_infos[op].num_inputs;
   nir_alu_instr *instr = gc_zalloc_zla(shader->gctx, nir_alu_instr, nir_alu_src, num_srcs);

   instr_init(&instr->instr, nir_instr_type_alu);
   instr->op = op;
   for (unsigned i = 0; i < num_srcs; i++)
      alu_src_init(&instr->src[i]);

   return instr;
}

nir_deref_instr *
nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
{
   nir_deref_instr *instr = gc_zalloc(shader->gctx, nir_deref_instr, 1);

   instr_init(&instr->instr, nir_instr_type_deref);

   instr->deref_type = deref_type;
   if (deref_type != nir_deref_type_var)
      src_init(&instr->parent);

   if (deref_type == nir_deref_type_array ||
       deref_type == nir_deref_type_ptr_as_array)
      src_init(&instr->arr.index);

   return instr;
}

nir_jump_instr *
nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
{
   nir_jump_instr *instr = gc_alloc(shader->gctx, nir_jump_instr, 1);
   instr_init(&instr->instr, nir_instr_type_jump);
   src_init(&instr->condition);
   instr->type = type;
   instr->target = NULL;
   instr->else_target = NULL;

   return instr;
}

nir_load_const_instr *
nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
                            unsigned bit_size)
{
   nir_load_const_instr *instr =
      gc_zalloc_zla(shader->gctx, nir_load_const_instr, nir_const_value, num_components);
   instr_init(&instr->instr, nir_instr_type_load_const);

   nir_def_init(&instr->instr, &instr->def, num_components, bit_size);

   return instr;
}

nir_intrinsic_instr *
nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
{
   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   nir_intrinsic_instr *instr =
      gc_zalloc_zla(shader->gctx, nir_intrinsic_instr, nir_src, num_srcs);

   instr_init(&instr->instr, nir_instr_type_intrinsic);
   instr->intrinsic = op;

   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i]);

   return instr;
}

nir_call_instr *
nir_call_instr_create(nir_shader *shader, nir_function *callee)
{
   const unsigned num_params = callee->num_params;
   nir_call_instr *instr =
      gc_zalloc_zla(shader->gctx, nir_call_instr, nir_src, num_params);

   instr_init(&instr->instr, nir_instr_type_call);
   instr->callee = callee;
   instr->num_params = num_params;
   for (unsigned i = 0; i < num_params; i++)
      src_init(&instr->params[i]);

   return instr;
}

static int8_t default_tg4_offsets[4][2] = {
   { 0, 1 },
   { 1, 1 },
   { 1, 0 },
   { 0, 0 },
};

nir_tex_instr *
nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
{
   nir_tex_instr *instr = gc_zalloc(shader->gctx, nir_tex_instr, 1);
   instr_init(&instr->instr, nir_instr_type_tex);

   instr->num_srcs = num_srcs;
   instr->src = gc_alloc(shader->gctx, nir_tex_src, num_srcs);
   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i].src);

   instr->texture_index = 0;
   instr->sampler_index = 0;
   memcpy(instr->tg4_offsets, default_tg4_offsets, sizeof(instr->tg4_offsets));

   return instr;
}

void
nir_tex_instr_add_src(nir_tex_instr *tex,
                      nir_tex_src_type src_type,
                      nir_def *src)
{
   nir_tex_src *new_srcs = gc_zalloc(gc_get_context(tex), nir_tex_src, tex->num_srcs + 1);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      new_srcs[i].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &new_srcs[i].src,
                         &tex->src[i].src);
   }

   gc_free(tex->src);
   tex->src = new_srcs;

   tex->src[tex->num_srcs].src_type = src_type;
   nir_instr_init_src(&tex->instr, &tex->src[tex->num_srcs].src, src);
   tex->num_srcs++;
}

void
nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx)
{
   assert(src_idx < tex->num_srcs);

   /* First rewrite the source to NIR_SRC_INIT */
   nir_instr_clear_src(&tex->instr, &tex->src[src_idx].src);

   /* Now, move all of the other sources down */
   for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) {
      tex->src[i - 1].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &tex->src[i - 1].src, &tex->src[i].src);
   }
   tex->num_srcs--;
}

bool
nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex)
{
   if (tex->op != nir_texop_tg4)
      return false;
   return memcmp(tex->tg4_offsets, default_tg4_offsets,
                 sizeof(tex->tg4_offsets)) != 0;
}

nir_phi_instr *
nir_phi_instr_create(nir_shader *shader)
{
   nir_phi_instr *instr = gc_alloc(shader->gctx, nir_phi_instr, 1);
   instr_init(&instr->instr, nir_instr_type_phi);

   exec_list_make_empty(&instr->srcs);

   return instr;
}

/**
 * Adds a new source to a NIR instruction.
 *
 * Note that this does not update the def/use relationship for src, assuming
 * that the instr is not in the shader.  If it is, you have to do:
 *
 * list_addtail(&phi_src->src.use_link, &src.ssa->uses);
 */
nir_phi_src *
nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_def *src)
{
   nir_phi_src *phi_src;

   phi_src = gc_zalloc(gc_get_context(instr), nir_phi_src, 1);
   phi_src->pred = pred;
   phi_src->src = nir_src_for_ssa(src);
   nir_src_set_parent_instr(&phi_src->src, &instr->instr);
   exec_list_push_tail(&instr->srcs, &phi_src->node);

   return phi_src;
}
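
/* A minimal sketch of the pattern described above, for a phi that is already
 * in the shader (the block and def names are illustrative):
 *
 *    nir_phi_src *phi_src = nir_phi_instr_add_src(phi, pred_block, def);
 *    list_addtail(&phi_src->src.use_link, &def->uses);
 *
 * For a phi that has not been inserted yet, the list_addtail() is skipped;
 * nir_instr_insert() registers the uses when the phi is added.
 */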

nir_parallel_copy_instr *
nir_parallel_copy_instr_create(nir_shader *shader)
{
   nir_parallel_copy_instr *instr = gc_alloc(shader->gctx, nir_parallel_copy_instr, 1);
   instr_init(&instr->instr, nir_instr_type_parallel_copy);

   exec_list_make_empty(&instr->entries);

   return instr;
}

nir_undef_instr *
nir_undef_instr_create(nir_shader *shader,
                       unsigned num_components,
                       unsigned bit_size)
{
   nir_undef_instr *instr = gc_alloc(shader->gctx, nir_undef_instr, 1);
   instr_init(&instr->instr, nir_instr_type_undef);

   nir_def_init(&instr->instr, &instr->def, num_components, bit_size);

   return instr;
}

static nir_const_value
const_value_float(double d, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   /* clang-format off */
   switch (bit_size) {
   case 16: v.u16 = _mesa_float_to_half(d);  break;
   case 32: v.f32 = d;                       break;
   case 64: v.f64 = d;                       break;
   default:
      unreachable("Invalid bit size");
   }
   /* clang-format on */

   return v;
}

static nir_const_value
const_value_int(int64_t i, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   /* clang-format off */
   switch (bit_size) {
   case 1:  v.b   = i & 1; break;
   case 8:  v.i8  = i;     break;
   case 16: v.i16 = i;     break;
   case 32: v.i32 = i;     break;
   case 64: v.i64 = i;     break;
   default:
      unreachable("Invalid bit size");
   }
   /* clang-format on */

   return v;
}

nir_const_value
nir_alu_binop_identity(nir_op binop, unsigned bit_size)
{
   const int64_t max_int = (1ull << (bit_size - 1)) - 1;
   const int64_t min_int = -max_int - 1;
   switch (binop) {
   case nir_op_iadd:
      return const_value_int(0, bit_size);
   case nir_op_fadd:
      return const_value_float(0, bit_size);
   case nir_op_imul:
      return const_value_int(1, bit_size);
   case nir_op_fmul:
      return const_value_float(1, bit_size);
   case nir_op_imin:
      return const_value_int(max_int, bit_size);
   case nir_op_umin:
      return const_value_int(~0ull, bit_size);
   case nir_op_fmin:
      return const_value_float(INFINITY, bit_size);
   case nir_op_imax:
      return const_value_int(min_int, bit_size);
   case nir_op_umax:
      return const_value_int(0, bit_size);
   case nir_op_fmax:
      return const_value_float(-INFINITY, bit_size);
   case nir_op_iand:
      return const_value_int(~0ull, bit_size);
   case nir_op_ior:
      return const_value_int(0, bit_size);
   case nir_op_ixor:
      return const_value_int(0, bit_size);
   default:
      unreachable("Invalid reduction operation");
   }
}

nir_function_impl *
nir_cf_node_get_function(nir_cf_node *node)
{
   while (node->type != nir_cf_node_function) {
      node = node->parent;
   }

   return nir_cf_node_as_function(node);
}

/* Reduces a cursor by trying to convert everything to after and trying to
 * go up to block granularity when possible.
 */
static nir_cursor
reduce_cursor(nir_cursor cursor)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      if (exec_list_is_empty(&cursor.block->instr_list)) {
         /* Empty block.  After is as good as before. */
         cursor.option = nir_cursor_after_block;
      }
      return cursor;

   case nir_cursor_after_block:
      return cursor;

   case nir_cursor_before_instr: {
      nir_instr *prev_instr = nir_instr_prev(cursor.instr);
      if (prev_instr) {
         /* Before this instruction is after the previous */
         cursor.instr = prev_instr;
         cursor.option = nir_cursor_after_instr;
      } else {
         /* No previous instruction.  Switch to before block */
         cursor.block = cursor.instr->block;
         cursor.option = nir_cursor_before_block;
      }
      return reduce_cursor(cursor);
   }

   case nir_cursor_after_instr:
      if (nir_instr_next(cursor.instr) == NULL) {
         /* This is the last instruction, switch to after block */
         cursor.option = nir_cursor_after_block;
         cursor.block = cursor.instr->block;
      }
      return cursor;

   default:
      unreachable("Invalid cursor option");
   }
}

bool
nir_cursors_equal(nir_cursor a, nir_cursor b)
{
   /* Reduced cursors should be unique */
   a = reduce_cursor(a);
   b = reduce_cursor(b);

   return a.block == b.block && a.option == b.option;
}
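
/* Reduction makes textually different cursors comparable.  For example, for
 * two adjacent instructions "a" and "b" in the same block,
 *
 *    nir_cursors_equal(nir_after_instr(a), nir_before_instr(b))
 *
 * returns true, because nir_before_instr(b) reduces to nir_after_instr(a).
 */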

static bool
add_use_cb(nir_src *src, void *state)
{
   nir_instr *instr = state;

   nir_src_set_parent_instr(src, instr);
   list_addtail(&src->use_link, &src->ssa->uses);

   return true;
}

static bool
add_ssa_def_cb(nir_def *def, void *state)
{
   nir_instr *instr = state;

   if (instr->block && def->index == UINT_MAX) {
      nir_function_impl *impl =
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;

      impl->valid_metadata &= ~nir_metadata_live_defs;
   }

   return true;
}

static void
add_defs_uses(nir_instr *instr)
{
   nir_foreach_src(instr, add_use_cb, instr);
   nir_foreach_def(instr, add_ssa_def_cb, instr);
}

void
nir_instr_insert(nir_cursor cursor, nir_instr *instr)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      /* Only allow inserting jumps into empty blocks. */
      if (instr->type == nir_instr_type_jump)
         assert(exec_list_is_empty(&cursor.block->instr_list));

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_head(&cursor.block->instr_list, &instr->node);
      break;
   case nir_cursor_after_block: {
      /* Inserting instructions after a jump is illegal. */
      nir_instr *last = nir_block_last_instr(cursor.block);
      assert(last == NULL || last->type != nir_instr_type_jump);
      (void)last;

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_tail(&cursor.block->instr_list, &instr->node);
      break;
   }
   case nir_cursor_before_instr:
      assert(instr->type != nir_instr_type_jump);
      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_node_before(&cursor.instr->node, &instr->node);
      break;
   case nir_cursor_after_instr:
      /* Inserting instructions after a jump is illegal. */
      assert(cursor.instr->type != nir_instr_type_jump);

      /* Only allow inserting jumps at the end of the block. */
      if (instr->type == nir_instr_type_jump)
         assert(cursor.instr == nir_block_last_instr(cursor.instr->block));

      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_after(&cursor.instr->node, &instr->node);
      break;
   }

   if (instr->type == nir_instr_type_jump)
      nir_handle_add_jump(instr->block);

   nir_function_impl *impl = nir_cf_node_get_function(&instr->block->cf_node);
   impl->valid_metadata &= ~nir_metadata_instr_index;
}

bool
nir_instr_move(nir_cursor cursor, nir_instr *instr)
{
   /* If the cursor happens to refer to this instruction (either before or
    * after), don't do anything.
    */
   if ((cursor.option == nir_cursor_before_instr ||
        cursor.option == nir_cursor_after_instr) &&
       cursor.instr == instr)
      return false;

   nir_instr_remove(instr);
   nir_instr_insert(cursor, instr);
   return true;
}

static bool
src_is_valid(const nir_src *src)
{
   return (src->ssa != NULL);
}

static bool
remove_use_cb(nir_src *src, void *state)
{
   (void)state;

   if (src_is_valid(src))
      list_del(&src->use_link);

   return true;
}

static void
remove_defs_uses(nir_instr *instr)
{
   nir_foreach_src(instr, remove_use_cb, instr);
}

void
nir_instr_remove_v(nir_instr *instr)
{
   remove_defs_uses(instr);
   exec_node_remove(&instr->node);

   if (instr->type == nir_instr_type_jump) {
      nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
      nir_handle_remove_jump(instr->block, jump_instr->type);
   }
}

void
nir_instr_free(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_tex:
      gc_free(nir_instr_as_tex(instr)->src);
      break;

   case nir_instr_type_phi: {
      nir_phi_instr *phi = nir_instr_as_phi(instr);
      nir_foreach_phi_src_safe(phi_src, phi)
         gc_free(phi_src);
      break;
   }

   default:
      break;
   }

   gc_free(instr);
}

void
nir_instr_free_list(struct exec_list *list)
{
   struct exec_node *node;
   while ((node = exec_list_pop_head(list))) {
      nir_instr *removed_instr = exec_node_data(nir_instr, node, node);
      nir_instr_free(removed_instr);
   }
}

static bool
nir_instr_free_and_dce_live_cb(nir_def *def, void *state)
{
   bool *live = state;

   if (!nir_def_is_unused(def)) {
      *live = true;
      return false;
   } else {
      return true;
   }
}

static bool
nir_instr_free_and_dce_is_live(nir_instr *instr)
{
   /* Note: don't have to worry about jumps because they don't have dests to
    * become unused.
    */
   if (instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
      if (!(info->flags & NIR_INTRINSIC_CAN_ELIMINATE))
         return true;
   }

   bool live = false;
   nir_foreach_def(instr, nir_instr_free_and_dce_live_cb, &live);
   return live;
}

static bool
nir_instr_dce_add_dead_srcs_cb(nir_src *src, void *state)
{
   nir_instr_worklist *wl = state;

   list_del(&src->use_link);
   if (!nir_instr_free_and_dce_is_live(src->ssa->parent_instr))
      nir_instr_worklist_push_tail(wl, src->ssa->parent_instr);

   /* Stop nir_instr_remove from trying to delete the link again. */
   src->ssa = NULL;

   return true;
}

static void
nir_instr_dce_add_dead_ssa_srcs(nir_instr_worklist *wl, nir_instr *instr)
{
   nir_foreach_src(instr, nir_instr_dce_add_dead_srcs_cb, wl);
}

/**
 * Frees an instruction and any SSA defs that it used that are now dead,
 * returning a nir_cursor where the instruction previously was.
 */
nir_cursor
nir_instr_free_and_dce(nir_instr *instr)
{
   nir_instr_worklist *worklist = nir_instr_worklist_create();

   nir_instr_dce_add_dead_ssa_srcs(worklist, instr);
   nir_cursor c = nir_instr_remove(instr);

   struct exec_list to_free;
   exec_list_make_empty(&to_free);

   nir_instr *dce_instr;
   while ((dce_instr = nir_instr_worklist_pop_head(worklist))) {
      nir_instr_dce_add_dead_ssa_srcs(worklist, dce_instr);

      /* If we're removing the instr where our cursor is, then we have to
       * point the cursor elsewhere.
       */
      if ((c.option == nir_cursor_before_instr ||
           c.option == nir_cursor_after_instr) &&
          c.instr == dce_instr)
         c = nir_instr_remove(dce_instr);
      else
         nir_instr_remove(dce_instr);
      exec_list_push_tail(&to_free, &dce_instr->node);
   }

   nir_instr_free_list(&to_free);

   nir_instr_worklist_destroy(worklist);

   return c;
}
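
/* A minimal sketch of how a pass might use this (the replacement step and
 * the nir_builder "b" are illustrative): after rewriting all uses of an
 * instruction's def, free the old instruction, let its now-dead operand
 * chain go with it, and keep building at the returned cursor.
 *
 *    nir_def_rewrite_uses(&alu->def, replacement);
 *    b->cursor = nir_instr_free_and_dce(&alu->instr);
 */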

/*@}*/

nir_def *
nir_instr_def(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return &nir_instr_as_alu(instr)->def;

   case nir_instr_type_deref:
      return &nir_instr_as_deref(instr)->def;

   case nir_instr_type_tex:
      return &nir_instr_as_tex(instr)->def;

   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
         return &intrin->def;
      } else {
         return NULL;
      }
   }

   case nir_instr_type_phi:
      return &nir_instr_as_phi(instr)->def;

   case nir_instr_type_parallel_copy:
      unreachable("Parallel copies are unsupported by this function");

   case nir_instr_type_load_const:
      return &nir_instr_as_load_const(instr)->def;

   case nir_instr_type_undef:
      return &nir_instr_as_undef(instr)->def;

   case nir_instr_type_call:
   case nir_instr_type_jump:
      return NULL;
   }

   unreachable("Invalid instruction type");
}

bool
nir_foreach_phi_src_leaving_block(nir_block *block,
                                  nir_foreach_src_cb cb,
                                  void *state)
{
   for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) {
      if (block->successors[i] == NULL)
         continue;

      nir_foreach_phi(phi, block->successors[i]) {
         nir_foreach_phi_src(phi_src, phi) {
            if (phi_src->pred == block) {
               if (!cb(&phi_src->src, state))
                  return false;
            }
         }
      }
   }

   return true;
}

nir_const_value
nir_const_value_for_float(double f, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   /* clang-format off */
   switch (bit_size) {
   case 16: v.u16 = _mesa_float_to_half(f);  break;
   case 32: v.f32 = f;                       break;
   case 64: v.f64 = f;                       break;
   default: unreachable("Invalid bit size");
   }
   /* clang-format on */

   return v;
}

double
nir_const_value_as_float(nir_const_value value, unsigned bit_size)
{
   /* clang-format off */
   switch (bit_size) {
   case 16: return _mesa_half_to_float(value.u16);
   case 32: return value.f32;
   case 64: return value.f64;
   default: unreachable("Invalid bit size");
   }
   /* clang-format on */
}

nir_const_value *
nir_src_as_const_value(nir_src src)
{
   if (src.ssa->parent_instr->type != nir_instr_type_load_const)
      return NULL;

   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   return load->value;
}

/**
 * Returns true if the source is known to be always uniform. Otherwise it
 * returns false which means it may or may not be uniform but it can't be
 * determined.
 *
 * For a more precise analysis of uniform values, use nir_divergence_analysis.
 */
bool
nir_src_is_always_uniform(nir_src src)
{
   /* Constants are trivially uniform */
   if (src.ssa->parent_instr->type == nir_instr_type_load_const)
      return true;

   if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);
      /* As are uniform variables */
      if (intr->intrinsic == nir_intrinsic_load_uniform &&
          nir_src_is_always_uniform(intr->src[0]))
         return true;
      /* From the Vulkan specification 15.6.1. Push Constant Interface:
       * "Any member of a push constant block that is declared as an array must
       * only be accessed with dynamically uniform indices."
       */
      if (intr->intrinsic == nir_intrinsic_load_push_constant)
         return true;
      if (intr->intrinsic == nir_intrinsic_load_deref &&
          nir_deref_mode_is(nir_src_as_deref(intr->src[0]), nir_var_mem_push_const))
         return true;
   }

   /* Operations whose inputs are all uniform produce a uniform result */
   if (src.ssa->parent_instr->type == nir_instr_type_alu) {
      nir_alu_instr *alu = nir_instr_as_alu(src.ssa->parent_instr);
      for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
         if (!nir_src_is_always_uniform(alu->src[i].src))
            return false;
      }

      return true;
   }

   /* XXX: this could have many more tests, such as when a sampler function is
    * called with uniform arguments.
    */
   return false;
}
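
/* A minimal sketch of a caller (the pass and helper are hypothetical): a
 * descriptor-indexing lowering pass might only need a waterfall loop when
 * the index is not provably uniform.
 *
 *    if (!nir_src_is_always_uniform(intr->src[0]))
 *       emit_waterfall_loop(b, intr);   // hypothetical helper
 */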

static void
src_remove_all_uses(nir_src *src)
{
   if (src && src_is_valid(src))
      list_del(&src->use_link);
}

static void
src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
{
   if (!src)
      return;

   if (!src_is_valid(src))
      return;

   if (parent_instr) {
      nir_src_set_parent_instr(src, parent_instr);
   } else {
      assert(parent_if);
      nir_src_set_parent_if(src, parent_if);
   }

   list_addtail(&src->use_link, &src->ssa->uses);
}

void
nir_instr_init_src(nir_instr *instr, nir_src *src, nir_def *def)
{
   *src = nir_src_for_ssa(def);
   src_add_all_uses(src, instr, NULL);
}

void
nir_instr_clear_src(nir_instr *instr, nir_src *src)
{
   src_remove_all_uses(src);
   *src = NIR_SRC_INIT;
}

void
nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
{
   assert(!src_is_valid(dest) || nir_src_parent_instr(dest) == dest_instr);

   src_remove_all_uses(dest);
   src_remove_all_uses(src);
   *dest = *src;
   *src = NIR_SRC_INIT;
   src_add_all_uses(dest, dest_instr, NULL);
}

void
nir_def_init(nir_instr *instr, nir_def *def,
             unsigned num_components,
             unsigned bit_size)
{
   def->parent_instr = instr;
   list_inithead(&def->uses);
   def->num_components = num_components;
   def->bit_size = bit_size;
   def->divergent = true; /* This is the safer default */

   if (instr->block) {
      nir_function_impl *impl =
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;

      impl->valid_metadata &= ~nir_metadata_live_defs;
   } else {
      def->index = UINT_MAX;
   }
}

void
nir_def_rewrite_uses(nir_def *def, nir_def *new_ssa)
{
   assert(def != new_ssa);
   nir_foreach_use_including_if_safe(use_src, def) {
      nir_src_rewrite(use_src, new_ssa);
   }
}

void
nir_def_rewrite_uses_src(nir_def *def, nir_src new_src)
{
   nir_def_rewrite_uses(def, new_src.ssa);
}

static bool
is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
{
   assert(start->block == end->block);

   if (between->block != start->block)
      return false;

   /* Search backwards looking for "between" */
   while (start != end) {
      if (between == end)
         return true;

      end = nir_instr_prev(end);
      assert(end);
   }

   return false;
}

/* Replaces all uses of the given SSA def with the given source, but only if
 * the use comes after the after_me instruction.  This can be useful if you
 * are emitting code to fix up the result of some instruction: you can freely
 * use the result in that fixup code, then call rewrite_uses_after with the
 * last fixup instruction as after_me, and it will replace all of the uses
 * you want without touching the fixup code.
 *
 * This function assumes that after_me is in the same block as
 * def->parent_instr and that after_me comes after def->parent_instr.
 */
1568 void
nir_def_rewrite_uses_after(nir_def * def,nir_def * new_ssa,nir_instr * after_me)1569 nir_def_rewrite_uses_after(nir_def *def, nir_def *new_ssa,
1570                            nir_instr *after_me)
1571 {
1572    if (def == new_ssa)
1573       return;
1574 
1575    nir_foreach_use_including_if_safe(use_src, def) {
1576       if (!nir_src_is_if(use_src)) {
1577          assert(nir_src_parent_instr(use_src) != def->parent_instr);
1578 
1579          /* Since def already dominates all of its uses, the only way a use can
1580           * not be dominated by after_me is if it is between def and after_me in
1581           * the instruction list.
1582           */
1583          if (is_instr_between(def->parent_instr, after_me, nir_src_parent_instr(use_src)))
1584             continue;
1585       }
1586 
1587       nir_src_rewrite(use_src, new_ssa);
1588    }
1589 }
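/* A minimal usage sketch (hypothetical caller, not part of this file): emit
 * fixup code that reads `def`, then redirect every other use to the fixed-up
 * value without disturbing the fixup code itself:
 *
 *    b->cursor = nir_after_instr(def->parent_instr);
 *    nir_def *fixed = nir_fadd_imm(b, def, 1.0);
 *    nir_def_rewrite_uses_after(def, fixed, fixed->parent_instr);
 */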
1590 
1591 static nir_def *
1592 get_store_value(nir_intrinsic_instr *intrin)
1593 {
1594    assert(nir_intrinsic_has_write_mask(intrin));
1595    /* deref stores have the deref in src[0] and the store value in src[1] */
1596    if (intrin->intrinsic == nir_intrinsic_store_deref ||
1597        intrin->intrinsic == nir_intrinsic_store_deref_block_intel)
1598       return intrin->src[1].ssa;
1599 
1600    /* all other stores have the store value in src[0] */
1601    return intrin->src[0].ssa;
1602 }
1603 
1604 nir_component_mask_t
1605 nir_src_components_read(const nir_src *src)
1606 {
1607    assert(nir_src_parent_instr(src));
1608 
1609    if (nir_src_parent_instr(src)->type == nir_instr_type_alu) {
1610       nir_alu_instr *alu = nir_instr_as_alu(nir_src_parent_instr(src));
1611       nir_alu_src *alu_src = exec_node_data(nir_alu_src, src, src);
1612       int src_idx = alu_src - &alu->src[0];
1613       assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs);
1614       return nir_alu_instr_src_read_mask(alu, src_idx);
1615    } else if (nir_src_parent_instr(src)->type == nir_instr_type_intrinsic) {
1616       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(nir_src_parent_instr(src));
1617       if (nir_intrinsic_has_write_mask(intrin) && src->ssa == get_store_value(intrin))
1618          return nir_intrinsic_write_mask(intrin);
1619       else
1620          return (1 << src->ssa->num_components) - 1;
1621    } else {
1622       return (1 << src->ssa->num_components) - 1;
1623    }
1624 }
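/* Example: for a store_ssbo with num_components == 4 and a write_mask of
 * 0x5, only components 0 and 2 of the value source are read, so this
 * returns 0x5 rather than the full 0xf.
 */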
1625 
1626 nir_component_mask_t
1627 nir_def_components_read(const nir_def *def)
1628 {
1629    nir_component_mask_t read_mask = 0;
1630 
1631    nir_foreach_use_including_if(use, def) {
1632       read_mask |= nir_src_is_if(use) ? 1 : nir_src_components_read(use);
1633 
1634       if (read_mask == (1 << def->num_components) - 1)
1635          return read_mask;
1636    }
1637 
1638    return read_mask;
1639 }
1640 
1641 bool
1642 nir_def_all_uses_are_fsat(const nir_def *def)
1643 {
1644    nir_foreach_use(src, def) {
1645       if (nir_src_is_if(src))
1646          return false;
1647 
1648       nir_instr *use = nir_src_parent_instr(src);
1649       if (use->type != nir_instr_type_alu)
1650          return false;
1651 
1652       nir_alu_instr *alu = nir_instr_as_alu(use);
1653       if (alu->op != nir_op_fsat)
1654          return false;
1655    }
1656 
1657    return true;
1658 }
1659 
1660 nir_block *
1661 nir_block_unstructured_next(nir_block *block)
1662 {
1663    if (block == NULL) {
1664       /* nir_foreach_block_unstructured_safe() will call this function on a
1665        * NULL block after the last iteration, but it won't use the result so
1666        * just return NULL here.
1667        */
1668       return NULL;
1669    }
1670 
1671    nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
1672    if (cf_next == NULL && block->cf_node.parent->type == nir_cf_node_function)
1673       return NULL;
1674 
1675    if (cf_next && cf_next->type == nir_cf_node_block)
1676       return nir_cf_node_as_block(cf_next);
1677 
1678    return nir_block_cf_tree_next(block);
1679 }
1680 
1681 nir_block *
1682 nir_unstructured_start_block(nir_function_impl *impl)
1683 {
1684    return nir_start_block(impl);
1685 }
1686 
1687 nir_block *
1688 nir_block_cf_tree_next(nir_block *block)
1689 {
1690    if (block == NULL) {
1691       /* nir_foreach_block_safe() will call this function on a NULL block
1692        * after the last iteration, but it won't use the result so just return
1693        * NULL here.
1694        */
1695       return NULL;
1696    }
1697 
1698    assert(nir_cf_node_get_function(&block->cf_node)->structured);
1699 
1700    nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
1701    if (cf_next)
1702       return nir_cf_node_cf_tree_first(cf_next);
1703 
1704    nir_cf_node *parent = block->cf_node.parent;
1705    if (parent->type == nir_cf_node_function)
1706       return NULL;
1707 
1708    /* Is this the last block of a cf_node? Return the following block */
1709    if (block == nir_cf_node_cf_tree_last(parent))
1710       return nir_cf_node_as_block(nir_cf_node_next(parent));
1711 
1712    switch (parent->type) {
1713    case nir_cf_node_if: {
1714       /* We are at the end of the if. Go to the beginning of the else */
1715       nir_if *if_stmt = nir_cf_node_as_if(parent);
1716       assert(block == nir_if_last_then_block(if_stmt));
1717       return nir_if_first_else_block(if_stmt);
1718    }
1719 
1720    case nir_cf_node_loop: {
1721       /* We are at the end of the body and there is a continue construct */
1722       nir_loop *loop = nir_cf_node_as_loop(parent);
1723       assert(block == nir_loop_last_block(loop) &&
1724              nir_loop_has_continue_construct(loop));
1725       return nir_loop_first_continue_block(loop);
1726    }
1727 
1728    default:
1729       unreachable("unknown cf node type");
1730    }
1731 }
1732 
1733 nir_block *
1734 nir_block_cf_tree_prev(nir_block *block)
1735 {
1736    if (block == NULL) {
1737       /* do this for consistency with nir_block_cf_tree_next() */
1738       return NULL;
1739    }
1740 
1741    assert(nir_cf_node_get_function(&block->cf_node)->structured);
1742 
1743    nir_cf_node *cf_prev = nir_cf_node_prev(&block->cf_node);
1744    if (cf_prev)
1745       return nir_cf_node_cf_tree_last(cf_prev);
1746 
1747    nir_cf_node *parent = block->cf_node.parent;
1748    if (parent->type == nir_cf_node_function)
1749       return NULL;
1750 
1751    /* Is this the first block of a cf_node? Return the previous block */
1752    if (block == nir_cf_node_cf_tree_first(parent))
1753       return nir_cf_node_as_block(nir_cf_node_prev(parent));
1754 
1755    switch (parent->type) {
1756    case nir_cf_node_if: {
1757       /* We are at the beginning of the else. Go to the end of the if */
1758       nir_if *if_stmt = nir_cf_node_as_if(parent);
1759       assert(block == nir_if_first_else_block(if_stmt));
1760       return nir_if_last_then_block(if_stmt);
1761    }
1762    case nir_cf_node_loop: {
1763       /* We are at the beginning of the continue construct. */
1764       nir_loop *loop = nir_cf_node_as_loop(parent);
1765       assert(nir_loop_has_continue_construct(loop) &&
1766              block == nir_loop_first_continue_block(loop));
1767       return nir_loop_last_block(loop);
1768    }
1769 
1770    default:
1771       unreachable("unknown cf node type");
1772    }
1773 }
1774 
1775 nir_block *
1776 nir_cf_node_cf_tree_first(nir_cf_node *node)
1777 {
1778    switch (node->type) {
1779    case nir_cf_node_function: {
1780       nir_function_impl *impl = nir_cf_node_as_function(node);
1781       return nir_start_block(impl);
1782    }
1783 
1784    case nir_cf_node_if: {
1785       nir_if *if_stmt = nir_cf_node_as_if(node);
1786       return nir_if_first_then_block(if_stmt);
1787    }
1788 
1789    case nir_cf_node_loop: {
1790       nir_loop *loop = nir_cf_node_as_loop(node);
1791       return nir_loop_first_block(loop);
1792    }
1793 
1794    case nir_cf_node_block: {
1795       return nir_cf_node_as_block(node);
1796    }
1797 
1798    default:
1799       unreachable("unknown node type");
1800    }
1801 }
1802 
1803 nir_block *
1804 nir_cf_node_cf_tree_last(nir_cf_node *node)
1805 {
1806    switch (node->type) {
1807    case nir_cf_node_function: {
1808       nir_function_impl *impl = nir_cf_node_as_function(node);
1809       return nir_impl_last_block(impl);
1810    }
1811 
1812    case nir_cf_node_if: {
1813       nir_if *if_stmt = nir_cf_node_as_if(node);
1814       return nir_if_last_else_block(if_stmt);
1815    }
1816 
1817    case nir_cf_node_loop: {
1818       nir_loop *loop = nir_cf_node_as_loop(node);
1819       if (nir_loop_has_continue_construct(loop))
1820          return nir_loop_last_continue_block(loop);
1821       else
1822          return nir_loop_last_block(loop);
1823    }
1824 
1825    case nir_cf_node_block: {
1826       return nir_cf_node_as_block(node);
1827    }
1828 
1829    default:
1830       unreachable("unknown node type");
1831    }
1832 }
1833 
1834 nir_block *
1835 nir_cf_node_cf_tree_next(nir_cf_node *node)
1836 {
1837    if (node->type == nir_cf_node_block)
1838       return nir_block_cf_tree_next(nir_cf_node_as_block(node));
1839    else if (node->type == nir_cf_node_function)
1840       return NULL;
1841    else
1842       return nir_cf_node_as_block(nir_cf_node_next(node));
1843 }
1844 
1845 nir_block *
1846 nir_cf_node_cf_tree_prev(nir_cf_node *node)
1847 {
1848    if (node->type == nir_cf_node_block)
1849       return nir_block_cf_tree_prev(nir_cf_node_as_block(node));
1850    else if (node->type == nir_cf_node_function)
1851       return NULL;
1852    else
1853       return nir_cf_node_as_block(nir_cf_node_prev(node));
1854 }
1855 
1856 nir_if *
1857 nir_block_get_following_if(nir_block *block)
1858 {
1859    if (exec_node_is_tail_sentinel(&block->cf_node.node))
1860       return NULL;
1861 
1862    if (nir_cf_node_is_last(&block->cf_node))
1863       return NULL;
1864 
1865    nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
1866 
1867    if (next_node->type != nir_cf_node_if)
1868       return NULL;
1869 
1870    return nir_cf_node_as_if(next_node);
1871 }
1872 
1873 nir_loop *
1874 nir_block_get_following_loop(nir_block *block)
1875 {
1876    if (exec_node_is_tail_sentinel(&block->cf_node.node))
1877       return NULL;
1878 
1879    if (nir_cf_node_is_last(&block->cf_node))
1880       return NULL;
1881 
1882    nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
1883 
1884    if (next_node->type != nir_cf_node_loop)
1885       return NULL;
1886 
1887    return nir_cf_node_as_loop(next_node);
1888 }
1889 
1890 static int
1891 compare_block_index(const void *p1, const void *p2)
1892 {
1893    const nir_block *block1 = *((const nir_block **)p1);
1894    const nir_block *block2 = *((const nir_block **)p2);
1895 
1896    return (int)block1->index - (int)block2->index;
1897 }
1898 
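/* Note: block->predecessors is a pointer-keyed set, so its iteration order
 * depends on allocation addresses.  Sorting by block index gives callers a
 * deterministic predecessor order (useful for stable printing and
 * reproducible pass behaviour).
 */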
1899 nir_block **
1900 nir_block_get_predecessors_sorted(const nir_block *block, void *mem_ctx)
1901 {
1902    nir_block **preds =
1903       ralloc_array(mem_ctx, nir_block *, block->predecessors->entries);
1904 
1905    unsigned i = 0;
1906    set_foreach(block->predecessors, entry)
1907       preds[i++] = (nir_block *)entry->key;
1908    assert(i == block->predecessors->entries);
1909 
1910    qsort(preds, block->predecessors->entries, sizeof(nir_block *),
1911          compare_block_index);
1912 
1913    return preds;
1914 }
1915 
1916 void
1917 nir_index_blocks(nir_function_impl *impl)
1918 {
1919    unsigned index = 0;
1920 
1921    if (impl->valid_metadata & nir_metadata_block_index)
1922       return;
1923 
1924    nir_foreach_block_unstructured(block, impl) {
1925       block->index = index++;
1926    }
1927 
1928    /* The end_block isn't really part of the program, which is why its index
1929     * is >= num_blocks.
1930     */
1931    impl->num_blocks = impl->end_block->index = index;
1932 }
1933 
1934 static bool
1935 index_ssa_def_cb(nir_def *def, void *state)
1936 {
1937    unsigned *index = (unsigned *)state;
1938    def->index = (*index)++;
1939 
1940    return true;
1941 }
1942 
1943 /**
1944  * The indices are applied top-to-bottom which has the very nice property
1945  * that, if A dominates B, then A->index <= B->index.
1946  */
1947 void
1948 nir_index_ssa_defs(nir_function_impl *impl)
1949 {
1950    unsigned index = 0;
1951 
1952    impl->valid_metadata &= ~nir_metadata_live_defs;
1953 
1954    nir_foreach_block_unstructured(block, impl) {
1955       nir_foreach_instr(instr, block)
1956          nir_foreach_def(instr, index_ssa_def_cb, &index);
1957    }
1958 
1959    impl->ssa_alloc = index;
1960 }
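/* A sketch of how the property above is typically exploited (hypothetical
 * helper, not part of this file): since a dominating def always has a
 * smaller or equal index after nir_index_ssa_defs(), comparing indices is a
 * cheap necessary (not sufficient) dominance test:
 *
 *    bool may_dominate(const nir_def *a, const nir_def *b)
 *    {
 *       return a->index <= b->index;
 *    }
 */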
1961 
1962 /**
1963  * The indices are applied top-to-bottom which has the very nice property
1964  * that, if A dominates B, then A->index <= B->index.
1965  */
1966 unsigned
1967 nir_index_instrs(nir_function_impl *impl)
1968 {
1969    unsigned index = 0;
1970 
1971    nir_foreach_block(block, impl) {
1972       block->start_ip = index++;
1973 
1974       nir_foreach_instr(instr, block)
1975          instr->index = index++;
1976 
1977       block->end_ip = index++;
1978    }
1979 
1980    return index;
1981 }
1982 
1983 void
1984 nir_shader_clear_pass_flags(nir_shader *shader)
1985 {
1986    nir_foreach_function_impl(impl, shader) {
1987       nir_foreach_block(block, impl) {
1988          nir_foreach_instr(instr, block) {
1989             instr->pass_flags = 0;
1990          }
1991       }
1992    }
1993 }
1994 
1995 unsigned
1996 nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes)
1997 {
1998    unsigned count = 0;
1999    nir_foreach_variable_with_modes(var, shader, modes)
2000       var->index = count++;
2001    return count;
2002 }
2003 
2004 unsigned
2005 nir_function_impl_index_vars(nir_function_impl *impl)
2006 {
2007    unsigned count = 0;
2008    nir_foreach_function_temp_variable(var, impl)
2009       var->index = count++;
2010    return count;
2011 }
2012 
2013 static nir_instr *
2014 cursor_next_instr(nir_cursor cursor)
2015 {
2016    switch (cursor.option) {
2017    case nir_cursor_before_block:
2018       for (nir_block *block = cursor.block; block;
2019            block = nir_block_cf_tree_next(block)) {
2020          nir_instr *instr = nir_block_first_instr(block);
2021          if (instr)
2022             return instr;
2023       }
2024       return NULL;
2025 
2026    case nir_cursor_after_block:
2027       cursor.block = nir_block_cf_tree_next(cursor.block);
2028       if (cursor.block == NULL)
2029          return NULL;
2030 
2031       cursor.option = nir_cursor_before_block;
2032       return cursor_next_instr(cursor);
2033 
2034    case nir_cursor_before_instr:
2035       return cursor.instr;
2036 
2037    case nir_cursor_after_instr:
2038       if (nir_instr_next(cursor.instr))
2039          return nir_instr_next(cursor.instr);
2040 
2041       cursor.option = nir_cursor_after_block;
2042       cursor.block = cursor.instr->block;
2043       return cursor_next_instr(cursor);
2044    }
2045 
2046    unreachable("Invalid cursor option");
2047 }
2048 
2049 bool
2050 nir_function_impl_lower_instructions(nir_function_impl *impl,
2051                                      nir_instr_filter_cb filter,
2052                                      nir_lower_instr_cb lower,
2053                                      void *cb_data)
2054 {
2055    nir_builder b = nir_builder_create(impl);
2056 
2057    nir_metadata preserved = nir_metadata_block_index |
2058                             nir_metadata_dominance;
2059 
2060    bool progress = false;
2061    nir_cursor iter = nir_before_impl(impl);
2062    nir_instr *instr;
2063    while ((instr = cursor_next_instr(iter)) != NULL) {
2064       if (filter && !filter(instr, cb_data)) {
2065          iter = nir_after_instr(instr);
2066          continue;
2067       }
2068 
2069       nir_def *old_def = nir_instr_def(instr);
2070       struct list_head old_uses;
2071       if (old_def != NULL) {
2072          /* We're about to ask the callback to generate a replacement for instr.
2073           * Save off the uses from instr's SSA def so we know what uses to
2074           * rewrite later.  If we use nir_def_rewrite_uses, it fails in the
2075           * case where the generated replacement code uses the result of instr
2076           * itself.  If we use nir_def_rewrite_uses_after (which is the
2077           * normal solution to this problem), it doesn't work well if control-
2078           * flow is inserted as part of the replacement, doesn't handle cases
2079           * where the replacement is something consumed by instr, and suffers
2080           * from performance issues.  This is the only way to 100% guarantee
2081           * that we rewrite the correct set efficiently.
2082           */
2083 
2084          list_replace(&old_def->uses, &old_uses);
2085          list_inithead(&old_def->uses);
2086       }
2087 
2088       b.cursor = nir_after_instr(instr);
2089       nir_def *new_def = lower(&b, instr, cb_data);
2090       if (new_def && new_def != NIR_LOWER_INSTR_PROGRESS &&
2091           new_def != NIR_LOWER_INSTR_PROGRESS_REPLACE) {
2092          assert(old_def != NULL);
2093          if (new_def->parent_instr->block != instr->block)
2094             preserved = nir_metadata_none;
2095 
2096          list_for_each_entry_safe(nir_src, use_src, &old_uses, use_link)
2097             nir_src_rewrite(use_src, new_def);
2098 
2099          if (nir_def_is_unused(old_def)) {
2100             iter = nir_instr_free_and_dce(instr);
2101          } else {
2102             iter = nir_after_instr(instr);
2103          }
2104          progress = true;
2105       } else {
2106          /* We didn't end up lowering after all.  Put the uses back */
2107          if (old_def)
2108             list_replace(&old_uses, &old_def->uses);
2109 
2110          if (new_def == NIR_LOWER_INSTR_PROGRESS_REPLACE) {
2111             /* Only instructions without a return value can be removed like this */
2112             assert(!old_def);
2113             iter = nir_instr_free_and_dce(instr);
2114             progress = true;
2115          } else
2116             iter = nir_after_instr(instr);
2117 
2118          if (new_def == NIR_LOWER_INSTR_PROGRESS)
2119             progress = true;
2120       }
2121    }
2122 
2123    if (progress) {
2124       nir_metadata_preserve(impl, preserved);
2125    } else {
2126       nir_metadata_preserve(impl, nir_metadata_all);
2127    }
2128 
2129    return progress;
2130 }
2131 
2132 bool
2133 nir_shader_lower_instructions(nir_shader *shader,
2134                               nir_instr_filter_cb filter,
2135                               nir_lower_instr_cb lower,
2136                               void *cb_data)
2137 {
2138    bool progress = false;
2139 
2140    nir_foreach_function_impl(impl, shader) {
2141       if (nir_function_impl_lower_instructions(impl, filter, lower, cb_data))
2142          progress = true;
2143    }
2144 
2145    return progress;
2146 }
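/* A minimal usage sketch (hypothetical pass, not part of this file): replace
 * every fneg with a multiply by -1.0 using the filter/lower callback pair.
 *
 *    static bool
 *    is_fneg(const nir_instr *instr, const void *data)
 *    {
 *       return instr->type == nir_instr_type_alu &&
 *              nir_instr_as_alu(instr)->op == nir_op_fneg;
 *    }
 *
 *    static nir_def *
 *    lower_fneg(nir_builder *b, nir_instr *instr, void *data)
 *    {
 *       nir_alu_instr *alu = nir_instr_as_alu(instr);
 *       // Ignores source swizzles for brevity; the returned def replaces
 *       // all uses of the original instruction's def.
 *       return nir_fmul_imm(b, alu->src[0].src.ssa, -1.0);
 *    }
 *
 *    nir_shader_lower_instructions(shader, is_fneg, lower_fneg, NULL);
 */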
2147 
2148 /**
2149  * Returns true if the shader supports quad-based implicit derivatives on
2150  * texture sampling.
2151  */
2152 bool
2153 nir_shader_supports_implicit_lod(nir_shader *shader)
2154 {
2155    return (shader->info.stage == MESA_SHADER_FRAGMENT ||
2156            (shader->info.stage == MESA_SHADER_COMPUTE &&
2157             shader->info.cs.derivative_group != DERIVATIVE_GROUP_NONE));
2158 }
2159 
2160 nir_intrinsic_op
2161 nir_intrinsic_from_system_value(gl_system_value val)
2162 {
2163    switch (val) {
2164    case SYSTEM_VALUE_VERTEX_ID:
2165       return nir_intrinsic_load_vertex_id;
2166    case SYSTEM_VALUE_INSTANCE_ID:
2167       return nir_intrinsic_load_instance_id;
2168    case SYSTEM_VALUE_DRAW_ID:
2169       return nir_intrinsic_load_draw_id;
2170    case SYSTEM_VALUE_BASE_INSTANCE:
2171       return nir_intrinsic_load_base_instance;
2172    case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
2173       return nir_intrinsic_load_vertex_id_zero_base;
2174    case SYSTEM_VALUE_IS_INDEXED_DRAW:
2175       return nir_intrinsic_load_is_indexed_draw;
2176    case SYSTEM_VALUE_FIRST_VERTEX:
2177       return nir_intrinsic_load_first_vertex;
2178    case SYSTEM_VALUE_BASE_VERTEX:
2179       return nir_intrinsic_load_base_vertex;
2180    case SYSTEM_VALUE_INVOCATION_ID:
2181       return nir_intrinsic_load_invocation_id;
2182    case SYSTEM_VALUE_FRAG_COORD:
2183       return nir_intrinsic_load_frag_coord;
2184    case SYSTEM_VALUE_POINT_COORD:
2185       return nir_intrinsic_load_point_coord;
2186    case SYSTEM_VALUE_LINE_COORD:
2187       return nir_intrinsic_load_line_coord;
2188    case SYSTEM_VALUE_FRONT_FACE:
2189       return nir_intrinsic_load_front_face;
2190    case SYSTEM_VALUE_SAMPLE_ID:
2191       return nir_intrinsic_load_sample_id;
2192    case SYSTEM_VALUE_SAMPLE_POS:
2193       return nir_intrinsic_load_sample_pos;
2194    case SYSTEM_VALUE_SAMPLE_POS_OR_CENTER:
2195       return nir_intrinsic_load_sample_pos_or_center;
2196    case SYSTEM_VALUE_SAMPLE_MASK_IN:
2197       return nir_intrinsic_load_sample_mask_in;
2198    case SYSTEM_VALUE_LAYER_ID:
2199       return nir_intrinsic_load_layer_id;
2200    case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
2201       return nir_intrinsic_load_local_invocation_id;
2202    case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
2203       return nir_intrinsic_load_local_invocation_index;
2204    case SYSTEM_VALUE_WORKGROUP_ID:
2205       return nir_intrinsic_load_workgroup_id;
2206    case SYSTEM_VALUE_WORKGROUP_INDEX:
2207       return nir_intrinsic_load_workgroup_index;
2208    case SYSTEM_VALUE_NUM_WORKGROUPS:
2209       return nir_intrinsic_load_num_workgroups;
2210    case SYSTEM_VALUE_PRIMITIVE_ID:
2211       return nir_intrinsic_load_primitive_id;
2212    case SYSTEM_VALUE_TESS_COORD:
2213       return nir_intrinsic_load_tess_coord;
2214    case SYSTEM_VALUE_TESS_LEVEL_OUTER:
2215       return nir_intrinsic_load_tess_level_outer;
2216    case SYSTEM_VALUE_TESS_LEVEL_INNER:
2217       return nir_intrinsic_load_tess_level_inner;
2218    case SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT:
2219       return nir_intrinsic_load_tess_level_outer_default;
2220    case SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT:
2221       return nir_intrinsic_load_tess_level_inner_default;
2222    case SYSTEM_VALUE_VERTICES_IN:
2223       return nir_intrinsic_load_patch_vertices_in;
2224    case SYSTEM_VALUE_HELPER_INVOCATION:
2225       return nir_intrinsic_load_helper_invocation;
2226    case SYSTEM_VALUE_COLOR0:
2227       return nir_intrinsic_load_color0;
2228    case SYSTEM_VALUE_COLOR1:
2229       return nir_intrinsic_load_color1;
2230    case SYSTEM_VALUE_VIEW_INDEX:
2231       return nir_intrinsic_load_view_index;
2232    case SYSTEM_VALUE_SUBGROUP_SIZE:
2233       return nir_intrinsic_load_subgroup_size;
2234    case SYSTEM_VALUE_SUBGROUP_INVOCATION:
2235       return nir_intrinsic_load_subgroup_invocation;
2236    case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
2237       return nir_intrinsic_load_subgroup_eq_mask;
2238    case SYSTEM_VALUE_SUBGROUP_GE_MASK:
2239       return nir_intrinsic_load_subgroup_ge_mask;
2240    case SYSTEM_VALUE_SUBGROUP_GT_MASK:
2241       return nir_intrinsic_load_subgroup_gt_mask;
2242    case SYSTEM_VALUE_SUBGROUP_LE_MASK:
2243       return nir_intrinsic_load_subgroup_le_mask;
2244    case SYSTEM_VALUE_SUBGROUP_LT_MASK:
2245       return nir_intrinsic_load_subgroup_lt_mask;
2246    case SYSTEM_VALUE_NUM_SUBGROUPS:
2247       return nir_intrinsic_load_num_subgroups;
2248    case SYSTEM_VALUE_SUBGROUP_ID:
2249       return nir_intrinsic_load_subgroup_id;
2250    case SYSTEM_VALUE_WORKGROUP_SIZE:
2251       return nir_intrinsic_load_workgroup_size;
2252    case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
2253       return nir_intrinsic_load_global_invocation_id;
2254    case SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID:
2255       return nir_intrinsic_load_base_global_invocation_id;
2256    case SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX:
2257       return nir_intrinsic_load_global_invocation_index;
2258    case SYSTEM_VALUE_WORK_DIM:
2259       return nir_intrinsic_load_work_dim;
2260    case SYSTEM_VALUE_USER_DATA_AMD:
2261       return nir_intrinsic_load_user_data_amd;
2262    case SYSTEM_VALUE_RAY_LAUNCH_ID:
2263       return nir_intrinsic_load_ray_launch_id;
2264    case SYSTEM_VALUE_RAY_LAUNCH_SIZE:
2265       return nir_intrinsic_load_ray_launch_size;
2266    case SYSTEM_VALUE_RAY_WORLD_ORIGIN:
2267       return nir_intrinsic_load_ray_world_origin;
2268    case SYSTEM_VALUE_RAY_WORLD_DIRECTION:
2269       return nir_intrinsic_load_ray_world_direction;
2270    case SYSTEM_VALUE_RAY_OBJECT_ORIGIN:
2271       return nir_intrinsic_load_ray_object_origin;
2272    case SYSTEM_VALUE_RAY_OBJECT_DIRECTION:
2273       return nir_intrinsic_load_ray_object_direction;
2274    case SYSTEM_VALUE_RAY_T_MIN:
2275       return nir_intrinsic_load_ray_t_min;
2276    case SYSTEM_VALUE_RAY_T_MAX:
2277       return nir_intrinsic_load_ray_t_max;
2278    case SYSTEM_VALUE_RAY_OBJECT_TO_WORLD:
2279       return nir_intrinsic_load_ray_object_to_world;
2280    case SYSTEM_VALUE_RAY_WORLD_TO_OBJECT:
2281       return nir_intrinsic_load_ray_world_to_object;
2282    case SYSTEM_VALUE_RAY_HIT_KIND:
2283       return nir_intrinsic_load_ray_hit_kind;
2284    case SYSTEM_VALUE_RAY_FLAGS:
2285       return nir_intrinsic_load_ray_flags;
2286    case SYSTEM_VALUE_RAY_GEOMETRY_INDEX:
2287       return nir_intrinsic_load_ray_geometry_index;
2288    case SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX:
2289       return nir_intrinsic_load_ray_instance_custom_index;
2290    case SYSTEM_VALUE_CULL_MASK:
2291       return nir_intrinsic_load_cull_mask;
2292    case SYSTEM_VALUE_RAY_TRIANGLE_VERTEX_POSITIONS:
2293       return nir_intrinsic_load_ray_triangle_vertex_positions;
2294    case SYSTEM_VALUE_MESH_VIEW_COUNT:
2295       return nir_intrinsic_load_mesh_view_count;
2296    case SYSTEM_VALUE_FRAG_SHADING_RATE:
2297       return nir_intrinsic_load_frag_shading_rate;
2298    case SYSTEM_VALUE_FULLY_COVERED:
2299       return nir_intrinsic_load_fully_covered;
2300    case SYSTEM_VALUE_FRAG_SIZE:
2301       return nir_intrinsic_load_frag_size;
2302    case SYSTEM_VALUE_FRAG_INVOCATION_COUNT:
2303       return nir_intrinsic_load_frag_invocation_count;
2304    case SYSTEM_VALUE_SHADER_INDEX:
2305       return nir_intrinsic_load_shader_index;
2306    case SYSTEM_VALUE_COALESCED_INPUT_COUNT:
2307       return nir_intrinsic_load_coalesced_input_count;
2308    case SYSTEM_VALUE_WARPS_PER_SM_NV:
2309       return nir_intrinsic_load_warps_per_sm_nv;
2310    case SYSTEM_VALUE_SM_COUNT_NV:
2311       return nir_intrinsic_load_sm_count_nv;
2312    case SYSTEM_VALUE_WARP_ID_NV:
2313       return nir_intrinsic_load_warp_id_nv;
2314    case SYSTEM_VALUE_SM_ID_NV:
2315       return nir_intrinsic_load_sm_id_nv;
2316    default:
2317       unreachable("system value does not directly correspond to intrinsic");
2318    }
2319 }
2320 
2321 gl_system_value
2322 nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
2323 {
2324    switch (intrin) {
2325    case nir_intrinsic_load_vertex_id:
2326       return SYSTEM_VALUE_VERTEX_ID;
2327    case nir_intrinsic_load_instance_id:
2328       return SYSTEM_VALUE_INSTANCE_ID;
2329    case nir_intrinsic_load_draw_id:
2330       return SYSTEM_VALUE_DRAW_ID;
2331    case nir_intrinsic_load_base_instance:
2332       return SYSTEM_VALUE_BASE_INSTANCE;
2333    case nir_intrinsic_load_vertex_id_zero_base:
2334       return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
2335    case nir_intrinsic_load_first_vertex:
2336       return SYSTEM_VALUE_FIRST_VERTEX;
2337    case nir_intrinsic_load_is_indexed_draw:
2338       return SYSTEM_VALUE_IS_INDEXED_DRAW;
2339    case nir_intrinsic_load_base_vertex:
2340       return SYSTEM_VALUE_BASE_VERTEX;
2341    case nir_intrinsic_load_invocation_id:
2342       return SYSTEM_VALUE_INVOCATION_ID;
2343    case nir_intrinsic_load_frag_coord:
2344       return SYSTEM_VALUE_FRAG_COORD;
2345    case nir_intrinsic_load_point_coord:
2346       return SYSTEM_VALUE_POINT_COORD;
2347    case nir_intrinsic_load_line_coord:
2348       return SYSTEM_VALUE_LINE_COORD;
2349    case nir_intrinsic_load_front_face:
2350       return SYSTEM_VALUE_FRONT_FACE;
2351    case nir_intrinsic_load_sample_id:
2352       return SYSTEM_VALUE_SAMPLE_ID;
2353    case nir_intrinsic_load_sample_pos:
2354       return SYSTEM_VALUE_SAMPLE_POS;
2355    case nir_intrinsic_load_sample_pos_or_center:
2356       return SYSTEM_VALUE_SAMPLE_POS_OR_CENTER;
2357    case nir_intrinsic_load_sample_mask_in:
2358       return SYSTEM_VALUE_SAMPLE_MASK_IN;
2359    case nir_intrinsic_load_layer_id:
2360       return SYSTEM_VALUE_LAYER_ID;
2361    case nir_intrinsic_load_local_invocation_id:
2362       return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
2363    case nir_intrinsic_load_local_invocation_index:
2364       return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
2365    case nir_intrinsic_load_num_workgroups:
2366       return SYSTEM_VALUE_NUM_WORKGROUPS;
2367    case nir_intrinsic_load_workgroup_id:
2368       return SYSTEM_VALUE_WORKGROUP_ID;
2369    case nir_intrinsic_load_workgroup_index:
2370       return SYSTEM_VALUE_WORKGROUP_INDEX;
2371    case nir_intrinsic_load_primitive_id:
2372       return SYSTEM_VALUE_PRIMITIVE_ID;
2373    case nir_intrinsic_load_tess_coord:
2374    case nir_intrinsic_load_tess_coord_xy:
2375       return SYSTEM_VALUE_TESS_COORD;
2376    case nir_intrinsic_load_tess_level_outer:
2377       return SYSTEM_VALUE_TESS_LEVEL_OUTER;
2378    case nir_intrinsic_load_tess_level_inner:
2379       return SYSTEM_VALUE_TESS_LEVEL_INNER;
2380    case nir_intrinsic_load_tess_level_outer_default:
2381       return SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT;
2382    case nir_intrinsic_load_tess_level_inner_default:
2383       return SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT;
2384    case nir_intrinsic_load_patch_vertices_in:
2385       return SYSTEM_VALUE_VERTICES_IN;
2386    case nir_intrinsic_load_helper_invocation:
2387       return SYSTEM_VALUE_HELPER_INVOCATION;
2388    case nir_intrinsic_load_color0:
2389       return SYSTEM_VALUE_COLOR0;
2390    case nir_intrinsic_load_color1:
2391       return SYSTEM_VALUE_COLOR1;
2392    case nir_intrinsic_load_view_index:
2393       return SYSTEM_VALUE_VIEW_INDEX;
2394    case nir_intrinsic_load_subgroup_size:
2395       return SYSTEM_VALUE_SUBGROUP_SIZE;
2396    case nir_intrinsic_load_subgroup_invocation:
2397       return SYSTEM_VALUE_SUBGROUP_INVOCATION;
2398    case nir_intrinsic_load_subgroup_eq_mask:
2399       return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
2400    case nir_intrinsic_load_subgroup_ge_mask:
2401       return SYSTEM_VALUE_SUBGROUP_GE_MASK;
2402    case nir_intrinsic_load_subgroup_gt_mask:
2403       return SYSTEM_VALUE_SUBGROUP_GT_MASK;
2404    case nir_intrinsic_load_subgroup_le_mask:
2405       return SYSTEM_VALUE_SUBGROUP_LE_MASK;
2406    case nir_intrinsic_load_subgroup_lt_mask:
2407       return SYSTEM_VALUE_SUBGROUP_LT_MASK;
2408    case nir_intrinsic_load_num_subgroups:
2409       return SYSTEM_VALUE_NUM_SUBGROUPS;
2410    case nir_intrinsic_load_subgroup_id:
2411       return SYSTEM_VALUE_SUBGROUP_ID;
2412    case nir_intrinsic_load_workgroup_size:
2413       return SYSTEM_VALUE_WORKGROUP_SIZE;
2414    case nir_intrinsic_load_global_invocation_id:
2415       return SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
2416    case nir_intrinsic_load_base_global_invocation_id:
2417       return SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID;
2418    case nir_intrinsic_load_global_invocation_index:
2419       return SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX;
2420    case nir_intrinsic_load_work_dim:
2421       return SYSTEM_VALUE_WORK_DIM;
2422    case nir_intrinsic_load_user_data_amd:
2423       return SYSTEM_VALUE_USER_DATA_AMD;
2424    case nir_intrinsic_load_barycentric_model:
2425       return SYSTEM_VALUE_BARYCENTRIC_PULL_MODEL;
2426    case nir_intrinsic_load_gs_header_ir3:
2427       return SYSTEM_VALUE_GS_HEADER_IR3;
2428    case nir_intrinsic_load_tcs_header_ir3:
2429       return SYSTEM_VALUE_TCS_HEADER_IR3;
2430    case nir_intrinsic_load_ray_launch_id:
2431       return SYSTEM_VALUE_RAY_LAUNCH_ID;
2432    case nir_intrinsic_load_ray_launch_size:
2433       return SYSTEM_VALUE_RAY_LAUNCH_SIZE;
2434    case nir_intrinsic_load_ray_world_origin:
2435       return SYSTEM_VALUE_RAY_WORLD_ORIGIN;
2436    case nir_intrinsic_load_ray_world_direction:
2437       return SYSTEM_VALUE_RAY_WORLD_DIRECTION;
2438    case nir_intrinsic_load_ray_object_origin:
2439       return SYSTEM_VALUE_RAY_OBJECT_ORIGIN;
2440    case nir_intrinsic_load_ray_object_direction:
2441       return SYSTEM_VALUE_RAY_OBJECT_DIRECTION;
2442    case nir_intrinsic_load_ray_t_min:
2443       return SYSTEM_VALUE_RAY_T_MIN;
2444    case nir_intrinsic_load_ray_t_max:
2445       return SYSTEM_VALUE_RAY_T_MAX;
2446    case nir_intrinsic_load_ray_object_to_world:
2447       return SYSTEM_VALUE_RAY_OBJECT_TO_WORLD;
2448    case nir_intrinsic_load_ray_world_to_object:
2449       return SYSTEM_VALUE_RAY_WORLD_TO_OBJECT;
2450    case nir_intrinsic_load_ray_hit_kind:
2451       return SYSTEM_VALUE_RAY_HIT_KIND;
2452    case nir_intrinsic_load_ray_flags:
2453       return SYSTEM_VALUE_RAY_FLAGS;
2454    case nir_intrinsic_load_ray_geometry_index:
2455       return SYSTEM_VALUE_RAY_GEOMETRY_INDEX;
2456    case nir_intrinsic_load_ray_instance_custom_index:
2457       return SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX;
2458    case nir_intrinsic_load_cull_mask:
2459       return SYSTEM_VALUE_CULL_MASK;
2460    case nir_intrinsic_load_ray_triangle_vertex_positions:
2461       return SYSTEM_VALUE_RAY_TRIANGLE_VERTEX_POSITIONS;
2462    case nir_intrinsic_load_frag_shading_rate:
2463       return SYSTEM_VALUE_FRAG_SHADING_RATE;
2464    case nir_intrinsic_load_mesh_view_count:
2465       return SYSTEM_VALUE_MESH_VIEW_COUNT;
2466    case nir_intrinsic_load_fully_covered:
2467       return SYSTEM_VALUE_FULLY_COVERED;
2468    case nir_intrinsic_load_frag_size:
2469       return SYSTEM_VALUE_FRAG_SIZE;
2470    case nir_intrinsic_load_frag_invocation_count:
2471       return SYSTEM_VALUE_FRAG_INVOCATION_COUNT;
2472    case nir_intrinsic_load_shader_index:
2473       return SYSTEM_VALUE_SHADER_INDEX;
2474    case nir_intrinsic_load_coalesced_input_count:
2475       return SYSTEM_VALUE_COALESCED_INPUT_COUNT;
2476    case nir_intrinsic_load_warps_per_sm_nv:
2477       return SYSTEM_VALUE_WARPS_PER_SM_NV;
2478    case nir_intrinsic_load_sm_count_nv:
2479       return SYSTEM_VALUE_SM_COUNT_NV;
2480    case nir_intrinsic_load_warp_id_nv:
2481       return SYSTEM_VALUE_WARP_ID_NV;
2482    case nir_intrinsic_load_sm_id_nv:
2483       return SYSTEM_VALUE_SM_ID_NV;
2484    default:
2485       unreachable("intrinsic doesn't produce a system value");
2486    }
2487 }
2488 
2489 /* OpenGL utility method that remaps attribute locations if they are
2490  * doubles. Not needed for Vulkan because the input location count for
2491  * doubles differs between Vulkan and OpenGL.
2492  *
2493  * The bitfield returned in dual_slot is one bit for each double input slot in
2494  * the original OpenGL single-slot input numbering.  The mapping from old
2495  * locations to new locations is as follows:
2496  *
2497  *    new_loc = loc + util_bitcount(dual_slot & BITFIELD64_MASK(loc))
2498  */
2499 void
2500 nir_remap_dual_slot_attributes(nir_shader *shader, uint64_t *dual_slot)
2501 {
2502    assert(shader->info.stage == MESA_SHADER_VERTEX);
2503 
2504    *dual_slot = 0;
2505    nir_foreach_shader_in_variable(var, shader) {
2506       if (glsl_type_is_dual_slot(glsl_without_array(var->type))) {
2507          unsigned slots = glsl_count_attribute_slots(var->type, true);
2508          *dual_slot |= BITFIELD64_MASK(slots) << var->data.location;
2509       }
2510    }
2511 
2512    nir_foreach_shader_in_variable(var, shader) {
2513       var->data.location +=
2514          util_bitcount64(*dual_slot & BITFIELD64_MASK(var->data.location));
2515    }
2516 }
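/* Worked example: with inputs vec4 at location 0, dvec4 at location 1 and
 * vec4 at location 2, the first loop sets dual_slot = 0b110 (two slots
 * marked for the dvec4).  The second loop leaves locations 0 and 1 in place
 * and moves the trailing vec4 to
 * new_loc = 2 + util_bitcount64(0b110 & BITFIELD64_MASK(2)) = 3, making
 * room for the double's second slot.
 */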
2517 
2518 /* Returns an attribute mask that has been re-compacted using the given
2519  * dual_slot mask.
2520  */
2521 uint64_t
2522 nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot)
2523 {
2524    while (dual_slot) {
2525       unsigned loc = u_bit_scan64(&dual_slot);
2526       /* mask of all bits up to and including loc */
2527       uint64_t mask = BITFIELD64_MASK(loc + 1);
2528       attribs = (attribs & mask) | ((attribs & ~mask) >> 1);
2529    }
2530    return attribs;
2531 }
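/* Worked example: re-compacting attribs = 0b1011 with dual_slot = 0b0010
 * keeps bits at or below location 1 in place and shifts the higher bits
 * down by one, yielding 0b0111.
 */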
2532 
2533 void
2534 nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_def *src,
2535                             bool bindless)
2536 {
2537    enum gl_access_qualifier access = nir_intrinsic_access(intrin);
2538 
2539    /* Image intrinsics only have one of these */
2540    assert(!nir_intrinsic_has_src_type(intrin) ||
2541           !nir_intrinsic_has_dest_type(intrin));
2542 
2543    nir_alu_type data_type = nir_type_invalid;
2544    if (nir_intrinsic_has_src_type(intrin))
2545       data_type = nir_intrinsic_src_type(intrin);
2546    if (nir_intrinsic_has_dest_type(intrin))
2547       data_type = nir_intrinsic_dest_type(intrin);
2548 
2549    nir_atomic_op atomic_op = 0;
2550    if (nir_intrinsic_has_atomic_op(intrin))
2551       atomic_op = nir_intrinsic_atomic_op(intrin);
2552 
2553    switch (intrin->intrinsic) {
2554 #define CASE(op)                                                       \
2555    case nir_intrinsic_image_deref_##op:                                \
2556       intrin->intrinsic = bindless ? nir_intrinsic_bindless_image_##op \
2557                                    : nir_intrinsic_image_##op;         \
2558       break;
2559       CASE(load)
2560       CASE(sparse_load)
2561       CASE(store)
2562       CASE(atomic)
2563       CASE(atomic_swap)
2564       CASE(size)
2565       CASE(samples)
2566       CASE(load_raw_intel)
2567       CASE(store_raw_intel)
2568       CASE(fragment_mask_load_amd)
2569 #undef CASE
2570    default:
2571       unreachable("Unhandled image intrinsic");
2572    }
2573 
2574    nir_variable *var = nir_intrinsic_get_var(intrin, 0);
2575 
2576    /* Only update the format if the intrinsic doesn't have one set */
2577    if (nir_intrinsic_format(intrin) == PIPE_FORMAT_NONE)
2578       nir_intrinsic_set_format(intrin, var->data.image.format);
2579 
2580    nir_intrinsic_set_access(intrin, access | var->data.access);
2581    if (nir_intrinsic_has_src_type(intrin))
2582       nir_intrinsic_set_src_type(intrin, data_type);
2583    if (nir_intrinsic_has_dest_type(intrin))
2584       nir_intrinsic_set_dest_type(intrin, data_type);
2585 
2586    if (nir_intrinsic_has_atomic_op(intrin))
2587       nir_intrinsic_set_atomic_op(intrin, atomic_op);
2588 
2589    nir_src_rewrite(&intrin->src[0], src);
2590 }
2591 
2592 unsigned
2593 nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr)
2594 {
2595    enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
2596    int coords = glsl_get_sampler_dim_coordinate_components(dim);
2597    if (dim == GLSL_SAMPLER_DIM_CUBE)
2598       return coords;
2599    else
2600       return coords + nir_intrinsic_image_array(instr);
2601 }
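/* Examples: a 2D array image uses 3 coordinate components (x, y, layer).
 * Cube images always report 3 (x, y, face); for cube arrays the layer is
 * combined with the face index rather than adding a fourth component, which
 * is why the array flag is not added for cubes.
 */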
2602 
2603 nir_src *
2604 nir_get_shader_call_payload_src(nir_intrinsic_instr *call)
2605 {
2606    switch (call->intrinsic) {
2607    case nir_intrinsic_trace_ray:
2608    case nir_intrinsic_rt_trace_ray:
2609       return &call->src[10];
2610    case nir_intrinsic_execute_callable:
2611    case nir_intrinsic_rt_execute_callable:
2612       return &call->src[1];
2613    default:
2614       unreachable("Not a call intrinsic");
2615       return NULL;
2616    }
2617 }
2618 
2619 nir_binding
2620 nir_chase_binding(nir_src rsrc)
2621 {
2622    nir_binding res = { 0 };
2623    if (rsrc.ssa->parent_instr->type == nir_instr_type_deref) {
2624       const struct glsl_type *type = glsl_without_array(nir_src_as_deref(rsrc)->type);
2625       bool is_image = glsl_type_is_image(type) || glsl_type_is_sampler(type);
2626       while (rsrc.ssa->parent_instr->type == nir_instr_type_deref) {
2627          nir_deref_instr *deref = nir_src_as_deref(rsrc);
2628 
2629          if (deref->deref_type == nir_deref_type_var) {
2630             res.success = true;
2631             res.var = deref->var;
2632             res.desc_set = deref->var->data.descriptor_set;
2633             res.binding = deref->var->data.binding;
2634             return res;
2635          } else if (deref->deref_type == nir_deref_type_array && is_image) {
2636             if (res.num_indices == ARRAY_SIZE(res.indices))
2637                return (nir_binding){ 0 };
2638             res.indices[res.num_indices++] = deref->arr.index;
2639          }
2640 
2641          rsrc = deref->parent;
2642       }
2643    }
2644 
2645    /* Skip copies and trimming. Trimming can appear as nir_op_mov instructions
2646     * when removing the offset from addresses. We also consider
2647     * nir_op_is_vec_or_mov() instructions to skip trimming of
2648     * vec2_index_32bit_offset addresses after lowering ALU to scalar.
2649     */
2650    unsigned num_components = nir_src_num_components(rsrc);
2651    while (true) {
2652       nir_alu_instr *alu = nir_src_as_alu_instr(rsrc);
2653       nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc);
2654       if (alu && alu->op == nir_op_mov) {
2655          for (unsigned i = 0; i < num_components; i++) {
2656             if (alu->src[0].swizzle[i] != i)
2657                return (nir_binding){ 0 };
2658          }
2659          rsrc = alu->src[0].src;
2660       } else if (alu && nir_op_is_vec(alu->op)) {
2661          for (unsigned i = 0; i < num_components; i++) {
2662             if (alu->src[i].swizzle[0] != i || alu->src[i].src.ssa != alu->src[0].src.ssa)
2663                return (nir_binding){ 0 };
2664          }
2665          rsrc = alu->src[0].src;
2666       } else if (intrin && intrin->intrinsic == nir_intrinsic_read_first_invocation) {
2667          /* The caller might want to know that only the first invocation's
2668           * indices are used.
2669           */
2670          res.read_first_invocation = true;
2671          rsrc = intrin->src[0];
2672       } else {
2673          break;
2674       }
2675    }
2676 
2677    if (nir_src_is_const(rsrc)) {
2678       /* GL binding model after deref lowering */
2679       res.success = true;
2680       /* Can't use just nir_src_as_uint. Vulkan resource index produces a
2681        * vec2. Some drivers lower it to vec1 (to handle get_ssbo_size for
2682        * example) but others just keep it around as a vec2 (v3dv).
2683        */
2684       res.binding = nir_src_comp_as_uint(rsrc, 0);
2685       return res;
2686    }
2687 
2688    /* otherwise, must be Vulkan binding model after deref lowering or GL bindless */
2689 
2690    nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc);
2691    if (!intrin)
2692       return (nir_binding){ 0 };
2693 
2694    /* Intel resource, similar to load_vulkan_descriptor after it has been
2695     * lowered.
2696     */
2697    if (intrin->intrinsic == nir_intrinsic_resource_intel) {
2698       res.success = true;
2699       res.desc_set = nir_intrinsic_desc_set(intrin);
2700       res.binding = nir_intrinsic_binding(intrin);
2701       /* nir_intrinsic_resource_intel has 3 sources, but src[2] is included in
2702        * src[1]; it is kept around for other purposes.
2703        */
2704       res.num_indices = 2;
2705       res.indices[0] = intrin->src[0];
2706       res.indices[1] = intrin->src[1];
2707       return res;
2708    }
2709 
2710    /* skip load_vulkan_descriptor */
2711    if (intrin->intrinsic == nir_intrinsic_load_vulkan_descriptor) {
2712       intrin = nir_src_as_intrinsic(intrin->src[0]);
2713       if (!intrin)
2714          return (nir_binding){ 0 };
2715    }
2716 
2717    if (intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
2718       return (nir_binding){ 0 };
2719 
2720    assert(res.num_indices == 0);
2721    res.success = true;
2722    res.desc_set = nir_intrinsic_desc_set(intrin);
2723    res.binding = nir_intrinsic_binding(intrin);
2724    res.num_indices = 1;
2725    res.indices[0] = intrin->src[0];
2726    return res;
2727 }
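/* A minimal usage sketch (hypothetical caller, not part of this file):
 *
 *    nir_binding bind = nir_chase_binding(intrin->src[0]);
 *    if (bind.success) {
 *       nir_variable *var = nir_get_binding_variable(shader, bind);
 *       // var may still be NULL if several variables share the same
 *       // desc_set/binding pair.
 *    }
 */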
2728 
2729 nir_variable *
2730 nir_get_binding_variable(nir_shader *shader, nir_binding binding)
2731 {
2732    nir_variable *binding_var = NULL;
2733    unsigned count = 0;
2734 
2735    if (!binding.success)
2736       return NULL;
2737 
2738    if (binding.var)
2739       return binding.var;
2740 
2741    nir_foreach_variable_with_modes(var, shader, nir_var_mem_ubo | nir_var_mem_ssbo) {
2742       if (var->data.descriptor_set == binding.desc_set && var->data.binding == binding.binding) {
2743          binding_var = var;
2744          count++;
2745       }
2746    }
2747 
2748    /* Be conservative if another variable is using the same binding/desc_set
2749     * because the access mask might be different and we can't get it reliably.
2750     */
2751    if (count > 1)
2752       return NULL;
2753 
2754    return binding_var;
2755 }
2756 
2757 nir_scalar
2758 nir_scalar_chase_movs(nir_scalar s)
2759 {
2760    while (nir_scalar_is_alu(s)) {
2761       nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
2762       if (alu->op == nir_op_mov) {
2763          s.def = alu->src[0].src.ssa;
2764          s.comp = alu->src[0].swizzle[s.comp];
2765       } else if (nir_op_is_vec(alu->op)) {
2766          s.def = alu->src[s.comp].src.ssa;
2767          s.comp = alu->src[s.comp].swizzle[0];
2768       } else {
2769          break;
2770       }
2771    }
2772 
2773    return s;
2774 }
2775 
2776 nir_alu_type
2777 nir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type)
2778 {
2779    switch (base_type) {
2780    /* clang-format off */
2781    case GLSL_TYPE_BOOL:    return nir_type_bool1;
2782    case GLSL_TYPE_UINT:    return nir_type_uint32;
2783    case GLSL_TYPE_INT:     return nir_type_int32;
2784    case GLSL_TYPE_UINT16:  return nir_type_uint16;
2785    case GLSL_TYPE_INT16:   return nir_type_int16;
2786    case GLSL_TYPE_UINT8:   return nir_type_uint8;
2787    case GLSL_TYPE_INT8:    return nir_type_int8;
2788    case GLSL_TYPE_UINT64:  return nir_type_uint64;
2789    case GLSL_TYPE_INT64:   return nir_type_int64;
2790    case GLSL_TYPE_FLOAT:   return nir_type_float32;
2791    case GLSL_TYPE_FLOAT16: return nir_type_float16;
2792    case GLSL_TYPE_DOUBLE:  return nir_type_float64;
2793       /* clang-format on */
2794 
2795    case GLSL_TYPE_COOPERATIVE_MATRIX:
2796    case GLSL_TYPE_SAMPLER:
2797    case GLSL_TYPE_TEXTURE:
2798    case GLSL_TYPE_IMAGE:
2799    case GLSL_TYPE_ATOMIC_UINT:
2800    case GLSL_TYPE_STRUCT:
2801    case GLSL_TYPE_INTERFACE:
2802    case GLSL_TYPE_ARRAY:
2803    case GLSL_TYPE_VOID:
2804    case GLSL_TYPE_SUBROUTINE:
2805    case GLSL_TYPE_ERROR:
2806       return nir_type_invalid;
2807    }
2808 
2809    unreachable("unknown type");
2810 }
2811 
2812 enum glsl_base_type
2813 nir_get_glsl_base_type_for_nir_type(nir_alu_type base_type)
2814 {
2815    /* clang-format off */
2816    switch (base_type) {
2817    case nir_type_bool1:    return GLSL_TYPE_BOOL;
2818    case nir_type_uint32:   return GLSL_TYPE_UINT;
2819    case nir_type_int32:    return GLSL_TYPE_INT;
2820    case nir_type_uint16:   return GLSL_TYPE_UINT16;
2821    case nir_type_int16:    return GLSL_TYPE_INT16;
2822    case nir_type_uint8:    return GLSL_TYPE_UINT8;
2823    case nir_type_int8:     return GLSL_TYPE_INT8;
2824    case nir_type_uint64:   return GLSL_TYPE_UINT64;
2825    case nir_type_int64:    return GLSL_TYPE_INT64;
2826    case nir_type_float32:  return GLSL_TYPE_FLOAT;
2827    case nir_type_float16:  return GLSL_TYPE_FLOAT16;
2828    case nir_type_float64:  return GLSL_TYPE_DOUBLE;
2829    default: unreachable("Not a sized nir_alu_type");
2830    }
2831    /* clang-format on */
2832 }
2833 
2834 nir_op
2835 nir_op_vec(unsigned num_components)
2836 {
2837    /* clang-format off */
2838    switch (num_components) {
2839    case  1: return nir_op_mov;
2840    case  2: return nir_op_vec2;
2841    case  3: return nir_op_vec3;
2842    case  4: return nir_op_vec4;
2843    case  5: return nir_op_vec5;
2844    case  8: return nir_op_vec8;
2845    case 16: return nir_op_vec16;
2846    default: unreachable("bad component count");
2847    }
2848    /* clang-format on */
2849 }
2850 
2851 bool
2852 nir_op_is_vec(nir_op op)
2853 {
2854    switch (op) {
2855    case nir_op_vec2:
2856    case nir_op_vec3:
2857    case nir_op_vec4:
2858    case nir_op_vec5:
2859    case nir_op_vec8:
2860    case nir_op_vec16:
2861       return true;
2862    default:
2863       return false;
2864    }
2865 }
2866 
2867 nir_component_mask_t
2868 nir_alu_instr_src_read_mask(const nir_alu_instr *instr, unsigned src)
2869 {
2870    nir_component_mask_t read_mask = 0;
2871    for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) {
2872       if (!nir_alu_instr_channel_used(instr, src, c))
2873          continue;
2874 
2875       read_mask |= (1 << instr->src[src].swizzle[c]);
2876    }
2877    return read_mask;
2878 }
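/* Worked example: for an fadd whose def has 2 components and whose src[0]
 * swizzle is .zz, only channel 2 of that source is read, so the returned
 * mask is 0x4.
 */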
2879 
2880 unsigned
2881 nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src)
2882 {
2883    if (nir_op_infos[instr->op].input_sizes[src] > 0)
2884       return nir_op_infos[instr->op].input_sizes[src];
2885 
2886    return instr->def.num_components;
2887 }
2888 
2889 #define CASE_ALL_SIZES(op) \
2890    case op:                \
2891    case op##8:             \
2892    case op##16:            \
2893    case op##32:
2894 
2895 bool
2896 nir_alu_instr_is_comparison(const nir_alu_instr *instr)
2897 {
2898    switch (instr->op) {
2899       CASE_ALL_SIZES(nir_op_flt)
2900       CASE_ALL_SIZES(nir_op_fge)
2901       CASE_ALL_SIZES(nir_op_feq)
2902       CASE_ALL_SIZES(nir_op_fneu)
2903       CASE_ALL_SIZES(nir_op_ilt)
2904       CASE_ALL_SIZES(nir_op_ult)
2905       CASE_ALL_SIZES(nir_op_ige)
2906       CASE_ALL_SIZES(nir_op_uge)
2907       CASE_ALL_SIZES(nir_op_ieq)
2908       CASE_ALL_SIZES(nir_op_ine)
2909       CASE_ALL_SIZES(nir_op_bitz)
2910       CASE_ALL_SIZES(nir_op_bitnz)
2911    case nir_op_inot:
2912       return true;
2913    default:
2914       return false;
2915    }
2916 }
2917 
2918 #undef CASE_ALL_SIZES
2919 
2920 unsigned
2921 nir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn)
2922 {
2923    const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
2924    assert(srcn < info->num_srcs);
2925    if (info->src_components[srcn] > 0)
2926       return info->src_components[srcn];
2927    else if (info->src_components[srcn] == 0)
2928       return intr->num_components;
2929    else
2930       return nir_src_num_components(intr->src[srcn]);
2931 }
2932 
2933 unsigned
2934 nir_intrinsic_dest_components(nir_intrinsic_instr *intr)
2935 {
2936    const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
2937    if (!info->has_dest)
2938       return 0;
2939    else if (info->dest_components)
2940       return info->dest_components;
2941    else
2942       return intr->num_components;
2943 }
2944 
2945 nir_alu_type
2946 nir_intrinsic_instr_src_type(const nir_intrinsic_instr *intrin, unsigned src)
2947 {
2948    /* We could go nuts here, but we'll just handle a few simple
2949     * cases and let everything else be untyped.
2950     */
2951    switch (intrin->intrinsic) {
2952    case nir_intrinsic_store_deref: {
2953       nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2954       if (src == 1)
2955          return nir_get_nir_type_for_glsl_type(deref->type);
2956       break;
2957    }
2958 
2959    case nir_intrinsic_store_output:
2960       if (src == 0)
2961          return nir_intrinsic_src_type(intrin);
2962       break;
2963 
2964    default:
2965       break;
2966    }
2967 
2968    /* For the most part, we leave other intrinsics alone.  Most
2969     * of them don't matter in OpenGL ES 2.0 drivers anyway.
2970     * However, we should at least check if this is some sort of
2971     * IO intrinsic and flag its offset and index sources.
2972     */
2973    {
2974       int offset_src_idx = nir_get_io_offset_src_number(intrin);
2975       if (src == offset_src_idx) {
2976          const nir_src *offset_src = offset_src_idx >= 0 ? &intrin->src[offset_src_idx] : NULL;
2977          if (offset_src)
2978             return nir_type_int;
2979       }
2980    }
2981 
2982    return nir_type_invalid;
2983 }
2984 
2985 nir_alu_type
2986 nir_intrinsic_instr_dest_type(const nir_intrinsic_instr *intrin)
2987 {
2988    /* We could go nuts here, but we'll just handle a few simple
2989     * cases and let everything else be untyped.
2990     */
2991    switch (intrin->intrinsic) {
2992    case nir_intrinsic_load_deref: {
2993       nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2994       return nir_get_nir_type_for_glsl_type(deref->type);
2995    }
2996 
2997    case nir_intrinsic_load_input:
2998    case nir_intrinsic_load_uniform:
2999       return nir_intrinsic_dest_type(intrin);
3000 
3001    default:
3002       break;
3003    }
3004 
3005    return nir_type_invalid;
3006 }
3007 
3008 /**
3009  * Helper to copy const_index[] from src to dst, without assuming they
3010  * match in order.
3011  */
3012 void
3013 nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src)
3014 {
3015    if (src->intrinsic == dst->intrinsic) {
3016       memcpy(dst->const_index, src->const_index, sizeof(dst->const_index));
3017       return;
3018    }
3019 
3020    const nir_intrinsic_info *src_info = &nir_intrinsic_infos[src->intrinsic];
3021    const nir_intrinsic_info *dst_info = &nir_intrinsic_infos[dst->intrinsic];
3022 
3023    for (unsigned i = 0; i < NIR_INTRINSIC_NUM_INDEX_FLAGS; i++) {
3024       if (src_info->index_map[i] == 0)
3025          continue;
3026 
3027       /* require that dst instruction also uses the same const_index[]: */
3028       assert(dst_info->index_map[i] > 0);
3029 
3030       dst->const_index[dst_info->index_map[i] - 1] =
3031          src->const_index[src_info->index_map[i] - 1];
3032    }
3033 }
3034 
3035 bool
3036 nir_tex_instr_need_sampler(const nir_tex_instr *instr)
3037 {
3038    switch (instr->op) {
3039    case nir_texop_txf:
3040    case nir_texop_txf_ms:
3041    case nir_texop_txs:
3042    case nir_texop_query_levels:
3043    case nir_texop_texture_samples:
3044    case nir_texop_samples_identical:
3045    case nir_texop_descriptor_amd:
3046       return false;
3047    default:
3048       return true;
3049    }
3050 }
3051 
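/**
 * Number of components in the result of a texture instruction.  For
 * queries this depends on the opcode (and, for txs, on sampler_dim and
 * is_array); for sampling ops it's a vec4 unless a new-style shadow
 * comparison reduces it to a single component.
 */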
unsigned
nir_tex_instr_result_size(const nir_tex_instr *instr)
{
   switch (instr->op) {
   case nir_texop_txs: {
      unsigned ret;
      switch (instr->sampler_dim) {
      case GLSL_SAMPLER_DIM_1D:
      case GLSL_SAMPLER_DIM_BUF:
         ret = 1;
         break;
      case GLSL_SAMPLER_DIM_2D:
      case GLSL_SAMPLER_DIM_CUBE:
      case GLSL_SAMPLER_DIM_MS:
      case GLSL_SAMPLER_DIM_RECT:
      case GLSL_SAMPLER_DIM_EXTERNAL:
      case GLSL_SAMPLER_DIM_SUBPASS:
         ret = 2;
         break;
      case GLSL_SAMPLER_DIM_3D:
         ret = 3;
         break;
      default:
         unreachable("not reached");
      }
      if (instr->is_array)
         ret++;
      return ret;
   }

   case nir_texop_lod:
      return 2;

   case nir_texop_texture_samples:
   case nir_texop_query_levels:
   case nir_texop_samples_identical:
   case nir_texop_fragment_mask_fetch_amd:
   case nir_texop_lod_bias_agx:
      return 1;

   case nir_texop_descriptor_amd:
      return instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 4 : 8;

   case nir_texop_sampler_descriptor_amd:
      return 4;

   case nir_texop_hdr_dim_nv:
   case nir_texop_tex_type_nv:
      return 4;

   default:
      if (instr->is_shadow && instr->is_new_style_shadow)
         return 1;

      return 4;
   }
}

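/**
 * Whether the instruction queries texture/sampler state (size, levels,
 * sample count, LOD, descriptors) rather than sampling or fetching
 * texel data.
 */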
bool
nir_tex_instr_is_query(const nir_tex_instr *instr)
{
   switch (instr->op) {
   case nir_texop_txs:
   case nir_texop_lod:
   case nir_texop_texture_samples:
   case nir_texop_query_levels:
   case nir_texop_descriptor_amd:
   case nir_texop_sampler_descriptor_amd:
   case nir_texop_lod_bias_agx:
      return true;
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_txl:
   case nir_texop_txd:
   case nir_texop_txf:
   case nir_texop_txf_ms:
   case nir_texop_txf_ms_fb:
   case nir_texop_txf_ms_mcs_intel:
   case nir_texop_tg4:
   case nir_texop_samples_identical:
   case nir_texop_fragment_mask_fetch_amd:
   case nir_texop_fragment_fetch_amd:
      return false;
   default:
      unreachable("Invalid texture opcode");
   }
}

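/**
 * Whether the instruction's LOD is derived from implicit screen-space
 * derivatives of the coordinate, which matters for helper invocations
 * and divergent control flow in fragment shaders.
 */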
bool
nir_tex_instr_has_implicit_derivative(const nir_tex_instr *instr)
{
   switch (instr->op) {
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_lod:
      return true;
   case nir_texop_tg4:
      return instr->is_gather_implicit_lod;
   default:
      return false;
   }
}

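/**
 * ALU type (float/int/uint) expected for a given texture source, based
 * on the source kind and, for coordinates and LOD, on the opcode.
 */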
nir_alu_type
nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src)
{
   switch (instr->src[src].src_type) {
   case nir_tex_src_coord:
      switch (instr->op) {
      case nir_texop_txf:
      case nir_texop_txf_ms:
      case nir_texop_txf_ms_fb:
      case nir_texop_txf_ms_mcs_intel:
      case nir_texop_samples_identical:
      case nir_texop_fragment_fetch_amd:
      case nir_texop_fragment_mask_fetch_amd:
         return nir_type_int;

      default:
         return nir_type_float;
      }

   case nir_tex_src_lod:
      switch (instr->op) {
      case nir_texop_txs:
      case nir_texop_txf:
      case nir_texop_txf_ms:
      case nir_texop_fragment_fetch_amd:
      case nir_texop_fragment_mask_fetch_amd:
         return nir_type_int;

      default:
         return nir_type_float;
      }

   case nir_tex_src_projector:
   case nir_tex_src_comparator:
   case nir_tex_src_bias:
   case nir_tex_src_min_lod:
   case nir_tex_src_ddx:
   case nir_tex_src_ddy:
   case nir_tex_src_backend1:
   case nir_tex_src_backend2:
      return nir_type_float;

   case nir_tex_src_offset:
   case nir_tex_src_ms_index:
   case nir_tex_src_plane:
      return nir_type_int;

   case nir_tex_src_ms_mcs_intel:
   case nir_tex_src_texture_deref:
   case nir_tex_src_sampler_deref:
   case nir_tex_src_texture_offset:
   case nir_tex_src_sampler_offset:
   case nir_tex_src_texture_handle:
   case nir_tex_src_sampler_handle:
      return nir_type_uint;

   case nir_num_tex_src_types:
      unreachable("nir_num_tex_src_types is not a valid source type");
   }

   unreachable("Invalid texture source type");
}

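/**
 * Number of components expected in a given texture source.  Most
 * sources are scalar; coordinates, derivatives, and offsets scale with
 * coord_components (minus the array layer where applicable).
 */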
unsigned
nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src)
{
   if (instr->src[src].src_type == nir_tex_src_coord)
      return instr->coord_components;

   /* The MCS value is expected to be a vec4 returned by a txf_ms_mcs_intel */
   if (instr->src[src].src_type == nir_tex_src_ms_mcs_intel)
      return 4;

   if (instr->src[src].src_type == nir_tex_src_ddx ||
       instr->src[src].src_type == nir_tex_src_ddy) {

      if (instr->is_array && !instr->array_is_lowered_cube)
         return instr->coord_components - 1;
      else
         return instr->coord_components;
   }

   if (instr->src[src].src_type == nir_tex_src_offset) {
      if (instr->is_array)
         return instr->coord_components - 1;
      else
         return instr->coord_components;
   }

   if (instr->src[src].src_type == nir_tex_src_backend1 ||
       instr->src[src].src_type == nir_tex_src_backend2)
      return nir_src_num_components(instr->src[src].src);

   /* For AMD, this can be a vec8/vec4 image/sampler descriptor. */
   if (instr->src[src].src_type == nir_tex_src_texture_handle ||
       instr->src[src].src_type == nir_tex_src_sampler_handle)
      return 0;

   return 1;
}

/**
 * Return which components are written into transform feedback buffers.
 * The result is relative to 0, not "component".
 */
unsigned
nir_instr_xfb_write_mask(nir_intrinsic_instr *instr)
{
   unsigned mask = 0;

   if (nir_intrinsic_has_io_xfb(instr)) {
      unsigned wr_mask = nir_intrinsic_write_mask(instr) << nir_intrinsic_component(instr);
      assert((wr_mask & ~0xf) == 0); /* only 4 components allowed */

      unsigned iter_mask = wr_mask;
      while (iter_mask) {
         unsigned i = u_bit_scan(&iter_mask);
         nir_io_xfb xfb = i < 2 ? nir_intrinsic_io_xfb(instr) : nir_intrinsic_io_xfb2(instr);
         if (xfb.out[i % 2].num_components)
            mask |= BITFIELD_RANGE(i, xfb.out[i % 2].num_components) & wr_mask;
      }
   }

   return mask;
}
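/*
 * Worked example (illustrative): for a store with write_mask = 0x3 and
 * component = 2, wr_mask is 0xc.  Bit 2 selects nir_intrinsic_io_xfb2's
 * out[0] slot; if that slot captures 2 components, the function returns
 * 0xc, i.e. components 2 and 3 counted from 0 rather than from
 * "component".
 */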

/**
 * Whether an output slot is consumed by fixed-function logic.
 */
bool
nir_slot_is_sysval_output(gl_varying_slot slot, gl_shader_stage next_shader)
{
   switch (next_shader) {
   case MESA_SHADER_FRAGMENT:
      return slot == VARYING_SLOT_POS ||
             slot == VARYING_SLOT_PSIZ ||
             slot == VARYING_SLOT_EDGE ||
             slot == VARYING_SLOT_CLIP_VERTEX ||
             slot == VARYING_SLOT_CLIP_DIST0 ||
             slot == VARYING_SLOT_CLIP_DIST1 ||
             slot == VARYING_SLOT_CULL_DIST0 ||
             slot == VARYING_SLOT_CULL_DIST1 ||
             slot == VARYING_SLOT_LAYER ||
             slot == VARYING_SLOT_VIEWPORT ||
             slot == VARYING_SLOT_VIEW_INDEX ||
             slot == VARYING_SLOT_VIEWPORT_MASK ||
             slot == VARYING_SLOT_PRIMITIVE_SHADING_RATE ||
             /* NV_mesh_shader only */
             slot == VARYING_SLOT_PRIMITIVE_COUNT ||
             slot == VARYING_SLOT_PRIMITIVE_INDICES;

   case MESA_SHADER_TESS_EVAL:
      return slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
             slot == VARYING_SLOT_TESS_LEVEL_INNER ||
             slot == VARYING_SLOT_BOUNDING_BOX0 ||
             slot == VARYING_SLOT_BOUNDING_BOX1;

   case MESA_SHADER_MESH:
      /* NV_mesh_shader only */
      return slot == VARYING_SLOT_TASK_COUNT;

   case MESA_SHADER_NONE:
      /* NONE means unknown. Check all possibilities. */
      return nir_slot_is_sysval_output(slot, MESA_SHADER_FRAGMENT) ||
             nir_slot_is_sysval_output(slot, MESA_SHADER_TESS_EVAL) ||
             nir_slot_is_sysval_output(slot, MESA_SHADER_MESH);

   default:
      /* No other shaders have preceding shaders with sysval outputs. */
      return false;
   }
}

/**
 * Whether an input/output slot is consumed by the next shader stage,
 * or written by the previous shader stage.
 */
bool
nir_slot_is_varying(gl_varying_slot slot)
{
   return slot >= VARYING_SLOT_VAR0 ||
          slot == VARYING_SLOT_COL0 ||
          slot == VARYING_SLOT_COL1 ||
          slot == VARYING_SLOT_BFC0 ||
          slot == VARYING_SLOT_BFC1 ||
          slot == VARYING_SLOT_FOGC ||
          (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) ||
          slot == VARYING_SLOT_PNTC ||
          slot == VARYING_SLOT_CLIP_DIST0 ||
          slot == VARYING_SLOT_CLIP_DIST1 ||
          slot == VARYING_SLOT_CULL_DIST0 ||
          slot == VARYING_SLOT_CULL_DIST1 ||
          slot == VARYING_SLOT_PRIMITIVE_ID ||
          slot == VARYING_SLOT_LAYER ||
          slot == VARYING_SLOT_VIEWPORT ||
          slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
          slot == VARYING_SLOT_TESS_LEVEL_INNER;
}

bool
nir_slot_is_sysval_output_and_varying(gl_varying_slot slot,
                                      gl_shader_stage next_shader)
{
   return nir_slot_is_sysval_output(slot, next_shader) &&
          nir_slot_is_varying(slot);
}

/**
 * This marks the output store instruction as not feeding the next shader
 * stage. If the instruction has no other use, it's removed.
 */
bool
nir_remove_varying(nir_intrinsic_instr *intr, gl_shader_stage next_shader)
{
   nir_io_semantics sem = nir_intrinsic_io_semantics(intr);

   if ((!sem.no_sysval_output &&
        nir_slot_is_sysval_output(sem.location, next_shader)) ||
       nir_instr_xfb_write_mask(intr)) {
      /* Demote the store instruction. */
      sem.no_varying = true;
      nir_intrinsic_set_io_semantics(intr, sem);
      return false;
   } else {
      nir_instr_remove(&intr->instr);
      return true;
   }
}
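/*
 * Usage sketch (illustrative, not upstream code): a cross-stage
 * dead-output pass that found no readers for this output in the next
 * stage might call:
 *
 *    if (nir_remove_varying(intr, MESA_SHADER_FRAGMENT)) {
 *       ... the store was removed entirely ...
 *    } else {
 *       ... it survives for sysval/xfb consumers only ...
 *    }
 */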

/**
 * This marks the output store instruction as not feeding fixed-function
 * logic. If the instruction has no other use, it's removed.
 */
bool
nir_remove_sysval_output(nir_intrinsic_instr *intr)
{
   nir_io_semantics sem = nir_intrinsic_io_semantics(intr);

   if ((!sem.no_varying && nir_slot_is_varying(sem.location)) ||
       nir_instr_xfb_write_mask(intr)) {
      /* Demote the store instruction. */
      sem.no_sysval_output = true;
      nir_intrinsic_set_io_semantics(intr, sem);
      return false;
   } else {
      nir_instr_remove(&intr->instr);
      return true;
   }
}
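/*
 * Note (illustrative): this is the mirror of nir_remove_varying().  A
 * store is only deleted once both consumers are gone; e.g. a
 * VARYING_SLOT_LAYER store that the next stage still reads is merely
 * demoted here, because nir_slot_is_varying() returns true for it.
 */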

void
nir_remove_non_entrypoints(nir_shader *nir)
{
   nir_foreach_function_safe(func, nir) {
      if (!func->is_entrypoint)
         exec_node_remove(&func->node);
   }
   assert(exec_list_length(&nir->functions) == 1);
}

void
nir_remove_non_exported(nir_shader *nir)
{
   nir_foreach_function_safe(func, nir) {
      if (!func->is_exported)
         exec_node_remove(&func->node);
   }
}

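/**
 * Product of the three compile-time workgroup dimensions, i.e. the
 * total invocation count per workgroup.  Only meaningful when the
 * workgroup size is not variable.
 */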
unsigned
nir_static_workgroup_size(const nir_shader *s)
{
   return s->info.workgroup_size[0] * s->info.workgroup_size[1] *
          s->info.workgroup_size[2];
}