/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */

#include "nir.h"
#include <assert.h>
#include <limits.h>
#include <math.h>
#include "util/half_float.h"
#include "util/macros.h"
#include "util/u_math.h"
#include "util/u_qsort.h"
#include "nir_builder.h"
#include "nir_control_flow_private.h"
#include "nir_worklist.h"

#ifndef NDEBUG
uint32_t nir_debug = 0;
bool nir_debug_print_shader[MESA_SHADER_KERNEL + 1] = { 0 };

static const struct debug_named_value nir_debug_control[] = {
   { "clone", NIR_DEBUG_CLONE,
     "Test cloning a shader at each successful lowering/optimization call" },
   { "serialize", NIR_DEBUG_SERIALIZE,
     "Test serialize and deserialize shader at each successful lowering/optimization call" },
   { "novalidate", NIR_DEBUG_NOVALIDATE,
     "Disable shader validation at each successful lowering/optimization call" },
   { "validate_ssa_dominance", NIR_DEBUG_VALIDATE_SSA_DOMINANCE,
     "Validate SSA dominance in shader at each successful lowering/optimization call" },
   { "tgsi", NIR_DEBUG_TGSI,
     "Dump NIR/TGSI shaders when doing a NIR<->TGSI translation" },
   { "print", NIR_DEBUG_PRINT,
     "Dump resulting shader after each successful lowering/optimization call" },
   { "print_vs", NIR_DEBUG_PRINT_VS,
     "Dump resulting vertex shader after each successful lowering/optimization call" },
   { "print_tcs", NIR_DEBUG_PRINT_TCS,
     "Dump resulting tessellation control shader after each successful lowering/optimization call" },
   { "print_tes", NIR_DEBUG_PRINT_TES,
     "Dump resulting tessellation evaluation shader after each successful lowering/optimization call" },
   { "print_gs", NIR_DEBUG_PRINT_GS,
     "Dump resulting geometry shader after each successful lowering/optimization call" },
   { "print_fs", NIR_DEBUG_PRINT_FS,
     "Dump resulting fragment shader after each successful lowering/optimization call" },
   { "print_cs", NIR_DEBUG_PRINT_CS,
     "Dump resulting compute shader after each successful lowering/optimization call" },
   { "print_ts", NIR_DEBUG_PRINT_TS,
     "Dump resulting task shader after each successful lowering/optimization call" },
   { "print_ms", NIR_DEBUG_PRINT_MS,
     "Dump resulting mesh shader after each successful lowering/optimization call" },
   { "print_rgs", NIR_DEBUG_PRINT_RGS,
     "Dump resulting raygen shader after each successful lowering/optimization call" },
   { "print_ahs", NIR_DEBUG_PRINT_AHS,
     "Dump resulting any-hit shader after each successful lowering/optimization call" },
   { "print_chs", NIR_DEBUG_PRINT_CHS,
     "Dump resulting closest-hit shader after each successful lowering/optimization call" },
   { "print_mhs", NIR_DEBUG_PRINT_MHS,
     "Dump resulting miss-hit shader after each successful lowering/optimization call" },
   { "print_is", NIR_DEBUG_PRINT_IS,
     "Dump resulting intersection shader after each successful lowering/optimization call" },
   { "print_cbs", NIR_DEBUG_PRINT_CBS,
     "Dump resulting callable shader after each successful lowering/optimization call" },
   { "print_ks", NIR_DEBUG_PRINT_KS,
     "Dump resulting kernel shader after each successful lowering/optimization call" },
   { "print_no_inline_consts", NIR_DEBUG_PRINT_NO_INLINE_CONSTS,
     "Do not print const value near each use of const SSA variable" },
   { "print_internal", NIR_DEBUG_PRINT_INTERNAL,
     "Print shaders even if they are marked as internal" },
   { "print_pass_flags", NIR_DEBUG_PRINT_PASS_FLAGS,
     "Print pass_flags for every instruction when pass_flags are non-zero" },
   DEBUG_NAMED_VALUE_END
};

DEBUG_GET_ONCE_FLAGS_OPTION(nir_debug, "NIR_DEBUG", nir_debug_control, 0)

static void
nir_process_debug_variable_once(void)
{
   nir_debug = debug_get_option_nir_debug();

   /* clang-format off */
   nir_debug_print_shader[MESA_SHADER_VERTEX]       = NIR_DEBUG(PRINT_VS);
   nir_debug_print_shader[MESA_SHADER_TESS_CTRL]    = NIR_DEBUG(PRINT_TCS);
   nir_debug_print_shader[MESA_SHADER_TESS_EVAL]    = NIR_DEBUG(PRINT_TES);
   nir_debug_print_shader[MESA_SHADER_GEOMETRY]     = NIR_DEBUG(PRINT_GS);
   nir_debug_print_shader[MESA_SHADER_FRAGMENT]     = NIR_DEBUG(PRINT_FS);
   nir_debug_print_shader[MESA_SHADER_COMPUTE]      = NIR_DEBUG(PRINT_CS);
   nir_debug_print_shader[MESA_SHADER_TASK]         = NIR_DEBUG(PRINT_TS);
   nir_debug_print_shader[MESA_SHADER_MESH]         = NIR_DEBUG(PRINT_MS);
   nir_debug_print_shader[MESA_SHADER_RAYGEN]       = NIR_DEBUG(PRINT_RGS);
   nir_debug_print_shader[MESA_SHADER_ANY_HIT]      = NIR_DEBUG(PRINT_AHS);
   nir_debug_print_shader[MESA_SHADER_CLOSEST_HIT]  = NIR_DEBUG(PRINT_CHS);
   nir_debug_print_shader[MESA_SHADER_MISS]         = NIR_DEBUG(PRINT_MHS);
   nir_debug_print_shader[MESA_SHADER_INTERSECTION] = NIR_DEBUG(PRINT_IS);
   nir_debug_print_shader[MESA_SHADER_CALLABLE]     = NIR_DEBUG(PRINT_CBS);
   nir_debug_print_shader[MESA_SHADER_KERNEL]       = NIR_DEBUG(PRINT_KS);
   /* clang-format on */
}

void
nir_process_debug_variable(void)
{
   static once_flag flag = ONCE_FLAG_INIT;
   call_once(&flag, nir_process_debug_variable_once);
}
#endif

/** Return true if the component mask "mask" with bit size "old_bit_size" can
 * be re-interpreted to be used with "new_bit_size".
 */
bool
nir_component_mask_can_reinterpret(nir_component_mask_t mask,
                                   unsigned old_bit_size,
                                   unsigned new_bit_size)
{
   assert(util_is_power_of_two_nonzero(old_bit_size));
   assert(util_is_power_of_two_nonzero(new_bit_size));

   if (old_bit_size == new_bit_size)
      return true;

   if (old_bit_size == 1 || new_bit_size == 1)
      return false;

   if (old_bit_size > new_bit_size) {
      unsigned ratio = old_bit_size / new_bit_size;
      return util_last_bit(mask) * ratio <= NIR_MAX_VEC_COMPONENTS;
   }

   unsigned iter = mask;
   while (iter) {
      int start, count;
      u_bit_scan_consecutive_range(&iter, &start, &count);
      start *= old_bit_size;
      count *= old_bit_size;
      if (start % new_bit_size != 0)
         return false;
      if (count % new_bit_size != 0)
         return false;
   }
   return true;
}

/** Re-interprets a component mask "mask" with bit size "old_bit_size" so that
 * it can be used with "new_bit_size".
 */
nir_component_mask_t
nir_component_mask_reinterpret(nir_component_mask_t mask,
                               unsigned old_bit_size,
                               unsigned new_bit_size)
{
   assert(nir_component_mask_can_reinterpret(mask, old_bit_size, new_bit_size));

   if (old_bit_size == new_bit_size)
      return mask;

   nir_component_mask_t new_mask = 0;
   unsigned iter = mask;
   while (iter) {
      int start, count;
      u_bit_scan_consecutive_range(&iter, &start, &count);
      start = start * old_bit_size / new_bit_size;
      count = count * old_bit_size / new_bit_size;
      new_mask |= BITFIELD_RANGE(start, count);
   }
   return new_mask;
}
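
/* Editor's note: a minimal usage sketch (not part of the original file).
 * Reinterpreting a two-component 32-bit write mask as a 16-bit mask doubles
 * the component range; the values below are illustrative only.
 *
 *    nir_component_mask_t mask = 0x3;   // writes .xy of a 32-bit vec2
 *    if (nir_component_mask_can_reinterpret(mask, 32, 16))
 *       mask = nir_component_mask_reinterpret(mask, 32, 16);  // now 0xf
 */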

nir_shader *
nir_shader_create(void *mem_ctx,
                  gl_shader_stage stage,
                  const nir_shader_compiler_options *options,
                  shader_info *si)
{
   nir_shader *shader = rzalloc(mem_ctx, nir_shader);

   shader->gctx = gc_context(shader);

#ifndef NDEBUG
   nir_process_debug_variable();
#endif

   exec_list_make_empty(&shader->variables);

   shader->options = options;

   if (si) {
      assert(si->stage == stage);
      shader->info = *si;
   } else {
      shader->info.stage = stage;
   }

   exec_list_make_empty(&shader->functions);

   shader->num_inputs = 0;
   shader->num_outputs = 0;
   shader->num_uniforms = 0;

   return shader;
}

void
nir_shader_add_variable(nir_shader *shader, nir_variable *var)
{
   switch (var->data.mode) {
   case nir_var_function_temp:
      assert(!"nir_shader_add_variable cannot be used for local variables");
      return;

   case nir_var_shader_temp:
   case nir_var_shader_in:
   case nir_var_shader_out:
   case nir_var_uniform:
   case nir_var_mem_ubo:
   case nir_var_mem_ssbo:
   case nir_var_image:
   case nir_var_mem_shared:
   case nir_var_system_value:
   case nir_var_mem_push_const:
   case nir_var_mem_constant:
   case nir_var_shader_call_data:
   case nir_var_ray_hit_attrib:
   case nir_var_mem_task_payload:
   case nir_var_mem_node_payload:
   case nir_var_mem_node_payload_in:
   case nir_var_mem_global:
      break;

   default:
      assert(!"invalid mode");
      return;
   }

   exec_list_push_tail(&shader->variables, &var->node);
}

nir_variable *
nir_variable_create(nir_shader *shader, nir_variable_mode mode,
                    const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = mode;
   var->data.how_declared = nir_var_declared_normally;

   if ((mode == nir_var_shader_in &&
        shader->info.stage != MESA_SHADER_VERTEX &&
        shader->info.stage != MESA_SHADER_KERNEL) ||
       (mode == nir_var_shader_out &&
        shader->info.stage != MESA_SHADER_FRAGMENT))
      var->data.interpolation = INTERP_MODE_SMOOTH;

   if (mode == nir_var_shader_in || mode == nir_var_uniform)
      var->data.read_only = true;

   nir_shader_add_variable(shader, var);

   return var;
}

nir_variable *
nir_local_variable_create(nir_function_impl *impl,
                          const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(impl->function->shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = nir_var_function_temp;

   nir_function_impl_add_variable(impl, var);

   return var;
}

nir_variable *
nir_state_variable_create(nir_shader *shader,
                          const struct glsl_type *type,
                          const char *name,
                          const gl_state_index16 tokens[STATE_LENGTH])
{
   nir_variable *var = nir_variable_create(shader, nir_var_uniform, type, name);
   var->num_state_slots = 1;
   var->state_slots = rzalloc_array(var, nir_state_slot, 1);
   memcpy(var->state_slots[0].tokens, tokens,
          sizeof(var->state_slots[0].tokens));
   shader->num_uniforms++;
   return var;
}

nir_variable *
nir_create_variable_with_location(nir_shader *shader, nir_variable_mode mode, int location,
                                  const struct glsl_type *type)
{
   /* Only supporting non-array or arrayed-io types, because otherwise we
    * don't know how much to increment num_inputs/outputs.
    */
   assert(glsl_type_is_vector_or_scalar(type) || glsl_type_is_unsized_array(type));

   const char *name;
   switch (mode) {
   case nir_var_shader_in:
      if (shader->info.stage == MESA_SHADER_VERTEX)
         name = gl_vert_attrib_name(location);
      else
         name = gl_varying_slot_name_for_stage(location, shader->info.stage);
      break;

   case nir_var_shader_out:
      if (shader->info.stage == MESA_SHADER_FRAGMENT)
         name = gl_frag_result_name(location);
      else
         name = gl_varying_slot_name_for_stage(location, shader->info.stage);
      break;

   case nir_var_system_value:
      name = gl_system_value_name(location);
      break;

   default:
      unreachable("Unsupported variable mode");
   }

   nir_variable *var = nir_variable_create(shader, mode, type, name);
   var->data.location = location;

   switch (mode) {
   case nir_var_shader_in:
      var->data.driver_location = shader->num_inputs++;
      break;

   case nir_var_shader_out:
      var->data.driver_location = shader->num_outputs++;
      break;

   case nir_var_system_value:
      break;

   default:
      unreachable("Unsupported variable mode");
   }

   return var;
}
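
/* Editor's note: an illustrative sketch (not part of the original file) of
 * creating a fragment-shader input with the helper above; "shader" is
 * assumed to be a fragment-stage nir_shader.
 *
 *    nir_variable *color_in =
 *       nir_create_variable_with_location(shader, nir_var_shader_in,
 *                                         VARYING_SLOT_COL0, glsl_vec4_type());
 *    // color_in->data.driver_location takes the old shader->num_inputs,
 *    // which is then incremented.
 */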

nir_variable *
nir_get_variable_with_location(nir_shader *shader, nir_variable_mode mode, int location,
                               const struct glsl_type *type)
{
   nir_variable *var = nir_find_variable_with_location(shader, mode, location);
   if (var) {
      /* If this shader has location_fracs, this builder function is not suitable. */
      assert(var->data.location_frac == 0);

      /* The variable for the slot should match what we expected. */
      assert(type == var->type);
      return var;
   }

   return nir_create_variable_with_location(shader, mode, location, type);
}

nir_variable *
nir_find_variable_with_location(nir_shader *shader,
                                nir_variable_mode mode,
                                unsigned location)
{
   assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp);
   nir_foreach_variable_with_modes(var, shader, mode) {
      if (var->data.location == location)
         return var;
   }
   return NULL;
}

nir_variable *
nir_find_variable_with_driver_location(nir_shader *shader,
                                       nir_variable_mode mode,
                                       unsigned location)
{
   assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp);
   nir_foreach_variable_with_modes(var, shader, mode) {
      if (var->data.driver_location == location)
         return var;
   }
   return NULL;
}

nir_variable *
nir_find_state_variable(nir_shader *s,
                        gl_state_index16 tokens[STATE_LENGTH])
{
   nir_foreach_variable_with_modes(var, s, nir_var_uniform) {
      if (var->num_state_slots == 1 &&
          !memcmp(var->state_slots[0].tokens, tokens,
                  sizeof(var->state_slots[0].tokens)))
         return var;
   }
   return NULL;
}

nir_variable *nir_find_sampler_variable_with_tex_index(nir_shader *shader,
                                                       unsigned texture_index)
{
   nir_foreach_variable_with_modes(var, shader, nir_var_uniform) {
      unsigned size =
         glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
      if ((glsl_type_is_texture(glsl_without_array(var->type)) ||
           glsl_type_is_sampler(glsl_without_array(var->type))) &&
          (var->data.binding == texture_index ||
           (var->data.binding < texture_index &&
            var->data.binding + size > texture_index)))
         return var;
   }
   return NULL;
}

/* Annoyingly, qsort_r is not in the C standard library and, in particular, we
 * can't count on it on MSVC and Android.  So we stuff the CMP function into
 * each array element.  It's a bit messy and burns more memory but the list of
 * variables should never be all that long.
 */
struct var_cmp {
   nir_variable *var;
   int (*cmp)(const nir_variable *, const nir_variable *);
};

static int
var_sort_cmp(const void *_a, const void *_b, void *_cmp)
{
   const struct var_cmp *a = _a;
   const struct var_cmp *b = _b;
   assert(a->cmp == b->cmp);
   return a->cmp(a->var, b->var);
}

void
nir_sort_variables_with_modes(nir_shader *shader,
                              int (*cmp)(const nir_variable *,
                                         const nir_variable *),
                              nir_variable_mode modes)
{
   unsigned num_vars = 0;
   nir_foreach_variable_with_modes(var, shader, modes) {
      ++num_vars;
   }
   struct var_cmp *vars = ralloc_array(shader, struct var_cmp, num_vars);
   unsigned i = 0;
   nir_foreach_variable_with_modes_safe(var, shader, modes) {
      exec_node_remove(&var->node);
      vars[i++] = (struct var_cmp){
         .var = var,
         .cmp = cmp,
      };
   }
   assert(i == num_vars);

   util_qsort_r(vars, num_vars, sizeof(*vars), var_sort_cmp, cmp);

   for (i = 0; i < num_vars; i++)
      exec_list_push_tail(&shader->variables, &vars[i].var->node);

   ralloc_free(vars);
}
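
/* Editor's note: an illustrative comparator sketch (not part of the original
 * file) showing how a caller might sort shader inputs by location with the
 * helper above.
 *
 *    static int
 *    cmp_by_location(const nir_variable *a, const nir_variable *b)
 *    {
 *       return a->data.location - b->data.location;
 *    }
 *
 *    nir_sort_variables_with_modes(shader, cmp_by_location, nir_var_shader_in);
 */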

nir_function *
nir_function_create(nir_shader *shader, const char *name)
{
   nir_function *func = ralloc(shader, nir_function);

   exec_list_push_tail(&shader->functions, &func->node);

   func->name = ralloc_strdup(func, name);
   func->shader = shader;
   func->num_params = 0;
   func->params = NULL;
   func->impl = NULL;
   func->is_entrypoint = false;
   func->is_preamble = false;
   func->dont_inline = false;
   func->should_inline = false;
   func->is_subroutine = false;
   func->is_tmp_globals_wrapper = false;
   func->subroutine_index = 0;
   func->num_subroutine_types = 0;
   func->subroutine_types = NULL;
   func->workgroup_size[0] = 0;
   func->workgroup_size[1] = 0;
   func->workgroup_size[2] = 0;

   /* Only meaningful for shader libraries, so don't export by default. */
   func->is_exported = false;

   return func;
}

void
nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src)
{
   dest->src = nir_src_for_ssa(src->src.ssa);
   for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
      dest->swizzle[i] = src->swizzle[i];
}

bool
nir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, unsigned srcn)
{
   static uint8_t trivial_swizzle[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
   STATIC_ASSERT(ARRAY_SIZE(trivial_swizzle) == NIR_MAX_VEC_COMPONENTS);

   const nir_alu_src *src = &alu->src[srcn];
   unsigned num_components = nir_ssa_alu_instr_src_components(alu, srcn);

   return (src->src.ssa->num_components == num_components) &&
          (memcmp(src->swizzle, trivial_swizzle, num_components) == 0);
}

static void
cf_init(nir_cf_node *node, nir_cf_node_type type)
{
   exec_node_init(&node->node);
   node->parent = NULL;
   node->type = type;
}

nir_function_impl *
nir_function_impl_create_bare(nir_shader *shader)
{
   nir_function_impl *impl = ralloc(shader, nir_function_impl);

   impl->function = NULL;
   impl->preamble = NULL;

   cf_init(&impl->cf_node, nir_cf_node_function);

   exec_list_make_empty(&impl->body);
   exec_list_make_empty(&impl->locals);
   impl->ssa_alloc = 0;
   impl->num_blocks = 0;
   impl->valid_metadata = nir_metadata_none;
   impl->structured = true;

   /* create start & end blocks */
   nir_block *start_block = nir_block_create(shader);
   nir_block *end_block = nir_block_create(shader);
   start_block->cf_node.parent = &impl->cf_node;
   end_block->cf_node.parent = &impl->cf_node;
   impl->end_block = end_block;

   exec_list_push_tail(&impl->body, &start_block->cf_node.node);

   start_block->successors[0] = end_block;
   _mesa_set_add(end_block->predecessors, start_block);
   return impl;
}

nir_function_impl *
nir_function_impl_create(nir_function *function)
{
   assert(function->impl == NULL);

   nir_function_impl *impl = nir_function_impl_create_bare(function->shader);
   nir_function_set_impl(function, impl);
   return impl;
}

nir_block *
nir_block_create(nir_shader *shader)
{
   nir_block *block = rzalloc(shader, nir_block);

   cf_init(&block->cf_node, nir_cf_node_block);

   block->successors[0] = block->successors[1] = NULL;
   block->predecessors = _mesa_pointer_set_create(block);
   block->imm_dom = NULL;
   /* XXX maybe it would be worth it to defer allocation?  This
    * way it doesn't get allocated for shader refs that never run
    * nir_calc_dominance?  For example, state-tracker creates an
    * initial IR, clones that, runs appropriate lowering pass, passes
    * to driver which does common lowering/opt, and then stores ref
    * which is later used to do state specific lowering and further
    * opt.  Do any of the references not need dominance metadata?
    */
   block->dom_frontier = _mesa_pointer_set_create(block);

   exec_list_make_empty(&block->instr_list);

   return block;
}

static inline void
src_init(nir_src *src)
{
   src->ssa = NULL;
}

nir_if *
nir_if_create(nir_shader *shader)
{
   nir_if *if_stmt = ralloc(shader, nir_if);

   if_stmt->control = nir_selection_control_none;

   cf_init(&if_stmt->cf_node, nir_cf_node_if);
   src_init(&if_stmt->condition);

   nir_block *then = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->then_list);
   exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
   then->cf_node.parent = &if_stmt->cf_node;

   nir_block *else_stmt = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->else_list);
   exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
   else_stmt->cf_node.parent = &if_stmt->cf_node;

   return if_stmt;
}

nir_loop *
nir_loop_create(nir_shader *shader)
{
   nir_loop *loop = rzalloc(shader, nir_loop);

   cf_init(&loop->cf_node, nir_cf_node_loop);
   /* Assume that loops are divergent until proven otherwise */
   loop->divergent_break = true;
   loop->divergent_continue = true;

   nir_block *body = nir_block_create(shader);
   exec_list_make_empty(&loop->body);
   exec_list_push_tail(&loop->body, &body->cf_node.node);
   body->cf_node.parent = &loop->cf_node;

   body->successors[0] = body;
   _mesa_set_add(body->predecessors, body);

   exec_list_make_empty(&loop->continue_list);

   return loop;
}

static void
instr_init(nir_instr *instr, nir_instr_type type)
{
   instr->type = type;
   instr->block = NULL;
   exec_node_init(&instr->node);
}

static void
alu_src_init(nir_alu_src *src)
{
   src_init(&src->src);
   for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
      src->swizzle[i] = i;
}

nir_alu_instr *
nir_alu_instr_create(nir_shader *shader, nir_op op)
{
   unsigned num_srcs = nir_op_infos[op].num_inputs;
   nir_alu_instr *instr = gc_zalloc_zla(shader->gctx, nir_alu_instr, nir_alu_src, num_srcs);

   instr_init(&instr->instr, nir_instr_type_alu);
   instr->op = op;
   for (unsigned i = 0; i < num_srcs; i++)
      alu_src_init(&instr->src[i]);

   return instr;
}

nir_deref_instr *
nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
{
   nir_deref_instr *instr = gc_zalloc(shader->gctx, nir_deref_instr, 1);

   instr_init(&instr->instr, nir_instr_type_deref);

   instr->deref_type = deref_type;
   if (deref_type != nir_deref_type_var)
      src_init(&instr->parent);

   if (deref_type == nir_deref_type_array ||
       deref_type == nir_deref_type_ptr_as_array)
      src_init(&instr->arr.index);

   return instr;
}

nir_jump_instr *
nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
{
   nir_jump_instr *instr = gc_alloc(shader->gctx, nir_jump_instr, 1);
   instr_init(&instr->instr, nir_instr_type_jump);
   src_init(&instr->condition);
   instr->type = type;
   instr->target = NULL;
   instr->else_target = NULL;

   return instr;
}

nir_load_const_instr *
nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
                            unsigned bit_size)
{
   nir_load_const_instr *instr =
      gc_zalloc_zla(shader->gctx, nir_load_const_instr, nir_const_value, num_components);
   instr_init(&instr->instr, nir_instr_type_load_const);

   nir_def_init(&instr->instr, &instr->def, num_components, bit_size);

   return instr;
}

nir_intrinsic_instr *
nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
{
   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   nir_intrinsic_instr *instr =
      gc_zalloc_zla(shader->gctx, nir_intrinsic_instr, nir_src, num_srcs);

   instr_init(&instr->instr, nir_instr_type_intrinsic);
   instr->intrinsic = op;

   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i]);

   return instr;
}

nir_call_instr *
nir_call_instr_create(nir_shader *shader, nir_function *callee)
{
   const unsigned num_params = callee->num_params;
   nir_call_instr *instr =
      gc_zalloc_zla(shader->gctx, nir_call_instr, nir_src, num_params);

   instr_init(&instr->instr, nir_instr_type_call);
   instr->callee = callee;
   instr->num_params = num_params;
   for (unsigned i = 0; i < num_params; i++)
      src_init(&instr->params[i]);

   return instr;
}

static int8_t default_tg4_offsets[4][2] = {
   { 0, 1 },
   { 1, 1 },
   { 1, 0 },
   { 0, 0 },
};

nir_tex_instr *
nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
{
   nir_tex_instr *instr = gc_zalloc(shader->gctx, nir_tex_instr, 1);
   instr_init(&instr->instr, nir_instr_type_tex);

   instr->num_srcs = num_srcs;
   instr->src = gc_alloc(shader->gctx, nir_tex_src, num_srcs);
   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i].src);

   instr->texture_index = 0;
   instr->sampler_index = 0;
   memcpy(instr->tg4_offsets, default_tg4_offsets, sizeof(instr->tg4_offsets));

   return instr;
}

void
nir_tex_instr_add_src(nir_tex_instr *tex,
                      nir_tex_src_type src_type,
                      nir_def *src)
{
   nir_tex_src *new_srcs = gc_zalloc(gc_get_context(tex), nir_tex_src, tex->num_srcs + 1);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      new_srcs[i].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &new_srcs[i].src,
                         &tex->src[i].src);
   }

   gc_free(tex->src);
   tex->src = new_srcs;

   tex->src[tex->num_srcs].src_type = src_type;
   nir_instr_init_src(&tex->instr, &tex->src[tex->num_srcs].src, src);
   tex->num_srcs++;
}

void
nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx)
{
   assert(src_idx < tex->num_srcs);

   /* First rewrite the source to NIR_SRC_INIT */
   nir_instr_clear_src(&tex->instr, &tex->src[src_idx].src);

   /* Now, move all of the other sources down */
   for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) {
      tex->src[i - 1].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &tex->src[i - 1].src, &tex->src[i].src);
   }
   tex->num_srcs--;
}

bool
nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex)
{
   if (tex->op != nir_texop_tg4)
      return false;
   return memcmp(tex->tg4_offsets, default_tg4_offsets,
                 sizeof(tex->tg4_offsets)) != 0;
}

nir_phi_instr *
nir_phi_instr_create(nir_shader *shader)
{
   nir_phi_instr *instr = gc_alloc(shader->gctx, nir_phi_instr, 1);
   instr_init(&instr->instr, nir_instr_type_phi);

   exec_list_make_empty(&instr->srcs);

   return instr;
}

/**
 * Adds a new source to a NIR instruction.
 *
 * Note that this does not update the def/use relationship for src, assuming
 * that the instr is not in the shader.  If it is, you have to do:
 *
 * list_addtail(&phi_src->src.use_link, &src.ssa->uses);
 */
nir_phi_src *
nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_def *src)
{
   nir_phi_src *phi_src;

   phi_src = gc_zalloc(gc_get_context(instr), nir_phi_src, 1);
   phi_src->pred = pred;
   phi_src->src = nir_src_for_ssa(src);
   nir_src_set_parent_instr(&phi_src->src, &instr->instr);
   exec_list_push_tail(&instr->srcs, &phi_src->node);

   return phi_src;
}
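
/* Editor's note: an illustrative sketch (not part of the original file) of
 * the caveat above -- adding a source to a phi that is already in the shader
 * requires linking the use manually:
 *
 *    nir_phi_src *phi_src = nir_phi_instr_add_src(phi, pred_block, value);
 *    list_addtail(&phi_src->src.use_link, &value->uses);
 */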

nir_parallel_copy_instr *
nir_parallel_copy_instr_create(nir_shader *shader)
{
   nir_parallel_copy_instr *instr = gc_alloc(shader->gctx, nir_parallel_copy_instr, 1);
   instr_init(&instr->instr, nir_instr_type_parallel_copy);

   exec_list_make_empty(&instr->entries);

   return instr;
}

nir_debug_info_instr *
nir_debug_info_instr_create(nir_shader *shader, nir_debug_info_type type,
                            uint32_t string_length)
{
   uint32_t additional_size = 0;
   if (type == nir_debug_info_string)
      additional_size = string_length + 1;

   nir_debug_info_instr *instr = gc_zalloc_size(
      shader->gctx, sizeof(nir_debug_info_instr) + additional_size, 1);
   instr_init(&instr->instr, nir_instr_type_debug_info);

   instr->type = type;

   if (type == nir_debug_info_string)
      instr->string_length = string_length;

   return instr;
}

nir_undef_instr *
nir_undef_instr_create(nir_shader *shader,
                       unsigned num_components,
                       unsigned bit_size)
{
   nir_undef_instr *instr = gc_alloc(shader->gctx, nir_undef_instr, 1);
   instr_init(&instr->instr, nir_instr_type_undef);

   nir_def_init(&instr->instr, &instr->def, num_components, bit_size);

   return instr;
}

static nir_const_value
const_value_float(double d, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   /* clang-format off */
   switch (bit_size) {
   case 16: v.u16 = _mesa_float_to_half(d);  break;
   case 32: v.f32 = d;                       break;
   case 64: v.f64 = d;                       break;
   default:
      unreachable("Invalid bit size");
   }
   /* clang-format on */

   return v;
}

static nir_const_value
const_value_int(int64_t i, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   /* clang-format off */
   switch (bit_size) {
   case 1:  v.b   = i & 1; break;
   case 8:  v.i8  = i;     break;
   case 16: v.i16 = i;     break;
   case 32: v.i32 = i;     break;
   case 64: v.i64 = i;     break;
   default:
      unreachable("Invalid bit size");
   }
   /* clang-format on */

   return v;
}

nir_const_value
nir_alu_binop_identity(nir_op binop, unsigned bit_size)
{
   const int64_t max_int = (1ull << (bit_size - 1)) - 1;
   const int64_t min_int = -max_int - 1;
   switch (binop) {
   case nir_op_iadd:
      return const_value_int(0, bit_size);
   case nir_op_fadd:
      return const_value_float(0, bit_size);
   case nir_op_imul:
      return const_value_int(1, bit_size);
   case nir_op_fmul:
      return const_value_float(1, bit_size);
   case nir_op_imin:
      return const_value_int(max_int, bit_size);
   case nir_op_umin:
      return const_value_int(~0ull, bit_size);
   case nir_op_fmin:
      return const_value_float(INFINITY, bit_size);
   case nir_op_imax:
      return const_value_int(min_int, bit_size);
   case nir_op_umax:
      return const_value_int(0, bit_size);
   case nir_op_fmax:
      return const_value_float(-INFINITY, bit_size);
   case nir_op_iand:
      return const_value_int(~0ull, bit_size);
   case nir_op_ior:
      return const_value_int(0, bit_size);
   case nir_op_ixor:
      return const_value_int(0, bit_size);
   default:
      unreachable("Invalid reduction operation");
   }
}

nir_function_impl *
nir_cf_node_get_function(nir_cf_node *node)
{
   while (node->type != nir_cf_node_function) {
      node = node->parent;
   }

   return nir_cf_node_as_function(node);
}

/* Reduces a cursor by trying to convert everything to after and trying to
 * go up to block granularity when possible.
 */
static nir_cursor
reduce_cursor(nir_cursor cursor)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      if (exec_list_is_empty(&cursor.block->instr_list)) {
         /* Empty block.  After is as good as before. */
         cursor.option = nir_cursor_after_block;
      }
      return cursor;

   case nir_cursor_after_block:
      return cursor;

   case nir_cursor_before_instr: {
      nir_instr *prev_instr = nir_instr_prev(cursor.instr);
      if (prev_instr) {
         /* Before this instruction is after the previous */
         cursor.instr = prev_instr;
         cursor.option = nir_cursor_after_instr;
      } else {
         /* No previous instruction.  Switch to before block */
         cursor.block = cursor.instr->block;
         cursor.option = nir_cursor_before_block;
      }
      return reduce_cursor(cursor);
   }

   case nir_cursor_after_instr:
      if (nir_instr_next(cursor.instr) == NULL) {
         /* This is the last instruction, switch to after block */
         cursor.option = nir_cursor_after_block;
         cursor.block = cursor.instr->block;
      }
      return cursor;

   default:
      unreachable("Invalid cursor option");
   }
}
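
/* Editor's note: a worked example (not part of the original file).  If I is
 * the only instruction in block B, then before_instr(I) and before_block(B)
 * both reduce to before_block(B), while after_instr(I) and after_block(B)
 * both reduce to after_block(B); nir_cursors_equal below compares these
 * reduced forms.
 */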

bool
nir_cursors_equal(nir_cursor a, nir_cursor b)
{
   /* Reduced cursors should be unique */
   a = reduce_cursor(a);
   b = reduce_cursor(b);

   return a.block == b.block && a.option == b.option;
}

static bool
add_use_cb(nir_src *src, void *state)
{
   nir_instr *instr = state;

   nir_src_set_parent_instr(src, instr);
   list_addtail(&src->use_link, &src->ssa->uses);

   return true;
}

static bool
add_ssa_def_cb(nir_def *def, void *state)
{
   nir_instr *instr = state;

   if (instr->block && def->index == UINT_MAX) {
      nir_function_impl *impl =
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;

      impl->valid_metadata &= ~nir_metadata_live_defs;
   }

   return true;
}

static void
add_defs_uses(nir_instr *instr)
{
   nir_foreach_src(instr, add_use_cb, instr);
   nir_foreach_def(instr, add_ssa_def_cb, instr);
}

void
nir_instr_insert(nir_cursor cursor, nir_instr *instr)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      /* Only allow inserting jumps into empty blocks. */
      if (instr->type == nir_instr_type_jump)
         assert(exec_list_is_empty(&cursor.block->instr_list));

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_head(&cursor.block->instr_list, &instr->node);
      break;
   case nir_cursor_after_block: {
      /* Inserting instructions after a jump is illegal. */
      nir_instr *last = nir_block_last_instr(cursor.block);
      assert(last == NULL || last->type != nir_instr_type_jump);
      (void)last;

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_tail(&cursor.block->instr_list, &instr->node);
      break;
   }
   case nir_cursor_before_instr:
      assert(instr->type != nir_instr_type_jump);
      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_node_before(&cursor.instr->node, &instr->node);
      break;
   case nir_cursor_after_instr:
      /* Inserting instructions after a jump is illegal. */
      assert(cursor.instr->type != nir_instr_type_jump);

      /* Only allow inserting jumps at the end of the block. */
      if (instr->type == nir_instr_type_jump)
         assert(cursor.instr == nir_block_last_instr(cursor.instr->block));

      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_after(&cursor.instr->node, &instr->node);
      break;
   }

   if (instr->type == nir_instr_type_jump)
      nir_handle_add_jump(instr->block);

   nir_function_impl *impl = nir_cf_node_get_function(&instr->block->cf_node);
   impl->valid_metadata &= ~nir_metadata_instr_index;
}

bool
nir_instr_move(nir_cursor cursor, nir_instr *instr)
{
   /* If the cursor happens to refer to this instruction (either before or
    * after), don't do anything.
    */
   switch (cursor.option) {
   case nir_cursor_before_instr:
      if (cursor.instr == instr || nir_instr_prev(cursor.instr) == instr)
         return false;
      break;
   case nir_cursor_after_instr:
      if (cursor.instr == instr || nir_instr_next(cursor.instr) == instr)
         return false;
      break;
   case nir_cursor_before_block:
      if (cursor.block == instr->block && nir_instr_is_first(instr))
         return false;
      break;
   case nir_cursor_after_block:
      if (cursor.block == instr->block && nir_instr_is_last(instr))
         return false;
      break;
   }

   nir_instr_remove(instr);
   nir_instr_insert(cursor, instr);
   return true;
}

static bool
src_is_valid(const nir_src *src)
{
   return (src->ssa != NULL);
}

static bool
remove_use_cb(nir_src *src, void *state)
{
   (void)state;

   if (src_is_valid(src))
      list_del(&src->use_link);

   return true;
}

static void
remove_defs_uses(nir_instr *instr)
{
   nir_foreach_src(instr, remove_use_cb, instr);
}

void
nir_instr_remove_v(nir_instr *instr)
{
   remove_defs_uses(instr);
   exec_node_remove(&instr->node);

   if (instr->type == nir_instr_type_jump) {
      nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
      nir_handle_remove_jump(instr->block, jump_instr->type);
   }
}

void
nir_instr_free(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_tex:
      gc_free(nir_instr_as_tex(instr)->src);
      break;

   case nir_instr_type_phi: {
      nir_phi_instr *phi = nir_instr_as_phi(instr);
      nir_foreach_phi_src_safe(phi_src, phi)
         gc_free(phi_src);
      break;
   }

   default:
      break;
   }

   gc_free(instr);
}

void
nir_instr_free_list(struct exec_list *list)
{
   struct exec_node *node;
   while ((node = exec_list_pop_head(list))) {
      nir_instr *removed_instr = exec_node_data(nir_instr, node, node);
      nir_instr_free(removed_instr);
   }
}

static bool
nir_instr_free_and_dce_live_cb(nir_def *def, void *state)
{
   bool *live = state;

   if (!nir_def_is_unused(def)) {
      *live = true;
      return false;
   } else {
      return true;
   }
}

static bool
nir_instr_free_and_dce_is_live(nir_instr *instr)
{
   /* Note: don't have to worry about jumps because they don't have dests to
    * become unused.
    */
   if (instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
      if (!(info->flags & NIR_INTRINSIC_CAN_ELIMINATE))
         return true;
   }

   bool live = false;
   nir_foreach_def(instr, nir_instr_free_and_dce_live_cb, &live);
   return live;
}

static bool
nir_instr_dce_add_dead_srcs_cb(nir_src *src, void *state)
{
   nir_instr_worklist *wl = state;

   list_del(&src->use_link);
   if (!nir_instr_free_and_dce_is_live(src->ssa->parent_instr))
      nir_instr_worklist_push_tail(wl, src->ssa->parent_instr);

   /* Stop nir_instr_remove from trying to delete the link again. */
   src->ssa = NULL;

   return true;
}

static void
nir_instr_dce_add_dead_ssa_srcs(nir_instr_worklist *wl, nir_instr *instr)
{
   nir_foreach_src(instr, nir_instr_dce_add_dead_srcs_cb, wl);
}

/**
 * Frees an instruction and any SSA defs that it used that are now dead,
 * returning a nir_cursor where the instruction previously was.
 */
nir_cursor
nir_instr_free_and_dce(nir_instr *instr)
{
   nir_instr_worklist *worklist = nir_instr_worklist_create();

   nir_instr_dce_add_dead_ssa_srcs(worklist, instr);
   nir_cursor c = nir_instr_remove(instr);

   struct exec_list to_free;
   exec_list_make_empty(&to_free);

   nir_instr *dce_instr;
   while ((dce_instr = nir_instr_worklist_pop_head(worklist))) {
      nir_instr_dce_add_dead_ssa_srcs(worklist, dce_instr);

      /* If we're removing the instr where our cursor is, then we have to
       * point the cursor elsewhere.
       */
      if ((c.option == nir_cursor_before_instr ||
           c.option == nir_cursor_after_instr) &&
          c.instr == dce_instr)
         c = nir_instr_remove(dce_instr);
      else
         nir_instr_remove(dce_instr);
      exec_list_push_tail(&to_free, &dce_instr->node);
   }

   nir_instr_free_list(&to_free);

   nir_instr_worklist_destroy(worklist);

   return c;
}
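
/* Editor's note: an illustrative sketch (not part of the original file) of
 * the intended pattern -- replace an instruction's uses, then let the helper
 * clean up the instruction and any sources that just became dead:
 *
 *    nir_def_rewrite_uses(&old_alu->def, replacement);
 *    nir_cursor c = nir_instr_free_and_dce(&old_alu->instr);
 *    // "c" marks where old_alu was and can seed follow-up insertion.
 */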

/*@}*/

nir_def *
nir_instr_def(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return &nir_instr_as_alu(instr)->def;

   case nir_instr_type_deref:
      return &nir_instr_as_deref(instr)->def;

   case nir_instr_type_tex:
      return &nir_instr_as_tex(instr)->def;

   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
         return &intrin->def;
      } else {
         return NULL;
      }
   }

   case nir_instr_type_phi:
      return &nir_instr_as_phi(instr)->def;

   case nir_instr_type_parallel_copy:
      unreachable("Parallel copies are unsupported by this function");

   case nir_instr_type_load_const:
      return &nir_instr_as_load_const(instr)->def;

   case nir_instr_type_undef:
      return &nir_instr_as_undef(instr)->def;

   case nir_instr_type_debug_info:
      return &nir_instr_as_debug_info(instr)->def;

   case nir_instr_type_call:
   case nir_instr_type_jump:
      return NULL;
   }

   unreachable("Invalid instruction type");
}

bool
nir_foreach_phi_src_leaving_block(nir_block *block,
                                  nir_foreach_src_cb cb,
                                  void *state)
{
   for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) {
      if (block->successors[i] == NULL)
         continue;

      nir_foreach_phi(phi, block->successors[i]) {
         nir_foreach_phi_src(phi_src, phi) {
            if (phi_src->pred == block) {
               if (!cb(&phi_src->src, state))
                  return false;
            }
         }
      }
   }

   return true;
}

nir_const_value
nir_const_value_for_float(double f, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   /* clang-format off */
   switch (bit_size) {
   case 16: v.u16 = _mesa_float_to_half(f);  break;
   case 32: v.f32 = f;                       break;
   case 64: v.f64 = f;                       break;
   default: unreachable("Invalid bit size");
   }
   /* clang-format on */

   return v;
}

double
nir_const_value_as_float(nir_const_value value, unsigned bit_size)
{
   /* clang-format off */
   switch (bit_size) {
   case 16: return _mesa_half_to_float(value.u16);
   case 32: return value.f32;
   case 64: return value.f64;
   default: unreachable("Invalid bit size");
   }
   /* clang-format on */
}

nir_const_value *
nir_src_as_const_value(nir_src src)
{
   if (src.ssa->parent_instr->type != nir_instr_type_load_const)
      return NULL;

   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   return load->value;
}

const char *
nir_src_as_string(nir_src src)
{
   nir_debug_info_instr *di = nir_src_as_debug_info(src);
   if (di && di->type == nir_debug_info_string)
      return di->string;

   return NULL;
}

/**
 * Returns true if the source is known to be always uniform. Otherwise it
 * returns false, which means it may or may not be uniform but it can't be
 * determined.
 *
 * For a more precise analysis of uniform values, use nir_divergence_analysis.
 */
bool
nir_src_is_always_uniform(nir_src src)
{
   /* Constants are trivially uniform */
   if (src.ssa->parent_instr->type == nir_instr_type_load_const)
      return true;

   if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);
      /* As are uniform variables */
      if (intr->intrinsic == nir_intrinsic_load_uniform &&
          nir_src_is_always_uniform(intr->src[0]))
         return true;
      /* From the Vulkan specification 15.6.1. Push Constant Interface:
       * "Any member of a push constant block that is declared as an array must
       * only be accessed with dynamically uniform indices."
       */
      if (intr->intrinsic == nir_intrinsic_load_push_constant)
         return true;
      if (intr->intrinsic == nir_intrinsic_load_deref &&
          nir_deref_mode_is(nir_src_as_deref(intr->src[0]), nir_var_mem_push_const))
         return true;
   }

   /* Operating together uniform expressions produces a uniform result */
   if (src.ssa->parent_instr->type == nir_instr_type_alu) {
      nir_alu_instr *alu = nir_instr_as_alu(src.ssa->parent_instr);
      for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
         if (!nir_src_is_always_uniform(alu->src[i].src))
            return false;
      }

      return true;
   }

   /* XXX: this could have many more tests, such as when a sampler function is
    * called with uniform arguments.
    */
   return false;
}
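
/* Editor's note: an illustrative consequence (not part of the original file):
 * an ALU expression whose operands all trace back to load_uniform or
 * load_push_constant is reported as always uniform, since the ALU case above
 * recurses into every operand; a value derived from a per-vertex input is
 * not, even if it happens to be uniform at runtime.
 */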
1503 
1504 nir_block *
nir_src_get_block(nir_src * src)1505 nir_src_get_block(nir_src *src)
1506 {
1507    if (nir_src_is_if(src))
1508       return nir_cf_node_cf_tree_prev(&nir_src_parent_if(src)->cf_node);
1509    else if (nir_src_parent_instr(src)->type == nir_instr_type_phi)
1510       return list_entry(src, nir_phi_src, src)->pred;
1511    else
1512       return nir_src_parent_instr(src)->block;
1513 }
1514 
1515 static void
src_remove_all_uses(nir_src * src)1516 src_remove_all_uses(nir_src *src)
1517 {
1518    if (src && src_is_valid(src))
1519       list_del(&src->use_link);
1520 }
1521 
1522 static void
src_add_all_uses(nir_src * src,nir_instr * parent_instr,nir_if * parent_if)1523 src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
1524 {
1525    if (!src)
1526       return;
1527 
1528    if (!src_is_valid(src))
1529       return;
1530 
1531    if (parent_instr) {
1532       nir_src_set_parent_instr(src, parent_instr);
1533    } else {
1534       assert(parent_if);
1535       nir_src_set_parent_if(src, parent_if);
1536    }
1537 
1538    list_addtail(&src->use_link, &src->ssa->uses);
1539 }
1540 
1541 void
nir_instr_init_src(nir_instr * instr,nir_src * src,nir_def * def)1542 nir_instr_init_src(nir_instr *instr, nir_src *src, nir_def *def)
1543 {
1544    *src = nir_src_for_ssa(def);
1545    src_add_all_uses(src, instr, NULL);
1546 }
1547 
1548 void
nir_instr_clear_src(nir_instr * instr,nir_src * src)1549 nir_instr_clear_src(nir_instr *instr, nir_src *src)
1550 {
1551    src_remove_all_uses(src);
1552    *src = NIR_SRC_INIT;
1553 }
1554 
1555 void
nir_instr_move_src(nir_instr * dest_instr,nir_src * dest,nir_src * src)1556 nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
1557 {
1558    assert(!src_is_valid(dest) || nir_src_parent_instr(dest) == dest_instr);
1559 
1560    src_remove_all_uses(dest);
1561    src_remove_all_uses(src);
1562    *dest = *src;
1563    *src = NIR_SRC_INIT;
1564    src_add_all_uses(dest, dest_instr, NULL);
1565 }
1566 
1567 void
nir_def_init(nir_instr * instr,nir_def * def,unsigned num_components,unsigned bit_size)1568 nir_def_init(nir_instr *instr, nir_def *def,
1569              unsigned num_components,
1570              unsigned bit_size)
1571 {
1572    def->parent_instr = instr;
1573    list_inithead(&def->uses);
1574    def->num_components = num_components;
1575    def->bit_size = bit_size;
1576    def->divergent = true; /* This is the safer default */
1577    def->loop_invariant = false;
1578 
1579    if (instr->block) {
1580       nir_function_impl *impl =
1581          nir_cf_node_get_function(&instr->block->cf_node);
1582 
1583       def->index = impl->ssa_alloc++;
1584 
1585       impl->valid_metadata &= ~nir_metadata_live_defs;
1586    } else {
1587       def->index = UINT_MAX;
1588    }
1589 }
1590 
1591 void
1592 nir_def_rewrite_uses(nir_def *def, nir_def *new_ssa)
1593 {
1594    assert(def != new_ssa);
1595    nir_foreach_use_including_if_safe(use_src, def) {
1596       nir_src_rewrite(use_src, new_ssa);
1597    }
1598 }
1599 
1600 void
1601 nir_def_rewrite_uses_src(nir_def *def, nir_src new_src)
1602 {
1603    nir_def_rewrite_uses(def, new_src.ssa);
1604 }
1605 
1606 static bool
1607 is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
1608 {
1609    assert(start->block == end->block);
1610 
1611    if (between->block != start->block)
1612       return false;
1613 
1614    /* Search backwards looking for "between" */
1615    while (start != end) {
1616       if (between == end)
1617          return true;
1618 
1619       end = nir_instr_prev(end);
1620       assert(end);
1621    }
1622 
1623    return false;
1624 }
1625 
1626 /* Replaces all uses of the given SSA def with the given source but only if
1627  * the use comes after the after_me instruction.  This can be useful if you
1628  * are emitting code to fix up the result of some instruction: you can freely
1629  * use the result in that code and then call rewrite_uses_after and pass the
1630  * last fixup instruction as after_me and it will replace all of the uses you
1631  * want without touching the fixup code.
1632  *
1633  * This function assumes that after_me is in the same block as
1634  * def->parent_instr and that after_me comes after def->parent_instr.
1635  */
1636 void
1637 nir_def_rewrite_uses_after(nir_def *def, nir_def *new_ssa,
1638                            nir_instr *after_me)
1639 {
1640    if (def == new_ssa)
1641       return;
1642 
1643    nir_foreach_use_including_if_safe(use_src, def) {
1644       if (!nir_src_is_if(use_src)) {
1645          assert(nir_src_parent_instr(use_src) != def->parent_instr);
1646 
1647          /* Since def already dominates all of its uses, the only way a use can
1648           * not be dominated by after_me is if it is between def and after_me in
1649           * the instruction list.
1650           */
1651          if (is_instr_between(def->parent_instr, after_me, nir_src_parent_instr(use_src)))
1652             continue;
1653       }
1654 
1655       nir_src_rewrite(use_src, new_ssa);
1656    }
1657 }
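
/* Usage sketch (illustrative only; "b" and "def" are hypothetical): emit a
 * fixup that consumes the original value, then repoint every later use:
 *
 *    b.cursor = nir_after_instr(def->parent_instr);
 *    nir_def *fixed = nir_fadd_imm(&b, def, 1.0);
 *    nir_def_rewrite_uses_after(def, fixed, fixed->parent_instr);
 *
 * The fadd keeps reading "def" while everything downstream now sees "fixed".
 */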
1658 
1659 static nir_def *
1660 get_store_value(nir_intrinsic_instr *intrin)
1661 {
1662    assert(nir_intrinsic_has_write_mask(intrin));
1663    /* deref stores have the deref in src[0] and the store value in src[1] */
1664    if (intrin->intrinsic == nir_intrinsic_store_deref ||
1665        intrin->intrinsic == nir_intrinsic_store_deref_block_intel)
1666       return intrin->src[1].ssa;
1667 
1668    /* all other stores have the store value in src[0] */
1669    return intrin->src[0].ssa;
1670 }
1671 
1672 nir_component_mask_t
1673 nir_src_components_read(const nir_src *src)
1674 {
1675    assert(nir_src_parent_instr(src));
1676 
1677    if (nir_src_parent_instr(src)->type == nir_instr_type_alu) {
1678       nir_alu_instr *alu = nir_instr_as_alu(nir_src_parent_instr(src));
1679       nir_alu_src *alu_src = exec_node_data(nir_alu_src, src, src);
1680       int src_idx = alu_src - &alu->src[0];
1681       assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs);
1682       return nir_alu_instr_src_read_mask(alu, src_idx);
1683    } else if (nir_src_parent_instr(src)->type == nir_instr_type_intrinsic) {
1684       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(nir_src_parent_instr(src));
1685       if (nir_intrinsic_has_write_mask(intrin) && src->ssa == get_store_value(intrin))
1686          return nir_intrinsic_write_mask(intrin);
1687       else
1688          return (1 << src->ssa->num_components) - 1;
1689    } else {
1690       return (1 << src->ssa->num_components) - 1;
1691    }
1692 }
1693 
1694 nir_component_mask_t
1695 nir_def_components_read(const nir_def *def)
1696 {
1697    nir_component_mask_t read_mask = 0;
1698 
1699    nir_foreach_use_including_if(use, def) {
1700       read_mask |= nir_src_is_if(use) ? 1 : nir_src_components_read(use);
1701 
1702       if (read_mask == (1 << def->num_components) - 1)
1703          return read_mask;
1704    }
1705 
1706    return read_mask;
1707 }
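
/* Usage sketch (hypothetical pass logic): decide whether a vector def can be
 * narrowed.  If only .xyz of a vec4 load is ever read, the mask is 0x7:
 *
 *    nir_component_mask_t mask = nir_def_components_read(&intrin->def);
 *    if (mask == 0x7) { ...shrink the load to 3 components... }
 *
 * Note that if-condition uses count as reading component 0.
 */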
1708 
1709 bool
1710 nir_def_all_uses_are_fsat(const nir_def *def)
1711 {
1712    nir_foreach_use(src, def) {
1713       if (nir_src_is_if(src))
1714          return false;
1715 
1716       nir_instr *use = nir_src_parent_instr(src);
1717       if (use->type != nir_instr_type_alu)
1718          return false;
1719 
1720       nir_alu_instr *alu = nir_instr_as_alu(use);
1721       if (alu->op != nir_op_fsat)
1722          return false;
1723    }
1724 
1725    return true;
1726 }
1727 
1728 bool
1729 nir_def_all_uses_ignore_sign_bit(const nir_def *def)
1730 {
1731    nir_foreach_use(use, def) {
1732       if (nir_src_is_if(use))
1733          return false;
1734       nir_instr *instr = nir_src_parent_instr(use);
1735 
1736       if (instr->type != nir_instr_type_alu)
1737          return false;
1738 
1739       nir_alu_instr *alu = nir_instr_as_alu(instr);
1740       if (alu->op == nir_op_fabs) {
1741          continue;
1742       } else if (alu->op == nir_op_fmul || alu->op == nir_op_ffma) {
1743          nir_alu_src *alu_src = list_entry(use, nir_alu_src, src);
1744          unsigned src_index = alu_src - alu->src;
1745          /* a * a doesn't care about the sign of a. */
1746          if (src_index < 2 && nir_alu_srcs_equal(alu, alu, 0, 1))
1747             continue;
1748       }
1749 
1750       return false;
1751    }
1752    return true;
1753 }
1754 
1755 nir_block *
1756 nir_block_unstructured_next(nir_block *block)
1757 {
1758    if (block == NULL) {
1759       /* nir_foreach_block_unstructured_safe() will call this function on a
1760        * NULL block after the last iteration, but it won't use the result so
1761        * just return NULL here.
1762        */
1763       return NULL;
1764    }
1765 
1766    nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
1767    if (cf_next == NULL && block->cf_node.parent->type == nir_cf_node_function)
1768       return NULL;
1769 
1770    if (cf_next && cf_next->type == nir_cf_node_block)
1771       return nir_cf_node_as_block(cf_next);
1772 
1773    return nir_block_cf_tree_next(block);
1774 }
1775 
1776 nir_block *
1777 nir_unstructured_start_block(nir_function_impl *impl)
1778 {
1779    return nir_start_block(impl);
1780 }
1781 
1782 nir_block *
1783 nir_block_cf_tree_next(nir_block *block)
1784 {
1785    if (block == NULL) {
1786       /* nir_foreach_block_safe() will call this function on a NULL block
1787        * after the last iteration, but it won't use the result so just return
1788        * NULL here.
1789        */
1790       return NULL;
1791    }
1792 
1793    assert(nir_cf_node_get_function(&block->cf_node)->structured);
1794 
1795    nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
1796    if (cf_next)
1797       return nir_cf_node_cf_tree_first(cf_next);
1798 
1799    nir_cf_node *parent = block->cf_node.parent;
1800    if (parent->type == nir_cf_node_function)
1801       return NULL;
1802 
1803    /* Is this the last block of a cf_node? Return the following block */
1804    if (block == nir_cf_node_cf_tree_last(parent))
1805       return nir_cf_node_as_block(nir_cf_node_next(parent));
1806 
1807    switch (parent->type) {
1808    case nir_cf_node_if: {
1809       /* We are at the end of the if. Go to the beginning of the else */
1810       nir_if *if_stmt = nir_cf_node_as_if(parent);
1811       assert(block == nir_if_last_then_block(if_stmt));
1812       return nir_if_first_else_block(if_stmt);
1813    }
1814 
1815    case nir_cf_node_loop: {
1816       /* We are at the end of the body and there is a continue construct */
1817       nir_loop *loop = nir_cf_node_as_loop(parent);
1818       assert(block == nir_loop_last_block(loop) &&
1819              nir_loop_has_continue_construct(loop));
1820       return nir_loop_first_continue_block(loop);
1821    }
1822 
1823    default:
1824       unreachable("unknown cf node type");
1825    }
1826 }
1827 
1828 nir_block *
1829 nir_block_cf_tree_prev(nir_block *block)
1830 {
1831    if (block == NULL) {
1832       /* do this for consistency with nir_block_cf_tree_next() */
1833       return NULL;
1834    }
1835 
1836    assert(nir_cf_node_get_function(&block->cf_node)->structured);
1837 
1838    nir_cf_node *cf_prev = nir_cf_node_prev(&block->cf_node);
1839    if (cf_prev)
1840       return nir_cf_node_cf_tree_last(cf_prev);
1841 
1842    nir_cf_node *parent = block->cf_node.parent;
1843    if (parent->type == nir_cf_node_function)
1844       return NULL;
1845 
1846    /* Is this the first block of a cf_node? Return the previous block */
1847    if (block == nir_cf_node_cf_tree_first(parent))
1848       return nir_cf_node_as_block(nir_cf_node_prev(parent));
1849 
1850    switch (parent->type) {
1851    case nir_cf_node_if: {
1852       /* We are at the beginning of the else. Go to the end of the if */
1853       nir_if *if_stmt = nir_cf_node_as_if(parent);
1854       assert(block == nir_if_first_else_block(if_stmt));
1855       return nir_if_last_then_block(if_stmt);
1856    }
1857    case nir_cf_node_loop: {
1858       /* We are at the beginning of the continue construct. */
1859       nir_loop *loop = nir_cf_node_as_loop(parent);
1860       assert(nir_loop_has_continue_construct(loop) &&
1861              block == nir_loop_first_continue_block(loop));
1862       return nir_loop_last_block(loop);
1863    }
1864 
1865    default:
1866       unreachable("unknown cf node type");
1867    }
1868 }
1869 
1870 nir_block *
1871 nir_cf_node_cf_tree_first(nir_cf_node *node)
1872 {
1873    switch (node->type) {
1874    case nir_cf_node_function: {
1875       nir_function_impl *impl = nir_cf_node_as_function(node);
1876       return nir_start_block(impl);
1877    }
1878 
1879    case nir_cf_node_if: {
1880       nir_if *if_stmt = nir_cf_node_as_if(node);
1881       return nir_if_first_then_block(if_stmt);
1882    }
1883 
1884    case nir_cf_node_loop: {
1885       nir_loop *loop = nir_cf_node_as_loop(node);
1886       return nir_loop_first_block(loop);
1887    }
1888 
1889    case nir_cf_node_block: {
1890       return nir_cf_node_as_block(node);
1891    }
1892 
1893    default:
1894       unreachable("unknown node type");
1895    }
1896 }
1897 
1898 nir_block *
1899 nir_cf_node_cf_tree_last(nir_cf_node *node)
1900 {
1901    switch (node->type) {
1902    case nir_cf_node_function: {
1903       nir_function_impl *impl = nir_cf_node_as_function(node);
1904       return nir_impl_last_block(impl);
1905    }
1906 
1907    case nir_cf_node_if: {
1908       nir_if *if_stmt = nir_cf_node_as_if(node);
1909       return nir_if_last_else_block(if_stmt);
1910    }
1911 
1912    case nir_cf_node_loop: {
1913       nir_loop *loop = nir_cf_node_as_loop(node);
1914       if (nir_loop_has_continue_construct(loop))
1915          return nir_loop_last_continue_block(loop);
1916       else
1917          return nir_loop_last_block(loop);
1918    }
1919 
1920    case nir_cf_node_block: {
1921       return nir_cf_node_as_block(node);
1922    }
1923 
1924    default:
1925       unreachable("unknown node type");
1926    }
1927 }
1928 
1929 nir_block *
1930 nir_cf_node_cf_tree_next(nir_cf_node *node)
1931 {
1932    if (node->type == nir_cf_node_block)
1933       return nir_block_cf_tree_next(nir_cf_node_as_block(node));
1934    else if (node->type == nir_cf_node_function)
1935       return NULL;
1936    else
1937       return nir_cf_node_as_block(nir_cf_node_next(node));
1938 }
1939 
1940 nir_block *
1941 nir_cf_node_cf_tree_prev(nir_cf_node *node)
1942 {
1943    if (node->type == nir_cf_node_block)
1944       return nir_block_cf_tree_prev(nir_cf_node_as_block(node));
1945    else if (node->type == nir_cf_node_function)
1946       return NULL;
1947    else
1948       return nir_cf_node_as_block(nir_cf_node_prev(node));
1949 }
1950 
1951 nir_if *
1952 nir_block_get_following_if(nir_block *block)
1953 {
1954    if (exec_node_is_tail_sentinel(&block->cf_node.node))
1955       return NULL;
1956 
1957    if (nir_cf_node_is_last(&block->cf_node))
1958       return NULL;
1959 
1960    nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
1961 
1962    if (next_node->type != nir_cf_node_if)
1963       return NULL;
1964 
1965    return nir_cf_node_as_if(next_node);
1966 }
1967 
1968 nir_loop *
1969 nir_block_get_following_loop(nir_block *block)
1970 {
1971    if (exec_node_is_tail_sentinel(&block->cf_node.node))
1972       return NULL;
1973 
1974    if (nir_cf_node_is_last(&block->cf_node))
1975       return NULL;
1976 
1977    nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
1978 
1979    if (next_node->type != nir_cf_node_loop)
1980       return NULL;
1981 
1982    return nir_cf_node_as_loop(next_node);
1983 }
1984 
1985 static int
1986 compare_block_index(const void *p1, const void *p2)
1987 {
1988    const nir_block *block1 = *((const nir_block **)p1);
1989    const nir_block *block2 = *((const nir_block **)p2);
1990 
1991    return (int)block1->index - (int)block2->index;
1992 }
1993 
1994 nir_block **
1995 nir_block_get_predecessors_sorted(const nir_block *block, void *mem_ctx)
1996 {
1997    nir_block **preds =
1998       ralloc_array(mem_ctx, nir_block *, block->predecessors->entries);
1999 
2000    unsigned i = 0;
2001    set_foreach(block->predecessors, entry)
2002       preds[i++] = (nir_block *)entry->key;
2003    assert(i == block->predecessors->entries);
2004 
2005    qsort(preds, block->predecessors->entries, sizeof(nir_block *),
2006          compare_block_index);
2007 
2008    return preds;
2009 }
2010 
2011 void
2012 nir_index_blocks(nir_function_impl *impl)
2013 {
2014    unsigned index = 0;
2015 
2016    if (impl->valid_metadata & nir_metadata_block_index)
2017       return;
2018 
2019    nir_foreach_block_unstructured(block, impl) {
2020       block->index = index++;
2021    }
2022 
2023    /* The end_block isn't really part of the program, which is why its index
2024     * is >= num_blocks.
2025     */
2026    impl->num_blocks = impl->end_block->index = index;
2027 }
2028 
2029 static bool
2030 index_ssa_def_cb(nir_def *def, void *state)
2031 {
2032    unsigned *index = (unsigned *)state;
2033    def->index = (*index)++;
2034 
2035    return true;
2036 }
2037 
2038 /**
2039  * The indices are applied top-to-bottom which has the very nice property
2040  * that, if A dominates B, then A->index <= B->index.
2041  */
2042 void
2043 nir_index_ssa_defs(nir_function_impl *impl)
2044 {
2045    unsigned index = 0;
2046 
2047    impl->valid_metadata &= ~nir_metadata_live_defs;
2048 
2049    nir_foreach_block_unstructured(block, impl) {
2050       nir_foreach_instr(instr, block)
2051          nir_foreach_def(instr, index_ssa_def_cb, &index);
2052    }
2053 
2054    impl->ssa_alloc = index;
2055 }
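
/* Example (sketch): after nir_index_ssa_defs(impl), the top-to-bottom
 * property above gives a cheap necessary condition for dominance between
 * two defs "a" and "b":
 *
 *    if (a->index > b->index)
 *       return false;   // a cannot dominate b
 */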
2056 
2057 /**
2058  * The indices are applied top-to-bottom which has the very nice property
2059  * that, if A dominates B, then A->index <= B->index.
2060  */
2061 unsigned
2062 nir_index_instrs(nir_function_impl *impl)
2063 {
2064    unsigned index = 0;
2065 
2066    nir_foreach_block(block, impl) {
2067       block->start_ip = index++;
2068 
2069       nir_foreach_instr(instr, block)
2070          instr->index = index++;
2071 
2072       block->end_ip = index++;
2073    }
2074 
2075    return index;
2076 }
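
/* Example (sketch): once nir_index_instrs(impl) has run, program order
 * within the impl reduces to integer comparison, and block->start_ip /
 * block->end_ip bracket every instruction index in that block:
 *
 *    bool a_before_b = instr_a->index < instr_b->index;
 */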
2077 
2078 void
2079 nir_shader_clear_pass_flags(nir_shader *shader)
2080 {
2081    nir_foreach_function_impl(impl, shader) {
2082       nir_foreach_block(block, impl) {
2083          nir_foreach_instr(instr, block) {
2084             instr->pass_flags = 0;
2085          }
2086       }
2087    }
2088 }
2089 
2090 unsigned
2091 nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes)
2092 {
2093    unsigned count = 0;
2094    nir_foreach_variable_with_modes(var, shader, modes)
2095       var->index = count++;
2096    return count;
2097 }
2098 
2099 unsigned
2100 nir_function_impl_index_vars(nir_function_impl *impl)
2101 {
2102    unsigned count = 0;
2103    nir_foreach_function_temp_variable(var, impl)
2104       var->index = count++;
2105    return count;
2106 }
2107 
2108 static nir_instr *
2109 cursor_next_instr(nir_cursor cursor)
2110 {
2111    switch (cursor.option) {
2112    case nir_cursor_before_block:
2113       for (nir_block *block = cursor.block; block;
2114            block = nir_block_cf_tree_next(block)) {
2115          nir_instr *instr = nir_block_first_instr(block);
2116          if (instr)
2117             return instr;
2118       }
2119       return NULL;
2120 
2121    case nir_cursor_after_block:
2122       cursor.block = nir_block_cf_tree_next(cursor.block);
2123       if (cursor.block == NULL)
2124          return NULL;
2125 
2126       cursor.option = nir_cursor_before_block;
2127       return cursor_next_instr(cursor);
2128 
2129    case nir_cursor_before_instr:
2130       return cursor.instr;
2131 
2132    case nir_cursor_after_instr:
2133       if (nir_instr_next(cursor.instr))
2134          return nir_instr_next(cursor.instr);
2135 
2136       cursor.option = nir_cursor_after_block;
2137       cursor.block = cursor.instr->block;
2138       return cursor_next_instr(cursor);
2139    }
2140 
2141    unreachable("Inavlid cursor option");
2142 }
2143 
2144 bool
2145 nir_function_impl_lower_instructions(nir_function_impl *impl,
2146                                      nir_instr_filter_cb filter,
2147                                      nir_lower_instr_cb lower,
2148                                      void *cb_data)
2149 {
2150    nir_builder b = nir_builder_create(impl);
2151 
2152    nir_metadata preserved = nir_metadata_control_flow;
2153 
2154    bool progress = false;
2155    nir_cursor iter = nir_before_impl(impl);
2156    nir_instr *instr;
2157    while ((instr = cursor_next_instr(iter)) != NULL) {
2158       if (filter && !filter(instr, cb_data)) {
2159          iter = nir_after_instr(instr);
2160          continue;
2161       }
2162 
2163       nir_def *old_def = nir_instr_def(instr);
2164       struct list_head old_uses;
2165       if (old_def != NULL) {
2166          /* We're about to ask the callback to generate a replacement for instr.
2167           * Save off the uses from instr's SSA def so we know what uses to
2168           * rewrite later.  If we use nir_def_rewrite_uses, it fails in the
2169           * case where the generated replacement code uses the result of instr
2170           * itself.  If we use nir_def_rewrite_uses_after (which is the
2171           * normal solution to this problem), it doesn't work well if control-
2172           * flow is inserted as part of the replacement, doesn't handle cases
2173           * where the replacement is something consumed by instr, and suffers
2174           * from performance issues.  This is the only way to 100% guarantee
2175           * that we rewrite the correct set efficiently.
2176           */
2177 
2178          list_replace(&old_def->uses, &old_uses);
2179          list_inithead(&old_def->uses);
2180       }
2181 
2182       b.cursor = nir_after_instr(instr);
2183       nir_def *new_def = lower(&b, instr, cb_data);
2184       if (new_def && new_def != NIR_LOWER_INSTR_PROGRESS &&
2185           new_def != NIR_LOWER_INSTR_PROGRESS_REPLACE) {
2186          assert(old_def != NULL);
2187          if (new_def->parent_instr->block != instr->block)
2188             preserved = nir_metadata_none;
2189 
2190          list_for_each_entry_safe(nir_src, use_src, &old_uses, use_link)
2191             nir_src_rewrite(use_src, new_def);
2192 
2193          if (nir_def_is_unused(old_def)) {
2194             iter = nir_instr_free_and_dce(instr);
2195          } else {
2196             iter = nir_after_instr(instr);
2197          }
2198          progress = true;
2199       } else {
2200          /* We didn't end up lowering after all.  Put the uses back */
2201          if (old_def)
2202             list_replace(&old_uses, &old_def->uses);
2203 
2204          if (new_def == NIR_LOWER_INSTR_PROGRESS_REPLACE) {
2205             /* Only instructions without a return value can be removed like this */
2206             assert(!old_def);
2207             iter = nir_instr_free_and_dce(instr);
2208             progress = true;
2209          } else
2210             iter = nir_after_instr(instr);
2211 
2212          if (new_def == NIR_LOWER_INSTR_PROGRESS)
2213             progress = true;
2214       }
2215    }
2216 
2217    if (progress) {
2218       nir_metadata_preserve(impl, preserved);
2219    } else {
2220       nir_metadata_preserve(impl, nir_metadata_all);
2221    }
2222 
2223    return progress;
2224 }
2225 
2226 bool
2227 nir_shader_lower_instructions(nir_shader *shader,
2228                               nir_instr_filter_cb filter,
2229                               nir_lower_instr_cb lower,
2230                               void *cb_data)
2231 {
2232    bool progress = false;
2233 
2234    nir_foreach_function_impl(impl, shader) {
2235       if (nir_function_impl_lower_instructions(impl, filter, lower, cb_data))
2236          progress = true;
2237    }
2238 
2239    return progress;
2240 }
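
/* Minimal sketch of the filter/lower callback pattern (a hypothetical pass
 * that rewrites fneg as a multiply by -1.0; swizzle handling elided):
 *
 *    static bool
 *    is_fneg(const nir_instr *instr, const void *data)
 *    {
 *       return instr->type == nir_instr_type_alu &&
 *              nir_instr_as_alu(instr)->op == nir_op_fneg;
 *    }
 *
 *    static nir_def *
 *    lower_fneg(nir_builder *b, nir_instr *instr, void *data)
 *    {
 *       nir_alu_instr *alu = nir_instr_as_alu(instr);
 *       return nir_fmul_imm(b, nir_ssa_for_alu_src(b, alu, 0), -1.0);
 *    }
 *
 *    ... nir_shader_lower_instructions(shader, is_fneg, lower_fneg, NULL);
 *
 * Returning a real nir_def rewrites the old uses; returning NULL means "not
 * lowered", and the NIR_LOWER_INSTR_PROGRESS* sentinels behave as handled
 * above.
 */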
2241 
2242 /**
2243  * Returns true if the shader supports quad-based implicit derivatives on
2244  * texture sampling.
2245  */
2246 bool
2247 nir_shader_supports_implicit_lod(nir_shader *shader)
2248 {
2249    return (shader->info.stage == MESA_SHADER_FRAGMENT ||
2250            (gl_shader_stage_uses_workgroup(shader->info.stage) &&
2251             shader->info.derivative_group != DERIVATIVE_GROUP_NONE));
2252 }
2253 
2254 nir_intrinsic_op
2255 nir_intrinsic_from_system_value(gl_system_value val)
2256 {
2257    switch (val) {
2258    case SYSTEM_VALUE_VERTEX_ID:
2259       return nir_intrinsic_load_vertex_id;
2260    case SYSTEM_VALUE_INSTANCE_ID:
2261       return nir_intrinsic_load_instance_id;
2262    case SYSTEM_VALUE_DRAW_ID:
2263       return nir_intrinsic_load_draw_id;
2264    case SYSTEM_VALUE_BASE_INSTANCE:
2265       return nir_intrinsic_load_base_instance;
2266    case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
2267       return nir_intrinsic_load_vertex_id_zero_base;
2268    case SYSTEM_VALUE_IS_INDEXED_DRAW:
2269       return nir_intrinsic_load_is_indexed_draw;
2270    case SYSTEM_VALUE_FIRST_VERTEX:
2271       return nir_intrinsic_load_first_vertex;
2272    case SYSTEM_VALUE_BASE_VERTEX:
2273       return nir_intrinsic_load_base_vertex;
2274    case SYSTEM_VALUE_INVOCATION_ID:
2275       return nir_intrinsic_load_invocation_id;
2276    case SYSTEM_VALUE_FRAG_COORD:
2277       return nir_intrinsic_load_frag_coord;
2278    case SYSTEM_VALUE_PIXEL_COORD:
2279       return nir_intrinsic_load_pixel_coord;
2280    case SYSTEM_VALUE_POINT_COORD:
2281       return nir_intrinsic_load_point_coord;
2282    case SYSTEM_VALUE_LINE_COORD:
2283       return nir_intrinsic_load_line_coord;
2284    case SYSTEM_VALUE_FRONT_FACE:
2285       return nir_intrinsic_load_front_face;
2286    case SYSTEM_VALUE_FRONT_FACE_FSIGN:
2287       return nir_intrinsic_load_front_face_fsign;
2288    case SYSTEM_VALUE_SAMPLE_ID:
2289       return nir_intrinsic_load_sample_id;
2290    case SYSTEM_VALUE_SAMPLE_POS:
2291       return nir_intrinsic_load_sample_pos;
2292    case SYSTEM_VALUE_SAMPLE_POS_OR_CENTER:
2293       return nir_intrinsic_load_sample_pos_or_center;
2294    case SYSTEM_VALUE_SAMPLE_MASK_IN:
2295       return nir_intrinsic_load_sample_mask_in;
2296    case SYSTEM_VALUE_LAYER_ID:
2297       return nir_intrinsic_load_layer_id;
2298    case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
2299       return nir_intrinsic_load_local_invocation_id;
2300    case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
2301       return nir_intrinsic_load_local_invocation_index;
2302    case SYSTEM_VALUE_WORKGROUP_ID:
2303       return nir_intrinsic_load_workgroup_id;
2304    case SYSTEM_VALUE_BASE_WORKGROUP_ID:
2305       return nir_intrinsic_load_base_workgroup_id;
2306    case SYSTEM_VALUE_WORKGROUP_INDEX:
2307       return nir_intrinsic_load_workgroup_index;
2308    case SYSTEM_VALUE_NUM_WORKGROUPS:
2309       return nir_intrinsic_load_num_workgroups;
2310    case SYSTEM_VALUE_PRIMITIVE_ID:
2311       return nir_intrinsic_load_primitive_id;
2312    case SYSTEM_VALUE_TESS_COORD:
2313       return nir_intrinsic_load_tess_coord;
2314    case SYSTEM_VALUE_TESS_LEVEL_OUTER:
2315       return nir_intrinsic_load_tess_level_outer;
2316    case SYSTEM_VALUE_TESS_LEVEL_INNER:
2317       return nir_intrinsic_load_tess_level_inner;
2318    case SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT:
2319       return nir_intrinsic_load_tess_level_outer_default;
2320    case SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT:
2321       return nir_intrinsic_load_tess_level_inner_default;
2322    case SYSTEM_VALUE_VERTICES_IN:
2323       return nir_intrinsic_load_patch_vertices_in;
2324    case SYSTEM_VALUE_HELPER_INVOCATION:
2325       return nir_intrinsic_load_helper_invocation;
2326    case SYSTEM_VALUE_COLOR0:
2327       return nir_intrinsic_load_color0;
2328    case SYSTEM_VALUE_COLOR1:
2329       return nir_intrinsic_load_color1;
2330    case SYSTEM_VALUE_VIEW_INDEX:
2331       return nir_intrinsic_load_view_index;
2332    case SYSTEM_VALUE_SUBGROUP_SIZE:
2333       return nir_intrinsic_load_subgroup_size;
2334    case SYSTEM_VALUE_SUBGROUP_INVOCATION:
2335       return nir_intrinsic_load_subgroup_invocation;
2336    case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
2337       return nir_intrinsic_load_subgroup_eq_mask;
2338    case SYSTEM_VALUE_SUBGROUP_GE_MASK:
2339       return nir_intrinsic_load_subgroup_ge_mask;
2340    case SYSTEM_VALUE_SUBGROUP_GT_MASK:
2341       return nir_intrinsic_load_subgroup_gt_mask;
2342    case SYSTEM_VALUE_SUBGROUP_LE_MASK:
2343       return nir_intrinsic_load_subgroup_le_mask;
2344    case SYSTEM_VALUE_SUBGROUP_LT_MASK:
2345       return nir_intrinsic_load_subgroup_lt_mask;
2346    case SYSTEM_VALUE_NUM_SUBGROUPS:
2347       return nir_intrinsic_load_num_subgroups;
2348    case SYSTEM_VALUE_SUBGROUP_ID:
2349       return nir_intrinsic_load_subgroup_id;
2350    case SYSTEM_VALUE_WORKGROUP_SIZE:
2351       return nir_intrinsic_load_workgroup_size;
2352    case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
2353       return nir_intrinsic_load_global_invocation_id;
2354    case SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID:
2355       return nir_intrinsic_load_base_global_invocation_id;
2356    case SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX:
2357       return nir_intrinsic_load_global_invocation_index;
2358    case SYSTEM_VALUE_GLOBAL_GROUP_SIZE:
2359       return nir_intrinsic_load_global_size;
2360    case SYSTEM_VALUE_WORK_DIM:
2361       return nir_intrinsic_load_work_dim;
2362    case SYSTEM_VALUE_USER_DATA_AMD:
2363       return nir_intrinsic_load_user_data_amd;
2364    case SYSTEM_VALUE_RAY_LAUNCH_ID:
2365       return nir_intrinsic_load_ray_launch_id;
2366    case SYSTEM_VALUE_RAY_LAUNCH_SIZE:
2367       return nir_intrinsic_load_ray_launch_size;
2368    case SYSTEM_VALUE_RAY_WORLD_ORIGIN:
2369       return nir_intrinsic_load_ray_world_origin;
2370    case SYSTEM_VALUE_RAY_WORLD_DIRECTION:
2371       return nir_intrinsic_load_ray_world_direction;
2372    case SYSTEM_VALUE_RAY_OBJECT_ORIGIN:
2373       return nir_intrinsic_load_ray_object_origin;
2374    case SYSTEM_VALUE_RAY_OBJECT_DIRECTION:
2375       return nir_intrinsic_load_ray_object_direction;
2376    case SYSTEM_VALUE_RAY_T_MIN:
2377       return nir_intrinsic_load_ray_t_min;
2378    case SYSTEM_VALUE_RAY_T_MAX:
2379       return nir_intrinsic_load_ray_t_max;
2380    case SYSTEM_VALUE_RAY_OBJECT_TO_WORLD:
2381       return nir_intrinsic_load_ray_object_to_world;
2382    case SYSTEM_VALUE_RAY_WORLD_TO_OBJECT:
2383       return nir_intrinsic_load_ray_world_to_object;
2384    case SYSTEM_VALUE_RAY_HIT_KIND:
2385       return nir_intrinsic_load_ray_hit_kind;
2386    case SYSTEM_VALUE_RAY_FLAGS:
2387       return nir_intrinsic_load_ray_flags;
2388    case SYSTEM_VALUE_RAY_GEOMETRY_INDEX:
2389       return nir_intrinsic_load_ray_geometry_index;
2390    case SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX:
2391       return nir_intrinsic_load_ray_instance_custom_index;
2392    case SYSTEM_VALUE_CULL_MASK:
2393       return nir_intrinsic_load_cull_mask;
2394    case SYSTEM_VALUE_RAY_TRIANGLE_VERTEX_POSITIONS:
2395       return nir_intrinsic_load_ray_triangle_vertex_positions;
2396    case SYSTEM_VALUE_MESH_VIEW_COUNT:
2397       return nir_intrinsic_load_mesh_view_count;
2398    case SYSTEM_VALUE_FRAG_SHADING_RATE:
2399       return nir_intrinsic_load_frag_shading_rate;
2400    case SYSTEM_VALUE_FULLY_COVERED:
2401       return nir_intrinsic_load_fully_covered;
2402    case SYSTEM_VALUE_FRAG_SIZE:
2403       return nir_intrinsic_load_frag_size;
2404    case SYSTEM_VALUE_FRAG_INVOCATION_COUNT:
2405       return nir_intrinsic_load_frag_invocation_count;
2406    case SYSTEM_VALUE_SHADER_INDEX:
2407       return nir_intrinsic_load_shader_index;
2408    case SYSTEM_VALUE_COALESCED_INPUT_COUNT:
2409       return nir_intrinsic_load_coalesced_input_count;
2410    case SYSTEM_VALUE_WARPS_PER_SM_NV:
2411       return nir_intrinsic_load_warps_per_sm_nv;
2412    case SYSTEM_VALUE_SM_COUNT_NV:
2413       return nir_intrinsic_load_sm_count_nv;
2414    case SYSTEM_VALUE_WARP_ID_NV:
2415       return nir_intrinsic_load_warp_id_nv;
2416    case SYSTEM_VALUE_SM_ID_NV:
2417       return nir_intrinsic_load_sm_id_nv;
2418    default:
2419       return nir_num_intrinsics;
2420    }
2421 }
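
/* Example: this mapping is the inverse of nir_system_value_from_intrinsic()
 * below for the values both handle, e.g.
 *
 *    nir_intrinsic_from_system_value(SYSTEM_VALUE_FRAG_COORD)
 *       == nir_intrinsic_load_frag_coord
 *
 * and, as the default case shows, unhandled values yield nir_num_intrinsics
 * rather than asserting.
 */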
2422 
2423 gl_system_value
2424 nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
2425 {
2426    switch (intrin) {
2427    case nir_intrinsic_load_vertex_id:
2428       return SYSTEM_VALUE_VERTEX_ID;
2429    case nir_intrinsic_load_instance_id:
2430       return SYSTEM_VALUE_INSTANCE_ID;
2431    case nir_intrinsic_load_draw_id:
2432       return SYSTEM_VALUE_DRAW_ID;
2433    case nir_intrinsic_load_base_instance:
2434       return SYSTEM_VALUE_BASE_INSTANCE;
2435    case nir_intrinsic_load_vertex_id_zero_base:
2436       return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
2437    case nir_intrinsic_load_first_vertex:
2438       return SYSTEM_VALUE_FIRST_VERTEX;
2439    case nir_intrinsic_load_is_indexed_draw:
2440       return SYSTEM_VALUE_IS_INDEXED_DRAW;
2441    case nir_intrinsic_load_base_vertex:
2442       return SYSTEM_VALUE_BASE_VERTEX;
2443    case nir_intrinsic_load_invocation_id:
2444       return SYSTEM_VALUE_INVOCATION_ID;
2445    case nir_intrinsic_load_frag_coord:
2446       return SYSTEM_VALUE_FRAG_COORD;
2447    case nir_intrinsic_load_pixel_coord:
2448       return SYSTEM_VALUE_PIXEL_COORD;
2449    case nir_intrinsic_load_point_coord:
2450       return SYSTEM_VALUE_POINT_COORD;
2451    case nir_intrinsic_load_line_coord:
2452       return SYSTEM_VALUE_LINE_COORD;
2453    case nir_intrinsic_load_front_face:
2454       return SYSTEM_VALUE_FRONT_FACE;
2455    case nir_intrinsic_load_front_face_fsign:
2456       return SYSTEM_VALUE_FRONT_FACE_FSIGN;
2457    case nir_intrinsic_load_sample_id:
2458       return SYSTEM_VALUE_SAMPLE_ID;
2459    case nir_intrinsic_load_sample_pos:
2460       return SYSTEM_VALUE_SAMPLE_POS;
2461    case nir_intrinsic_load_sample_pos_or_center:
2462       return SYSTEM_VALUE_SAMPLE_POS_OR_CENTER;
2463    case nir_intrinsic_load_sample_mask_in:
2464       return SYSTEM_VALUE_SAMPLE_MASK_IN;
2465    case nir_intrinsic_load_layer_id:
2466       return SYSTEM_VALUE_LAYER_ID;
2467    case nir_intrinsic_load_local_invocation_id:
2468       return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
2469    case nir_intrinsic_load_local_invocation_index:
2470       return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
2471    case nir_intrinsic_load_num_workgroups:
2472       return SYSTEM_VALUE_NUM_WORKGROUPS;
2473    case nir_intrinsic_load_workgroup_id:
2474       return SYSTEM_VALUE_WORKGROUP_ID;
2475    case nir_intrinsic_load_base_workgroup_id:
2476       return SYSTEM_VALUE_BASE_WORKGROUP_ID;
2477    case nir_intrinsic_load_workgroup_index:
2478       return SYSTEM_VALUE_WORKGROUP_INDEX;
2479    case nir_intrinsic_load_primitive_id:
2480       return SYSTEM_VALUE_PRIMITIVE_ID;
2481    case nir_intrinsic_load_tess_coord:
2482    case nir_intrinsic_load_tess_coord_xy:
2483       return SYSTEM_VALUE_TESS_COORD;
2484    case nir_intrinsic_load_tess_level_outer:
2485       return SYSTEM_VALUE_TESS_LEVEL_OUTER;
2486    case nir_intrinsic_load_tess_level_inner:
2487       return SYSTEM_VALUE_TESS_LEVEL_INNER;
2488    case nir_intrinsic_load_tess_level_outer_default:
2489       return SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT;
2490    case nir_intrinsic_load_tess_level_inner_default:
2491       return SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT;
2492    case nir_intrinsic_load_patch_vertices_in:
2493       return SYSTEM_VALUE_VERTICES_IN;
2494    case nir_intrinsic_load_helper_invocation:
2495       return SYSTEM_VALUE_HELPER_INVOCATION;
2496    case nir_intrinsic_load_color0:
2497       return SYSTEM_VALUE_COLOR0;
2498    case nir_intrinsic_load_color1:
2499       return SYSTEM_VALUE_COLOR1;
2500    case nir_intrinsic_load_view_index:
2501       return SYSTEM_VALUE_VIEW_INDEX;
2502    case nir_intrinsic_load_subgroup_size:
2503       return SYSTEM_VALUE_SUBGROUP_SIZE;
2504    case nir_intrinsic_load_subgroup_invocation:
2505       return SYSTEM_VALUE_SUBGROUP_INVOCATION;
2506    case nir_intrinsic_load_subgroup_eq_mask:
2507       return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
2508    case nir_intrinsic_load_subgroup_ge_mask:
2509       return SYSTEM_VALUE_SUBGROUP_GE_MASK;
2510    case nir_intrinsic_load_subgroup_gt_mask:
2511       return SYSTEM_VALUE_SUBGROUP_GT_MASK;
2512    case nir_intrinsic_load_subgroup_le_mask:
2513       return SYSTEM_VALUE_SUBGROUP_LE_MASK;
2514    case nir_intrinsic_load_subgroup_lt_mask:
2515       return SYSTEM_VALUE_SUBGROUP_LT_MASK;
2516    case nir_intrinsic_load_num_subgroups:
2517       return SYSTEM_VALUE_NUM_SUBGROUPS;
2518    case nir_intrinsic_load_subgroup_id:
2519       return SYSTEM_VALUE_SUBGROUP_ID;
2520    case nir_intrinsic_load_workgroup_size:
2521       return SYSTEM_VALUE_WORKGROUP_SIZE;
2522    case nir_intrinsic_load_global_invocation_id:
2523       return SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
2524    case nir_intrinsic_load_base_global_invocation_id:
2525       return SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID;
2526    case nir_intrinsic_load_global_invocation_index:
2527       return SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX;
2528    case nir_intrinsic_load_global_size:
2529       return SYSTEM_VALUE_GLOBAL_GROUP_SIZE;
2530    case nir_intrinsic_load_work_dim:
2531       return SYSTEM_VALUE_WORK_DIM;
2532    case nir_intrinsic_load_user_data_amd:
2533       return SYSTEM_VALUE_USER_DATA_AMD;
2534    case nir_intrinsic_load_barycentric_model:
2535       return SYSTEM_VALUE_BARYCENTRIC_PULL_MODEL;
2536    case nir_intrinsic_load_gs_header_ir3:
2537       return SYSTEM_VALUE_GS_HEADER_IR3;
2538    case nir_intrinsic_load_tcs_header_ir3:
2539       return SYSTEM_VALUE_TCS_HEADER_IR3;
2540    case nir_intrinsic_load_ray_launch_id:
2541       return SYSTEM_VALUE_RAY_LAUNCH_ID;
2542    case nir_intrinsic_load_ray_launch_size:
2543       return SYSTEM_VALUE_RAY_LAUNCH_SIZE;
2544    case nir_intrinsic_load_ray_world_origin:
2545       return SYSTEM_VALUE_RAY_WORLD_ORIGIN;
2546    case nir_intrinsic_load_ray_world_direction:
2547       return SYSTEM_VALUE_RAY_WORLD_DIRECTION;
2548    case nir_intrinsic_load_ray_object_origin:
2549       return SYSTEM_VALUE_RAY_OBJECT_ORIGIN;
2550    case nir_intrinsic_load_ray_object_direction:
2551       return SYSTEM_VALUE_RAY_OBJECT_DIRECTION;
2552    case nir_intrinsic_load_ray_t_min:
2553       return SYSTEM_VALUE_RAY_T_MIN;
2554    case nir_intrinsic_load_ray_t_max:
2555       return SYSTEM_VALUE_RAY_T_MAX;
2556    case nir_intrinsic_load_ray_object_to_world:
2557       return SYSTEM_VALUE_RAY_OBJECT_TO_WORLD;
2558    case nir_intrinsic_load_ray_world_to_object:
2559       return SYSTEM_VALUE_RAY_WORLD_TO_OBJECT;
2560    case nir_intrinsic_load_ray_hit_kind:
2561       return SYSTEM_VALUE_RAY_HIT_KIND;
2562    case nir_intrinsic_load_ray_flags:
2563       return SYSTEM_VALUE_RAY_FLAGS;
2564    case nir_intrinsic_load_ray_geometry_index:
2565       return SYSTEM_VALUE_RAY_GEOMETRY_INDEX;
2566    case nir_intrinsic_load_ray_instance_custom_index:
2567       return SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX;
2568    case nir_intrinsic_load_cull_mask:
2569       return SYSTEM_VALUE_CULL_MASK;
2570    case nir_intrinsic_load_ray_triangle_vertex_positions:
2571       return SYSTEM_VALUE_RAY_TRIANGLE_VERTEX_POSITIONS;
2572    case nir_intrinsic_load_frag_shading_rate:
2573       return SYSTEM_VALUE_FRAG_SHADING_RATE;
2574    case nir_intrinsic_load_mesh_view_count:
2575       return SYSTEM_VALUE_MESH_VIEW_COUNT;
2576    case nir_intrinsic_load_fully_covered:
2577       return SYSTEM_VALUE_FULLY_COVERED;
2578    case nir_intrinsic_load_frag_size:
2579       return SYSTEM_VALUE_FRAG_SIZE;
2580    case nir_intrinsic_load_frag_invocation_count:
2581       return SYSTEM_VALUE_FRAG_INVOCATION_COUNT;
2582    case nir_intrinsic_load_shader_index:
2583       return SYSTEM_VALUE_SHADER_INDEX;
2584    case nir_intrinsic_load_coalesced_input_count:
2585       return SYSTEM_VALUE_COALESCED_INPUT_COUNT;
2586    case nir_intrinsic_load_warps_per_sm_nv:
2587       return SYSTEM_VALUE_WARPS_PER_SM_NV;
2588    case nir_intrinsic_load_sm_count_nv:
2589       return SYSTEM_VALUE_SM_COUNT_NV;
2590    case nir_intrinsic_load_warp_id_nv:
2591       return SYSTEM_VALUE_WARP_ID_NV;
2592    case nir_intrinsic_load_sm_id_nv:
2593       return SYSTEM_VALUE_SM_ID_NV;
2594    default:
2595       unreachable("intrinsic doesn't produce a system value");
2596    }
2597 }
2598 
2599 /* OpenGL utility method that remaps attribute locations when the attributes
2600  * are doubles. Not needed for Vulkan because the input location count for
2601  * doubles differs between Vulkan and OpenGL.
2602  *
2603  * The bitfield returned in dual_slot is one bit for each double input slot in
2604  * the original OpenGL single-slot input numbering.  The mapping from old
2605  * locations to new locations is as follows:
2606  *
2607  *    new_loc = loc + util_bitcount(dual_slot & BITFIELD64_MASK(loc))
2608  */
2609 void
2610 nir_remap_dual_slot_attributes(nir_shader *shader, uint64_t *dual_slot)
2611 {
2612    assert(shader->info.stage == MESA_SHADER_VERTEX);
2613 
2614    *dual_slot = 0;
2615    nir_foreach_shader_in_variable(var, shader) {
2616       if (glsl_type_is_dual_slot(glsl_without_array(var->type))) {
2617          unsigned slots = glsl_count_attribute_slots(var->type, true);
2618          *dual_slot |= BITFIELD64_MASK(slots) << var->data.location;
2619       }
2620    }
2621 
2622    nir_foreach_shader_in_variable(var, shader) {
2623       var->data.location +=
2624          util_bitcount64(*dual_slot & BITFIELD64_MASK(var->data.location));
2625    }
2626 }
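
/* Worked example of the formula above: if dual_slot has only bit 1 set, an
 * input at old location 3 lands at
 *
 *    new_loc = 3 + util_bitcount(0x2 & BITFIELD64_MASK(3)) = 4
 *
 * while locations 0 and 1 are unchanged.
 */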
2627 
2628 /* Returns an attribute mask that has been re-compacted using the given
2629  * dual_slot mask.
2630  */
2631 uint64_t
2632 nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot)
2633 {
2634    while (dual_slot) {
2635       unsigned loc = u_bit_scan64(&dual_slot);
2636       /* mask of all bits up to and including loc */
2637       uint64_t mask = BITFIELD64_MASK(loc + 1);
2638       attribs = (attribs & mask) | ((attribs & ~mask) >> 1);
2639    }
2640    return attribs;
2641 }
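
/* Worked example (illustrative arithmetic only): with dual_slot = 0x2, an
 * attribs mask of 0b1101 compacts to 0b0111: the loop keeps bits 0-1 (0b01)
 * and shifts the remainder down by one (0b1100 >> 1 = 0b0110).
 */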
2642 
2643 void
2644 nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_def *src,
2645                             bool bindless)
2646 {
2647    enum gl_access_qualifier access = nir_intrinsic_access(intrin);
2648 
2649    /* Image intrinsics only have one of these */
2650    assert(!nir_intrinsic_has_src_type(intrin) ||
2651           !nir_intrinsic_has_dest_type(intrin));
2652 
2653    nir_alu_type data_type = nir_type_invalid;
2654    if (nir_intrinsic_has_src_type(intrin))
2655       data_type = nir_intrinsic_src_type(intrin);
2656    if (nir_intrinsic_has_dest_type(intrin))
2657       data_type = nir_intrinsic_dest_type(intrin);
2658 
2659    nir_atomic_op atomic_op = 0;
2660    if (nir_intrinsic_has_atomic_op(intrin))
2661       atomic_op = nir_intrinsic_atomic_op(intrin);
2662 
2663    switch (intrin->intrinsic) {
2664 #define CASE(op)                                                       \
2665    case nir_intrinsic_image_deref_##op:                                \
2666       intrin->intrinsic = bindless ? nir_intrinsic_bindless_image_##op \
2667                                    : nir_intrinsic_image_##op;         \
2668       break;
2669       CASE(load)
2670       CASE(sparse_load)
2671       CASE(store)
2672       CASE(atomic)
2673       CASE(atomic_swap)
2674       CASE(size)
2675       CASE(samples)
2676       CASE(load_raw_intel)
2677       CASE(store_raw_intel)
2678       CASE(fragment_mask_load_amd)
2679       CASE(store_block_agx)
2680 #undef CASE
2681    default:
2682       unreachable("Unhanded image intrinsic");
2683    }
2684 
2685    nir_variable *var = nir_intrinsic_get_var(intrin, 0);
2686 
2687    /* Only update the format if the intrinsic doesn't have one set */
2688    if (nir_intrinsic_format(intrin) == PIPE_FORMAT_NONE)
2689       nir_intrinsic_set_format(intrin, var->data.image.format);
2690 
2691    nir_intrinsic_set_access(intrin, access | var->data.access);
2692    if (nir_intrinsic_has_src_type(intrin))
2693       nir_intrinsic_set_src_type(intrin, data_type);
2694    if (nir_intrinsic_has_dest_type(intrin))
2695       nir_intrinsic_set_dest_type(intrin, data_type);
2696 
2697    if (nir_intrinsic_has_atomic_op(intrin))
2698       nir_intrinsic_set_atomic_op(intrin, atomic_op);
2699 
2700    nir_src_rewrite(&intrin->src[0], src);
2701 }
2702 
2703 unsigned
2704 nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr)
2705 {
2706    enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
2707    int coords = glsl_get_sampler_dim_coordinate_components(dim);
2708    if (dim == GLSL_SAMPLER_DIM_CUBE)
2709       return coords;
2710    else
2711       return coords + nir_intrinsic_image_array(instr);
2712 }
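
/* Example: a 2D array image yields 2 + 1 = 3 coordinate components, while a
 * cube image yields 3 regardless of nir_intrinsic_image_array(), since image
 * ops on cube arrays fold the face/layer into the third coordinate.
 */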
2713 
2714 bool
2715 nir_intrinsic_can_reorder(nir_intrinsic_instr *instr)
2716 {
2717    if (nir_intrinsic_has_access(instr)) {
2718       enum gl_access_qualifier access = nir_intrinsic_access(instr);
2719       if (access & ACCESS_VOLATILE)
2720          return false;
2721       if (access & ACCESS_CAN_REORDER)
2722          return true;
2723    }
2724 
2725    const nir_intrinsic_info *info;
2726    if (instr->intrinsic == nir_intrinsic_load_deref) {
2727       nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
2728       if (nir_deref_mode_is_in_set(deref, nir_var_system_value)) {
2729          nir_variable *var = nir_deref_instr_get_variable(deref);
2730          if (!var)
2731             return false;
2732 
2733          nir_intrinsic_op sysval_op =
2734             nir_intrinsic_from_system_value((gl_system_value)var->data.location);
2735          if (sysval_op == nir_num_intrinsics)
2736             return true;
2737 
2738          info = &nir_intrinsic_infos[sysval_op];
2739       } else {
2740          return nir_deref_mode_is_in_set(deref, nir_var_read_only_modes);
2741       }
2742    } else {
2743       info = &nir_intrinsic_infos[instr->intrinsic];
2744    }
2745 
2746    return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
2747           (info->flags & NIR_INTRINSIC_CAN_REORDER);
2748 }
2749 
2750 nir_src *
2751 nir_get_shader_call_payload_src(nir_intrinsic_instr *call)
2752 {
2753    switch (call->intrinsic) {
2754    case nir_intrinsic_trace_ray:
2755    case nir_intrinsic_rt_trace_ray:
2756       return &call->src[10];
2757    case nir_intrinsic_execute_callable:
2758    case nir_intrinsic_rt_execute_callable:
2759       return &call->src[1];
2760    default:
2761       unreachable("Not a call intrinsic");
2762       return NULL;
2763    }
2764 }
2765 
2766 nir_binding
2767 nir_chase_binding(nir_src rsrc)
2768 {
2769    nir_binding res = { 0 };
2770    if (rsrc.ssa->parent_instr->type == nir_instr_type_deref) {
2771       const struct glsl_type *type = glsl_without_array(nir_src_as_deref(rsrc)->type);
2772       bool is_image = glsl_type_is_image(type) || glsl_type_is_sampler(type);
2773       while (rsrc.ssa->parent_instr->type == nir_instr_type_deref) {
2774          nir_deref_instr *deref = nir_src_as_deref(rsrc);
2775 
2776          if (deref->deref_type == nir_deref_type_var) {
2777             res.success = true;
2778             res.var = deref->var;
2779             res.desc_set = deref->var->data.descriptor_set;
2780             res.binding = deref->var->data.binding;
2781             return res;
2782          } else if (deref->deref_type == nir_deref_type_array && is_image) {
2783             if (res.num_indices == ARRAY_SIZE(res.indices))
2784                return (nir_binding){ 0 };
2785             res.indices[res.num_indices++] = deref->arr.index;
2786          }
2787 
2788          rsrc = deref->parent;
2789       }
2790    }
2791 
2792    /* Skip copies and trimming. Trimming can appear as nir_op_mov instructions
2793     * when removing the offset from addresses. We also consider
2794     * nir_op_is_vec_or_mov() instructions to skip trimming of
2795     * vec2_index_32bit_offset addresses after lowering ALU to scalar.
2796     */
2797    unsigned num_components = nir_src_num_components(rsrc);
2798    while (true) {
2799       nir_alu_instr *alu = nir_src_as_alu_instr(rsrc);
2800       nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc);
2801       if (alu && alu->op == nir_op_mov) {
2802          for (unsigned i = 0; i < num_components; i++) {
2803             if (alu->src[0].swizzle[i] != i)
2804                return (nir_binding){ 0 };
2805          }
2806          rsrc = alu->src[0].src;
2807       } else if (alu && nir_op_is_vec(alu->op)) {
2808          for (unsigned i = 0; i < num_components; i++) {
2809             if (alu->src[i].swizzle[0] != i || alu->src[i].src.ssa != alu->src[0].src.ssa)
2810                return (nir_binding){ 0 };
2811          }
2812          rsrc = alu->src[0].src;
2813       } else if (intrin && intrin->intrinsic == nir_intrinsic_read_first_invocation) {
2814          /* The caller might want to know whether only the first invocation of
2815           * the indices is used.
2816           */
2817          res.read_first_invocation = true;
2818          rsrc = intrin->src[0];
2819       } else {
2820          break;
2821       }
2822    }
2823 
2824    if (nir_src_is_const(rsrc)) {
2825       /* GL binding model after deref lowering */
2826       res.success = true;
2827       /* Can't use just nir_src_as_uint. Vulkan resource index produces a
2828        * vec2. Some drivers lower it to vec1 (to handle get_ssbo_size for
2829        * example) but others just keep it around as a vec2 (v3dv).
2830        */
2831       res.binding = nir_src_comp_as_uint(rsrc, 0);
2832       return res;
2833    }
2834 
2835    /* otherwise, must be Vulkan binding model after deref lowering or GL bindless */
2836 
2837    nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc);
2838    if (!intrin)
2839       return (nir_binding){ 0 };
2840 
2841    /* Intel resource, similar to load_vulkan_descriptor after it has been
2842     * lowered.
2843     */
2844    if (intrin->intrinsic == nir_intrinsic_resource_intel) {
2845       res.success = true;
2846       res.desc_set = nir_intrinsic_desc_set(intrin);
2847       res.binding = nir_intrinsic_binding(intrin);
2848       /* nir_intrinsic_resource_intel has 3 sources, but src[2] is included in
2849        * src[1]; it is kept around for other purposes.
2850        */
2851       res.num_indices = 2;
2852       res.indices[0] = intrin->src[0];
2853       res.indices[1] = intrin->src[1];
2854       return res;
2855    }
2856 
2857    /* skip load_vulkan_descriptor */
2858    if (intrin->intrinsic == nir_intrinsic_load_vulkan_descriptor) {
2859       intrin = nir_src_as_intrinsic(intrin->src[0]);
2860       if (!intrin)
2861          return (nir_binding){ 0 };
2862    }
2863 
2864    if (intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
2865       return (nir_binding){ 0 };
2866 
2867    assert(res.num_indices == 0);
2868    res.success = true;
2869    res.desc_set = nir_intrinsic_desc_set(intrin);
2870    res.binding = nir_intrinsic_binding(intrin);
2871    res.num_indices = 1;
2872    res.indices[0] = intrin->src[0];
2873    return res;
2874 }
2875 
2876 nir_variable *
2877 nir_get_binding_variable(nir_shader *shader, nir_binding binding)
2878 {
2879    nir_variable *binding_var = NULL;
2880    unsigned count = 0;
2881 
2882    if (!binding.success)
2883       return NULL;
2884 
2885    if (binding.var)
2886       return binding.var;
2887 
2888    nir_foreach_variable_with_modes(var, shader, nir_var_mem_ubo | nir_var_mem_ssbo) {
2889       if (var->data.descriptor_set == binding.desc_set && var->data.binding == binding.binding) {
2890          binding_var = var;
2891          count++;
2892       }
2893    }
2894 
2895    /* Be conservative if another variable is using the same binding/desc_set
2896     * because the access mask might be different and we can't get it reliably.
2897     */
2898    if (count > 1)
2899       return NULL;
2900 
2901    return binding_var;
2902 }
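
/* Usage sketch (hypothetical intrinsic): chase an SSBO access back to its
 * declaration, if any:
 *
 *    nir_binding b = nir_chase_binding(intrin->src[0]);
 *    nir_variable *var = nir_get_binding_variable(shader, b);
 *
 * var is NULL when the chase failed or when several variables alias the
 * same set/binding pair.
 */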
2903 
2904 nir_scalar
2905 nir_scalar_chase_movs(nir_scalar s)
2906 {
2907    while (nir_scalar_is_alu(s)) {
2908       nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
2909       if (alu->op == nir_op_mov) {
2910          s.def = alu->src[0].src.ssa;
2911          s.comp = alu->src[0].swizzle[s.comp];
2912       } else if (nir_op_is_vec(alu->op)) {
2913          s.def = alu->src[s.comp].src.ssa;
2914          s.comp = alu->src[s.comp].swizzle[0];
2915       } else {
2916          break;
2917       }
2918    }
2919 
2920    return s;
2921 }
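
/* Usage sketch: find which scalar actually feeds component 2 of a value
 * assembled through vec4/mov chains:
 *
 *    nir_scalar s = nir_scalar_chase_movs(nir_get_scalar(def, 2));
 *
 * s.def and s.comp then name the originating instruction and channel.
 */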
2922 
2923 nir_alu_type
2924 nir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type)
2925 {
2926    switch (base_type) {
2927    /* clang-format off */
2928    case GLSL_TYPE_BOOL:    return nir_type_bool1;
2929    case GLSL_TYPE_UINT:    return nir_type_uint32;
2930    case GLSL_TYPE_INT:     return nir_type_int32;
2931    case GLSL_TYPE_UINT16:  return nir_type_uint16;
2932    case GLSL_TYPE_INT16:   return nir_type_int16;
2933    case GLSL_TYPE_UINT8:   return nir_type_uint8;
2934    case GLSL_TYPE_INT8:    return nir_type_int8;
2935    case GLSL_TYPE_UINT64:  return nir_type_uint64;
2936    case GLSL_TYPE_INT64:   return nir_type_int64;
2937    case GLSL_TYPE_FLOAT:   return nir_type_float32;
2938    case GLSL_TYPE_FLOAT16: return nir_type_float16;
2939    case GLSL_TYPE_DOUBLE:  return nir_type_float64;
2940       /* clang-format on */
2941 
2942    case GLSL_TYPE_COOPERATIVE_MATRIX:
2943    case GLSL_TYPE_SAMPLER:
2944    case GLSL_TYPE_TEXTURE:
2945    case GLSL_TYPE_IMAGE:
2946    case GLSL_TYPE_ATOMIC_UINT:
2947    case GLSL_TYPE_STRUCT:
2948    case GLSL_TYPE_INTERFACE:
2949    case GLSL_TYPE_ARRAY:
2950    case GLSL_TYPE_VOID:
2951    case GLSL_TYPE_SUBROUTINE:
2952    case GLSL_TYPE_ERROR:
2953       return nir_type_invalid;
2954    }
2955 
2956    unreachable("unknown type");
2957 }
2958 
2959 enum glsl_base_type
2960 nir_get_glsl_base_type_for_nir_type(nir_alu_type base_type)
2961 {
2962    /* clang-format off */
2963    switch (base_type) {
2964    case nir_type_bool1:    return GLSL_TYPE_BOOL;
2965    case nir_type_uint32:   return GLSL_TYPE_UINT;
2966    case nir_type_int32:    return GLSL_TYPE_INT;
2967    case nir_type_uint16:   return GLSL_TYPE_UINT16;
2968    case nir_type_int16:    return GLSL_TYPE_INT16;
2969    case nir_type_uint8:    return GLSL_TYPE_UINT8;
2970    case nir_type_int8:     return GLSL_TYPE_INT8;
2971    case nir_type_uint64:   return GLSL_TYPE_UINT64;
2972    case nir_type_int64:    return GLSL_TYPE_INT64;
2973    case nir_type_float32:  return GLSL_TYPE_FLOAT;
2974    case nir_type_float16:  return GLSL_TYPE_FLOAT16;
2975    case nir_type_float64:  return GLSL_TYPE_DOUBLE;
2976    default: unreachable("Not a sized nir_alu_type");
2977    }
2978    /* clang-format on */
2979 }
2980 
2981 nir_op
2982 nir_op_vec(unsigned num_components)
2983 {
2984    /* clang-format off */
2985    switch (num_components) {
2986    case  1: return nir_op_mov;
2987    case  2: return nir_op_vec2;
2988    case  3: return nir_op_vec3;
2989    case  4: return nir_op_vec4;
2990    case  5: return nir_op_vec5;
2991    case  8: return nir_op_vec8;
2992    case 16: return nir_op_vec16;
2993    default: unreachable("bad component count");
2994    }
2995    /* clang-format on */
2996 }
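
/* Example: pick the opcode for a width known only at runtime, e.g.
 * nir_op_vec(3) is nir_op_vec3.  Note the gaps: there is no vec6 or vec7,
 * so callers must stick to the sizes handled above.
 */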
2997 
2998 bool
2999 nir_op_is_vec(nir_op op)
3000 {
3001    switch (op) {
3002    case nir_op_vec2:
3003    case nir_op_vec3:
3004    case nir_op_vec4:
3005    case nir_op_vec5:
3006    case nir_op_vec8:
3007    case nir_op_vec16:
3008       return true;
3009    default:
3010       return false;
3011    }
3012 }
3013 
3014 nir_component_mask_t
3015 nir_alu_instr_src_read_mask(const nir_alu_instr *instr, unsigned src)
3016 {
3017    nir_component_mask_t read_mask = 0;
3018    for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) {
3019       if (!nir_alu_instr_channel_used(instr, src, c))
3020          continue;
3021 
3022       read_mask |= (1 << instr->src[src].swizzle[c]);
3023    }
3024    return read_mask;
3025 }
3026 
3027 unsigned
3028 nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src)
3029 {
3030    if (nir_op_infos[instr->op].input_sizes[src] > 0)
3031       return nir_op_infos[instr->op].input_sizes[src];
3032 
3033    return instr->def.num_components;
3034 }
3035 
3036 #define CASE_ALL_SIZES(op) \
3037    case op:                \
3038    case op##8:             \
3039    case op##16:            \
3040    case op##32:
3041 
3042 bool
nir_alu_instr_is_comparison(const nir_alu_instr * instr)3043 nir_alu_instr_is_comparison(const nir_alu_instr *instr)
3044 {
3045    switch (instr->op) {
3046       CASE_ALL_SIZES(nir_op_flt)
3047       CASE_ALL_SIZES(nir_op_fge)
3048       CASE_ALL_SIZES(nir_op_fltu)
3049       CASE_ALL_SIZES(nir_op_fgeu)
3050       CASE_ALL_SIZES(nir_op_feq)
3051       CASE_ALL_SIZES(nir_op_fneu)
3052       CASE_ALL_SIZES(nir_op_fequ)
3053       CASE_ALL_SIZES(nir_op_fneo)
3054       CASE_ALL_SIZES(nir_op_funord)
3055       CASE_ALL_SIZES(nir_op_ford)
3056       CASE_ALL_SIZES(nir_op_ilt)
3057       CASE_ALL_SIZES(nir_op_ult)
3058       CASE_ALL_SIZES(nir_op_ige)
3059       CASE_ALL_SIZES(nir_op_uge)
3060       CASE_ALL_SIZES(nir_op_ieq)
3061       CASE_ALL_SIZES(nir_op_ine)
3062       CASE_ALL_SIZES(nir_op_bitz)
3063       CASE_ALL_SIZES(nir_op_bitnz)
3064    case nir_op_inot:
3065       return true;
3066    default:
3067       return false;
3068    }
3069 }
3070 
3071 #undef CASE_ALL_SIZES
3072 
3073 unsigned
nir_intrinsic_src_components(const nir_intrinsic_instr * intr,unsigned srcn)3074 nir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn)
3075 {
3076    const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
3077    assert(srcn < info->num_srcs);
3078    if (info->src_components[srcn] > 0)
3079       return info->src_components[srcn];
3080    else if (info->src_components[srcn] == 0)
3081       return intr->num_components;
3082    else
3083       return nir_src_num_components(intr->src[srcn]);
3084 }
3085 
3086 unsigned
nir_intrinsic_dest_components(nir_intrinsic_instr * intr)3087 nir_intrinsic_dest_components(nir_intrinsic_instr *intr)
3088 {
3089    const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
3090    if (!info->has_dest)
3091       return 0;
3092    else if (info->dest_components)
3093       return info->dest_components;
3094    else
3095       return intr->num_components;
3096 }
3097 
3098 nir_alu_type
nir_intrinsic_instr_src_type(const nir_intrinsic_instr * intrin,unsigned src)3099 nir_intrinsic_instr_src_type(const nir_intrinsic_instr *intrin, unsigned src)
3100 {
   /* We could go nuts here, but we'll just handle a few simple
    * cases and let everything else be untyped.
    */
   switch (intrin->intrinsic) {
   case nir_intrinsic_store_deref: {
      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
      if (src == 1)
         return nir_get_nir_type_for_glsl_type(deref->type);
      break;
   }

   case nir_intrinsic_store_output:
      if (src == 0)
         return nir_intrinsic_src_type(intrin);
      break;

   default:
      break;
   }

   /* For the most part, we leave other intrinsics alone.  Most
    * of them don't matter in OpenGL ES 2.0 drivers anyway.
    * However, we should at least check whether this is some sort of
    * IO intrinsic and flag its offset and index sources.
    */
   {
      int offset_src_idx = nir_get_io_offset_src_number(intrin);
      if (src == offset_src_idx) {
         const nir_src *offset_src = offset_src_idx >= 0 ? &intrin->src[offset_src_idx] : NULL;
         if (offset_src)
            return nir_type_int;
      }
   }

   return nir_type_invalid;
}

nir_alu_type
nir_intrinsic_instr_dest_type(const nir_intrinsic_instr *intrin)
{
   /* We could go nuts here, but we'll just handle a few simple
    * cases and let everything else be untyped.
    */
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_deref: {
      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
      return nir_get_nir_type_for_glsl_type(deref->type);
   }

   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_primitive_input:
   case nir_intrinsic_load_uniform:
      return nir_intrinsic_dest_type(intrin);

   default:
      break;
   }

   return nir_type_invalid;
}

/**
 * Helper to copy const_index[] from src to dst, without assuming they
 * match in order.
 */
void
nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src)
{
   if (src->intrinsic == dst->intrinsic) {
      memcpy(dst->const_index, src->const_index, sizeof(dst->const_index));
      return;
   }

   const nir_intrinsic_info *src_info = &nir_intrinsic_infos[src->intrinsic];
   const nir_intrinsic_info *dst_info = &nir_intrinsic_infos[dst->intrinsic];

   for (unsigned i = 0; i < NIR_INTRINSIC_NUM_INDEX_FLAGS; i++) {
      if (src_info->index_map[i] == 0)
         continue;

      /* require that dst instruction also uses the same const_index[]: */
      assert(dst_info->index_map[i] > 0);

      dst->const_index[dst_info->index_map[i] - 1] =
         src->const_index[src_info->index_map[i] - 1];
   }
}
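
/*
 * A minimal usage sketch (hypothetical pass code; "shader", "new_op" and
 * "old_intr" are placeholders): when rewriting an intrinsic to a different
 * opcode, the indices both opcodes share (e.g. BASE, WRITE_MASK) carry
 * over without hard-coding either opcode's index layout:
 *
 *    nir_intrinsic_instr *new_intr =
 *       nir_intrinsic_instr_create(shader, new_op);
 *    nir_intrinsic_copy_const_indices(new_intr, old_intr);
 *
 * An index present on src but missing on dst trips the assert above.
 */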

bool
nir_tex_instr_need_sampler(const nir_tex_instr *instr)
{
   switch (instr->op) {
   case nir_texop_txf:
   case nir_texop_txf_ms:
   case nir_texop_txs:
   case nir_texop_query_levels:
   case nir_texop_texture_samples:
   case nir_texop_samples_identical:
   case nir_texop_descriptor_amd:
      return false;
   default:
      return true;
   }
}

unsigned
nir_tex_instr_result_size(const nir_tex_instr *instr)
{
   switch (instr->op) {
   case nir_texop_txs: {
      unsigned ret;
      switch (instr->sampler_dim) {
      case GLSL_SAMPLER_DIM_1D:
      case GLSL_SAMPLER_DIM_BUF:
         ret = 1;
         break;
      case GLSL_SAMPLER_DIM_2D:
      case GLSL_SAMPLER_DIM_CUBE:
      case GLSL_SAMPLER_DIM_MS:
      case GLSL_SAMPLER_DIM_RECT:
      case GLSL_SAMPLER_DIM_EXTERNAL:
      case GLSL_SAMPLER_DIM_SUBPASS:
      case GLSL_SAMPLER_DIM_SUBPASS_MS:
         ret = 2;
         break;
      case GLSL_SAMPLER_DIM_3D:
         ret = 3;
         break;
      default:
         unreachable("not reached");
      }
      if (instr->is_array)
         ret++;
      return ret;
   }

   case nir_texop_lod:
      return 2;

   case nir_texop_texture_samples:
   case nir_texop_query_levels:
   case nir_texop_samples_identical:
   case nir_texop_fragment_mask_fetch_amd:
   case nir_texop_lod_bias_agx:
   case nir_texop_has_custom_border_color_agx:
      return 1;

   case nir_texop_descriptor_amd:
      return instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 4 : 8;

   case nir_texop_sampler_descriptor_amd:
      return 4;

   case nir_texop_hdr_dim_nv:
   case nir_texop_tex_type_nv:
      return 4;

   case nir_texop_custom_border_color_agx:
      return 4;

   default:
      if (instr->is_shadow && instr->is_new_style_shadow)
         return 1;

      return 4;
   }
}

bool
nir_tex_instr_is_query(const nir_tex_instr *instr)
{
   switch (instr->op) {
   case nir_texop_txs:
   case nir_texop_lod:
   case nir_texop_texture_samples:
   case nir_texop_query_levels:
   case nir_texop_descriptor_amd:
   case nir_texop_sampler_descriptor_amd:
   case nir_texop_lod_bias_agx:
   case nir_texop_custom_border_color_agx:
   case nir_texop_has_custom_border_color_agx:
   case nir_texop_hdr_dim_nv:
   case nir_texop_tex_type_nv:
      return true;
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_txl:
   case nir_texop_txd:
   case nir_texop_txf:
   case nir_texop_txf_ms:
   case nir_texop_txf_ms_fb:
   case nir_texop_txf_ms_mcs_intel:
   case nir_texop_tg4:
   case nir_texop_samples_identical:
   case nir_texop_fragment_mask_fetch_amd:
   case nir_texop_fragment_fetch_amd:
      return false;
   default:
      unreachable("Invalid texture opcode");
   }
}

bool
nir_tex_instr_has_implicit_derivative(const nir_tex_instr *instr)
{
   switch (instr->op) {
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_lod:
      return true;
   case nir_texop_tg4:
      return instr->is_gather_implicit_lod;
   default:
      return false;
   }
}

nir_alu_type
nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src)
{
   switch (instr->src[src].src_type) {
   case nir_tex_src_coord:
      switch (instr->op) {
      case nir_texop_txf:
      case nir_texop_txf_ms:
      case nir_texop_txf_ms_fb:
      case nir_texop_txf_ms_mcs_intel:
      case nir_texop_samples_identical:
      case nir_texop_fragment_fetch_amd:
      case nir_texop_fragment_mask_fetch_amd:
         return nir_type_int;

      default:
         return nir_type_float;
      }

   case nir_tex_src_lod:
      switch (instr->op) {
      case nir_texop_txs:
      case nir_texop_txf:
      case nir_texop_txf_ms:
      case nir_texop_fragment_fetch_amd:
      case nir_texop_fragment_mask_fetch_amd:
         return nir_type_int;

      default:
         return nir_type_float;
      }

   case nir_tex_src_projector:
   case nir_tex_src_comparator:
   case nir_tex_src_bias:
   case nir_tex_src_min_lod:
   case nir_tex_src_ddx:
   case nir_tex_src_ddy:
   case nir_tex_src_backend1:
   case nir_tex_src_backend2:
      return nir_type_float;

   case nir_tex_src_offset:
   case nir_tex_src_ms_index:
   case nir_tex_src_plane:
      return nir_type_int;

   case nir_tex_src_sampler_deref_intrinsic:
   case nir_tex_src_texture_deref_intrinsic:
   case nir_tex_src_ms_mcs_intel:
   case nir_tex_src_texture_deref:
   case nir_tex_src_sampler_deref:
   case nir_tex_src_texture_offset:
   case nir_tex_src_sampler_offset:
   case nir_tex_src_texture_handle:
   case nir_tex_src_sampler_handle:
      return nir_type_uint;

   case nir_num_tex_src_types:
      unreachable("nir_num_tex_src_types is not a valid source type");
   }

   unreachable("Invalid texture source type");
}

unsigned
nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src)
{
   if (instr->src[src].src_type == nir_tex_src_coord)
      return instr->coord_components;

   /* The MCS value is expected to be a vec4 returned by a txf_ms_mcs_intel */
   if (instr->src[src].src_type == nir_tex_src_ms_mcs_intel)
      return 4;

   if (instr->src[src].src_type == nir_tex_src_ddx ||
       instr->src[src].src_type == nir_tex_src_ddy) {

      if (instr->is_array && !instr->array_is_lowered_cube)
         return instr->coord_components - 1;
      else
         return instr->coord_components;
   }

   if (instr->src[src].src_type == nir_tex_src_offset) {
      if (instr->is_array)
         return instr->coord_components - 1;
      else
         return instr->coord_components;
   }

   if (instr->src[src].src_type == nir_tex_src_backend1 ||
       instr->src[src].src_type == nir_tex_src_backend2)
      return nir_src_num_components(instr->src[src].src);

   /* For AMD, this can be a vec8/vec4 image/sampler descriptor. */
   if (instr->src[src].src_type == nir_tex_src_texture_handle ||
       instr->src[src].src_type == nir_tex_src_sampler_handle)
      return 0;

   return 1;
}

/**
 * Return which components are written into transform feedback buffers.
 * The result is relative to 0, not "component".
 */
unsigned
nir_instr_xfb_write_mask(nir_intrinsic_instr *instr)
{
   unsigned mask = 0;

   if (nir_intrinsic_has_io_xfb(instr)) {
      unsigned wr_mask = nir_intrinsic_write_mask(instr) << nir_intrinsic_component(instr);
      assert((wr_mask & ~0xf) == 0); /* only 4 components allowed */

      unsigned iter_mask = wr_mask;
      while (iter_mask) {
         unsigned i = u_bit_scan(&iter_mask);
         nir_io_xfb xfb = i < 2 ? nir_intrinsic_io_xfb(instr) : nir_intrinsic_io_xfb2(instr);
         if (xfb.out[i % 2].num_components)
            mask |= BITFIELD_RANGE(i, xfb.out[i % 2].num_components) & wr_mask;
      }
   }

   return mask;
}
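
/*
 * A minimal usage sketch ("store" is a hypothetical store_output
 * intrinsic): since the returned mask is relative to component 0, a
 * caller can test a specific component directly:
 *
 *    if (nir_instr_xfb_write_mask(store) & BITFIELD_BIT(2))
 *       ; // component 2 of this store feeds a transform feedback buffer
 */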

/**
 * Whether an output slot is consumed by fixed-function logic.
 */
bool
nir_slot_is_sysval_output(gl_varying_slot slot, gl_shader_stage next_shader)
{
   switch (next_shader) {
   case MESA_SHADER_FRAGMENT:
      return slot == VARYING_SLOT_POS ||
             slot == VARYING_SLOT_PSIZ ||
             slot == VARYING_SLOT_EDGE ||
             slot == VARYING_SLOT_CLIP_VERTEX ||
             slot == VARYING_SLOT_CLIP_DIST0 ||
             slot == VARYING_SLOT_CLIP_DIST1 ||
             slot == VARYING_SLOT_CULL_DIST0 ||
             slot == VARYING_SLOT_CULL_DIST1 ||
             slot == VARYING_SLOT_LAYER ||
             slot == VARYING_SLOT_VIEWPORT ||
             slot == VARYING_SLOT_VIEW_INDEX ||
             slot == VARYING_SLOT_VIEWPORT_MASK ||
             slot == VARYING_SLOT_PRIMITIVE_SHADING_RATE ||
             /* NV_mesh_shader only */
             slot == VARYING_SLOT_PRIMITIVE_COUNT ||
             slot == VARYING_SLOT_PRIMITIVE_INDICES;

   case MESA_SHADER_TESS_EVAL:
      return slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
             slot == VARYING_SLOT_TESS_LEVEL_INNER ||
             slot == VARYING_SLOT_BOUNDING_BOX0 ||
             slot == VARYING_SLOT_BOUNDING_BOX1;

   case MESA_SHADER_MESH:
      /* NV_mesh_shader only */
      return slot == VARYING_SLOT_TASK_COUNT;

   case MESA_SHADER_NONE:
      /* NONE means unknown. Check all possibilities. */
      return nir_slot_is_sysval_output(slot, MESA_SHADER_FRAGMENT) ||
             nir_slot_is_sysval_output(slot, MESA_SHADER_TESS_EVAL) ||
             nir_slot_is_sysval_output(slot, MESA_SHADER_MESH);

   default:
      /* No other shaders have preceding shaders with sysval outputs. */
      return false;
   }
}

/**
 * Whether an input/output slot is consumed by the next shader stage,
 * or written by the previous shader stage.
 *
 * Pass MESA_SHADER_NONE if the next shader is unknown.
 */
bool
nir_slot_is_varying(gl_varying_slot slot, gl_shader_stage next_shader)
{
   bool unknown = next_shader == MESA_SHADER_NONE;
   bool exactly_before_fs = next_shader == MESA_SHADER_FRAGMENT || unknown;
   bool at_most_before_gs = next_shader <= MESA_SHADER_GEOMETRY || unknown;

   return slot >= VARYING_SLOT_VAR0 ||
          (slot == VARYING_SLOT_POS && at_most_before_gs) ||
          slot == VARYING_SLOT_COL0 ||
          slot == VARYING_SLOT_COL1 ||
          slot == VARYING_SLOT_BFC0 ||
          slot == VARYING_SLOT_BFC1 ||
          slot == VARYING_SLOT_FOGC ||
          (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) ||
          slot == VARYING_SLOT_PNTC ||
          (slot == VARYING_SLOT_CLIP_VERTEX && at_most_before_gs) ||
          slot == VARYING_SLOT_CLIP_DIST0 ||
          slot == VARYING_SLOT_CLIP_DIST1 ||
          slot == VARYING_SLOT_CULL_DIST0 ||
          slot == VARYING_SLOT_CULL_DIST1 ||
          slot == VARYING_SLOT_PRIMITIVE_ID ||
          slot == VARYING_SLOT_LAYER ||
          slot == VARYING_SLOT_VIEWPORT ||
          slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
          slot == VARYING_SLOT_TESS_LEVEL_INNER ||
          (slot == VARYING_SLOT_VIEW_INDEX && exactly_before_fs);
}
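
/*
 * A minimal usage sketch (hypothetical linker code; "intr" and
 * "next_stage" are placeholders): outputs are typically classified once
 * per store, and a slot can be both a sysval output and a varying (e.g.
 * VARYING_SLOT_LAYER before a fragment shader that reads gl_Layer), which
 * is what the helper below reports:
 *
 *    gl_varying_slot slot = nir_intrinsic_io_semantics(intr).location;
 *    bool varying = nir_slot_is_varying(slot, next_stage);
 *    bool sysval  = nir_slot_is_sysval_output(slot, next_stage);
 */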

bool
nir_slot_is_sysval_output_and_varying(gl_varying_slot slot,
                                      gl_shader_stage next_shader)
{
   return nir_slot_is_sysval_output(slot, next_shader) &&
          nir_slot_is_varying(slot, next_shader);
}

/**
 * This marks the output store instruction as not feeding the next shader
 * stage. If the instruction has no other use, it's removed.
 */
bool
nir_remove_varying(nir_intrinsic_instr *intr, gl_shader_stage next_shader)
{
   nir_io_semantics sem = nir_intrinsic_io_semantics(intr);

   if ((!sem.no_sysval_output &&
        nir_slot_is_sysval_output(sem.location, next_shader)) ||
       nir_instr_xfb_write_mask(intr)) {
      /* Demote the store instruction. */
      sem.no_varying = true;
      nir_intrinsic_set_io_semantics(intr, sem);
      return false;
   } else {
      nir_instr_remove(&intr->instr);
      return true;
   }
}

/**
 * This marks the output store instruction as not feeding fixed-function
 * logic. If the instruction has no other use, it's removed.
 */
bool
nir_remove_sysval_output(nir_intrinsic_instr *intr, gl_shader_stage next_shader)
{
   nir_io_semantics sem = nir_intrinsic_io_semantics(intr);

   if ((!sem.no_varying && nir_slot_is_varying(sem.location, next_shader)) ||
       nir_instr_xfb_write_mask(intr)) {
      /* Demote the store instruction. */
      sem.no_sysval_output = true;
      nir_intrinsic_set_io_semantics(intr, sem);
      return false;
   } else {
      nir_instr_remove(&intr->instr);
      return true;
   }
}
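
/*
 * A minimal usage sketch (hypothetical linker loop; "next_stage_reads" is
 * a placeholder predicate): a pass that has proven the next stage never
 * reads a location can try to drop its stores:
 *
 *    if (intr->intrinsic == nir_intrinsic_store_output &&
 *        !next_stage_reads(nir_intrinsic_io_semantics(intr).location))
 *       progress |= nir_remove_varying(intr, next_stage);
 *
 * The store is only demoted, not removed, while fixed-function logic or
 * transform feedback still consumes it.
 */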

void
nir_remove_non_entrypoints(nir_shader *nir)
{
   nir_foreach_function_safe(func, nir) {
      if (!func->is_entrypoint)
         exec_node_remove(&func->node);
   }
   assert(exec_list_length(&nir->functions) == 1);
}

void
nir_remove_non_exported(nir_shader *nir)
{
   nir_foreach_function_safe(func, nir) {
      if (!func->is_exported)
         exec_node_remove(&func->node);
   }
}

/*
 * After precompiling entrypoints from a kernel library, we want to garbage
 * collect the NIR entrypoints but leave the exported library functions. This
 * helper does that.
 */
void
nir_remove_entrypoints(nir_shader *nir)
{
   nir_foreach_entrypoint_safe(func, nir) {
      exec_node_remove(&func->node);
   }
}

unsigned
nir_static_workgroup_size(const nir_shader *s)
{
   return s->info.workgroup_size[0] * s->info.workgroup_size[1] *
          s->info.workgroup_size[2];
}

bool
nir_block_contains_work(nir_block *block)
{
   if (!nir_cf_node_is_last(&block->cf_node))
      return true;

   nir_foreach_instr(instr, block) {
      if (instr->type == nir_instr_type_phi)
         continue;
      if (instr->type != nir_instr_type_alu ||
          !nir_op_is_vec_or_mov(nir_instr_as_alu(instr)->op))
         return true;
   }

   return false;
}

nir_op
nir_atomic_op_to_alu(nir_atomic_op op)
{
   switch (op) {
   case nir_atomic_op_iadd:
      return nir_op_iadd;
   case nir_atomic_op_imin:
      return nir_op_imin;
   case nir_atomic_op_umin:
      return nir_op_umin;
   case nir_atomic_op_imax:
      return nir_op_imax;
   case nir_atomic_op_umax:
      return nir_op_umax;
   case nir_atomic_op_iand:
      return nir_op_iand;
   case nir_atomic_op_ior:
      return nir_op_ior;
   case nir_atomic_op_ixor:
      return nir_op_ixor;
   case nir_atomic_op_fadd:
      return nir_op_fadd;
   case nir_atomic_op_fmin:
      return nir_op_fmin;
   case nir_atomic_op_fmax:
      return nir_op_fmax;

   /* We don't handle exchanges or wraps */
   case nir_atomic_op_xchg:
   case nir_atomic_op_cmpxchg:
   case nir_atomic_op_fcmpxchg:
   case nir_atomic_op_inc_wrap:
   case nir_atomic_op_dec_wrap:
   case nir_atomic_op_ordered_add_gfx12_amd:
      return nir_num_opcodes;
   }

   unreachable("Invalid nir_atomic_op");
}
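
/*
 * A minimal usage sketch (hypothetical reduction code; "intr" is a
 * placeholder atomic intrinsic): nir_num_opcodes acts as a "no ALU
 * equivalent" sentinel and must be checked before the result is used:
 *
 *    nir_op red_op = nir_atomic_op_to_alu(nir_intrinsic_atomic_op(intr));
 *    if (red_op == nir_num_opcodes)
 *       return false; // exchange/wrap atomics have no reduction op
 */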