/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */

#include "nir.h"
#include <assert.h>
#include <limits.h>
#include <math.h>
#include "util/half_float.h"
#include "util/macros.h"
#include "util/u_math.h"
#include "util/u_qsort.h"
#include "nir_builder.h"
#include "nir_control_flow_private.h"
#include "nir_worklist.h"

#ifndef NDEBUG
uint32_t nir_debug = 0;
bool nir_debug_print_shader[MESA_SHADER_KERNEL + 1] = { 0 };

static const struct debug_named_value nir_debug_control[] = {
   { "clone", NIR_DEBUG_CLONE,
     "Test cloning a shader at each successful lowering/optimization call" },
   { "serialize", NIR_DEBUG_SERIALIZE,
     "Test serialize and deserialize shader at each successful lowering/optimization call" },
   { "novalidate", NIR_DEBUG_NOVALIDATE,
     "Disable shader validation at each successful lowering/optimization call" },
   { "tgsi", NIR_DEBUG_TGSI,
     "Dump NIR/TGSI shaders when doing a NIR<->TGSI translation" },
   { "print", NIR_DEBUG_PRINT,
     "Dump resulting shader after each successful lowering/optimization call" },
   { "print_vs", NIR_DEBUG_PRINT_VS,
     "Dump resulting vertex shader after each successful lowering/optimization call" },
   { "print_tcs", NIR_DEBUG_PRINT_TCS,
     "Dump resulting tessellation control shader after each successful lowering/optimization call" },
   { "print_tes", NIR_DEBUG_PRINT_TES,
     "Dump resulting tessellation evaluation shader after each successful lowering/optimization call" },
   { "print_gs", NIR_DEBUG_PRINT_GS,
     "Dump resulting geometry shader after each successful lowering/optimization call" },
   { "print_fs", NIR_DEBUG_PRINT_FS,
     "Dump resulting fragment shader after each successful lowering/optimization call" },
   { "print_cs", NIR_DEBUG_PRINT_CS,
     "Dump resulting compute shader after each successful lowering/optimization call" },
   { "print_ts", NIR_DEBUG_PRINT_TS,
     "Dump resulting task shader after each successful lowering/optimization call" },
   { "print_ms", NIR_DEBUG_PRINT_MS,
     "Dump resulting mesh shader after each successful lowering/optimization call" },
   { "print_rgs", NIR_DEBUG_PRINT_RGS,
     "Dump resulting raygen shader after each successful lowering/optimization call" },
   { "print_ahs", NIR_DEBUG_PRINT_AHS,
     "Dump resulting any-hit shader after each successful lowering/optimization call" },
   { "print_chs", NIR_DEBUG_PRINT_CHS,
     "Dump resulting closest-hit shader after each successful lowering/optimization call" },
   { "print_mhs", NIR_DEBUG_PRINT_MHS,
     "Dump resulting miss-hit shader after each successful lowering/optimization call" },
   { "print_is", NIR_DEBUG_PRINT_IS,
     "Dump resulting intersection shader after each successful lowering/optimization call" },
   { "print_cbs", NIR_DEBUG_PRINT_CBS,
     "Dump resulting callable shader after each successful lowering/optimization call" },
   { "print_ks", NIR_DEBUG_PRINT_KS,
     "Dump resulting kernel shader after each successful lowering/optimization call" },
   { "print_no_inline_consts", NIR_DEBUG_PRINT_NO_INLINE_CONSTS,
     "Do not print const value near each use of const SSA variable" },
   { "print_internal", NIR_DEBUG_PRINT_INTERNAL,
     "Print shaders even if they are marked as internal" },
   { "print_pass_flags", NIR_DEBUG_PRINT_PASS_FLAGS,
     "Print pass_flags for every instruction when pass_flags are non-zero" },
   DEBUG_NAMED_VALUE_END
};

DEBUG_GET_ONCE_FLAGS_OPTION(nir_debug, "NIR_DEBUG", nir_debug_control, 0)

static void
nir_process_debug_variable_once(void)
{
   nir_debug = debug_get_option_nir_debug();

   /* clang-format off */
   nir_debug_print_shader[MESA_SHADER_VERTEX]       = NIR_DEBUG(PRINT_VS);
   nir_debug_print_shader[MESA_SHADER_TESS_CTRL]    = NIR_DEBUG(PRINT_TCS);
   nir_debug_print_shader[MESA_SHADER_TESS_EVAL]    = NIR_DEBUG(PRINT_TES);
   nir_debug_print_shader[MESA_SHADER_GEOMETRY]     = NIR_DEBUG(PRINT_GS);
   nir_debug_print_shader[MESA_SHADER_FRAGMENT]     = NIR_DEBUG(PRINT_FS);
   nir_debug_print_shader[MESA_SHADER_COMPUTE]      = NIR_DEBUG(PRINT_CS);
   nir_debug_print_shader[MESA_SHADER_TASK]         = NIR_DEBUG(PRINT_TS);
   nir_debug_print_shader[MESA_SHADER_MESH]         = NIR_DEBUG(PRINT_MS);
   nir_debug_print_shader[MESA_SHADER_RAYGEN]       = NIR_DEBUG(PRINT_RGS);
   nir_debug_print_shader[MESA_SHADER_ANY_HIT]      = NIR_DEBUG(PRINT_AHS);
   nir_debug_print_shader[MESA_SHADER_CLOSEST_HIT]  = NIR_DEBUG(PRINT_CHS);
   nir_debug_print_shader[MESA_SHADER_MISS]         = NIR_DEBUG(PRINT_MHS);
   nir_debug_print_shader[MESA_SHADER_INTERSECTION] = NIR_DEBUG(PRINT_IS);
   nir_debug_print_shader[MESA_SHADER_CALLABLE]     = NIR_DEBUG(PRINT_CBS);
   nir_debug_print_shader[MESA_SHADER_KERNEL]       = NIR_DEBUG(PRINT_KS);
   /* clang-format on */
}

void
nir_process_debug_variable(void)
{
   static once_flag flag = ONCE_FLAG_INIT;
   call_once(&flag, nir_process_debug_variable_once);
}
#endif

/** Return true if the component mask "mask" with bit size "old_bit_size" can
 * be re-interpreted to be used with "new_bit_size".
 */
bool
nir_component_mask_can_reinterpret(nir_component_mask_t mask,
                                   unsigned old_bit_size,
                                   unsigned new_bit_size)
{
   assert(util_is_power_of_two_nonzero(old_bit_size));
   assert(util_is_power_of_two_nonzero(new_bit_size));

   if (old_bit_size == new_bit_size)
      return true;

   if (old_bit_size == 1 || new_bit_size == 1)
      return false;

   if (old_bit_size > new_bit_size) {
      unsigned ratio = old_bit_size / new_bit_size;
      return util_last_bit(mask) * ratio <= NIR_MAX_VEC_COMPONENTS;
   }

   unsigned iter = mask;
   while (iter) {
      int start, count;
      u_bit_scan_consecutive_range(&iter, &start, &count);
      start *= old_bit_size;
      count *= old_bit_size;
      if (start % new_bit_size != 0)
         return false;
      if (count % new_bit_size != 0)
         return false;
   }
   return true;
}

/** Re-interprets a component mask "mask" with bit size "old_bit_size" so that
 * it can be used with "new_bit_size".
 */
nir_component_mask_t
nir_component_mask_reinterpret(nir_component_mask_t mask,
                               unsigned old_bit_size,
                               unsigned new_bit_size)
{
   assert(nir_component_mask_can_reinterpret(mask, old_bit_size, new_bit_size));

   if (old_bit_size == new_bit_size)
      return mask;

   nir_component_mask_t new_mask = 0;
   unsigned iter = mask;
   while (iter) {
      int start, count;
      u_bit_scan_consecutive_range(&iter, &start, &count);
      start = start * old_bit_size / new_bit_size;
      count = count * old_bit_size / new_bit_size;
      new_mask |= BITFIELD_RANGE(start, count);
   }
   return new_mask;
}
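
/* Illustrative sketch (not part of the library): reinterpreting a two-wide
 * 32-bit write mask as a 16-bit mask doubles the component count:
 *
 *    nir_component_mask_t m = 0x3;                     // .xy at 32 bits
 *    assert(nir_component_mask_can_reinterpret(m, 32, 16));
 *    m = nir_component_mask_reinterpret(m, 32, 16);    // now 0xf (.xyzw)
 */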

nir_shader *
nir_shader_create(void *mem_ctx,
                  gl_shader_stage stage,
                  const nir_shader_compiler_options *options,
                  shader_info *si)
{
   nir_shader *shader = rzalloc(mem_ctx, nir_shader);

   shader->gctx = gc_context(shader);

#ifndef NDEBUG
   nir_process_debug_variable();
#endif

   exec_list_make_empty(&shader->variables);

   shader->options = options;

   if (si) {
      assert(si->stage == stage);
      shader->info = *si;
   } else {
      shader->info.stage = stage;
   }

   exec_list_make_empty(&shader->functions);

   shader->num_inputs = 0;
   shader->num_outputs = 0;
   shader->num_uniforms = 0;

   return shader;
}
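
/* Minimal usage sketch (assumes a driver-provided compiler `options`
 * pointer):
 *
 *    nir_shader *s =
 *       nir_shader_create(NULL, MESA_SHADER_FRAGMENT, options, NULL);
 *    // s->info.stage == MESA_SHADER_FRAGMENT; the variable and function
 *    // lists start out empty
 */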

void
nir_shader_add_variable(nir_shader *shader, nir_variable *var)
{
   switch (var->data.mode) {
   case nir_var_function_temp:
      assert(!"nir_shader_add_variable cannot be used for local variables");
      return;

   case nir_var_shader_temp:
   case nir_var_shader_in:
   case nir_var_shader_out:
   case nir_var_uniform:
   case nir_var_mem_ubo:
   case nir_var_mem_ssbo:
   case nir_var_image:
   case nir_var_mem_shared:
   case nir_var_system_value:
   case nir_var_mem_push_const:
   case nir_var_mem_constant:
   case nir_var_shader_call_data:
   case nir_var_ray_hit_attrib:
   case nir_var_mem_task_payload:
   case nir_var_mem_node_payload:
   case nir_var_mem_node_payload_in:
   case nir_var_mem_global:
      break;

   default:
      assert(!"invalid mode");
      return;
   }

   exec_list_push_tail(&shader->variables, &var->node);
}

nir_variable *
nir_variable_create(nir_shader *shader, nir_variable_mode mode,
                    const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = mode;
   var->data.how_declared = nir_var_declared_normally;

   if ((mode == nir_var_shader_in &&
        shader->info.stage != MESA_SHADER_VERTEX &&
        shader->info.stage != MESA_SHADER_KERNEL) ||
       (mode == nir_var_shader_out &&
        shader->info.stage != MESA_SHADER_FRAGMENT))
      var->data.interpolation = INTERP_MODE_SMOOTH;

   if (mode == nir_var_shader_in || mode == nir_var_uniform)
      var->data.read_only = true;

   nir_shader_add_variable(shader, var);

   return var;
}
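
/* Sketch: creating a read-only uniform vec4 (glsl_vec4_type() comes from
 * the GLSL type helpers):
 *
 *    nir_variable *v =
 *       nir_variable_create(shader, nir_var_uniform, glsl_vec4_type(), "u");
 *    // v->data.read_only is set, and v is appended to shader->variables
 */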

nir_variable *
nir_local_variable_create(nir_function_impl *impl,
                          const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(impl->function->shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = nir_var_function_temp;

   nir_function_impl_add_variable(impl, var);

   return var;
}

nir_variable *
nir_state_variable_create(nir_shader *shader,
                          const struct glsl_type *type,
                          const char *name,
                          const gl_state_index16 tokens[STATE_LENGTH])
{
   nir_variable *var = nir_variable_create(shader, nir_var_uniform, type, name);
   var->num_state_slots = 1;
   var->state_slots = rzalloc_array(var, nir_state_slot, 1);
   memcpy(var->state_slots[0].tokens, tokens,
          sizeof(var->state_slots[0].tokens));
   shader->num_uniforms++;
   return var;
}

nir_variable *
nir_create_variable_with_location(nir_shader *shader, nir_variable_mode mode, int location,
                                  const struct glsl_type *type)
{
   /* Only supporting non-array, or arrayed-io types, because otherwise we don't
    * know how much to increment num_inputs/outputs
    */
   assert(glsl_type_is_vector_or_scalar(type) || glsl_type_is_unsized_array(type));

   const char *name;
   switch (mode) {
   case nir_var_shader_in:
      if (shader->info.stage == MESA_SHADER_VERTEX)
         name = gl_vert_attrib_name(location);
      else
         name = gl_varying_slot_name_for_stage(location, shader->info.stage);
      break;

   case nir_var_shader_out:
      if (shader->info.stage == MESA_SHADER_FRAGMENT)
         name = gl_frag_result_name(location);
      else
         name = gl_varying_slot_name_for_stage(location, shader->info.stage);
      break;

   case nir_var_system_value:
      name = gl_system_value_name(location);
      break;

   default:
      unreachable("Unsupported variable mode");
   }

   nir_variable *var = nir_variable_create(shader, mode, type, name);
   var->data.location = location;

   switch (mode) {
   case nir_var_shader_in:
      var->data.driver_location = shader->num_inputs++;
      break;

   case nir_var_shader_out:
      var->data.driver_location = shader->num_outputs++;
      break;

   case nir_var_system_value:
      break;

   default:
      unreachable("Unsupported variable mode");
   }

   return var;
}

nir_variable *
nir_get_variable_with_location(nir_shader *shader, nir_variable_mode mode, int location,
                               const struct glsl_type *type)
{
   nir_variable *var = nir_find_variable_with_location(shader, mode, location);
   if (var) {
      /* If this shader has location_fracs, this builder function is not suitable. */
      assert(var->data.location_frac == 0);

      /* The variable for the slot should match what we expected. */
      assert(type == var->type);
      return var;
   }

   return nir_create_variable_with_location(shader, mode, location, type);
}

nir_variable *
nir_find_variable_with_location(nir_shader *shader,
                                nir_variable_mode mode,
                                unsigned location)
{
   assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp);
   nir_foreach_variable_with_modes(var, shader, mode) {
      if (var->data.location == location)
         return var;
   }
   return NULL;
}

nir_variable *
nir_find_variable_with_driver_location(nir_shader *shader,
                                       nir_variable_mode mode,
                                       unsigned location)
{
   assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp);
   nir_foreach_variable_with_modes(var, shader, mode) {
      if (var->data.driver_location == location)
         return var;
   }
   return NULL;
}

nir_variable *
nir_find_state_variable(nir_shader *s,
                        gl_state_index16 tokens[STATE_LENGTH])
{
   nir_foreach_variable_with_modes(var, s, nir_var_uniform) {
      if (var->num_state_slots == 1 &&
          !memcmp(var->state_slots[0].tokens, tokens,
                  sizeof(var->state_slots[0].tokens)))
         return var;
   }
   return NULL;
}

nir_variable *nir_find_sampler_variable_with_tex_index(nir_shader *shader,
                                                       unsigned texture_index)
{
   nir_foreach_variable_with_modes(var, shader, nir_var_uniform) {
      unsigned size =
          glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
      if ((glsl_type_is_texture(glsl_without_array(var->type)) ||
           glsl_type_is_sampler(glsl_without_array(var->type))) &&
          (var->data.binding == texture_index ||
           (var->data.binding < texture_index &&
            var->data.binding + size > texture_index)))
         return var;
   }
   return NULL;
}

/* Annoyingly, qsort_r is not in the C standard library and, in particular, we
 * can't count on it on MSVC and Android.  So we stuff the cmp function into
 * each array element.  It's a bit messy and burns more memory but the list of
 * variables should never be all that long.
 */
struct var_cmp {
   nir_variable *var;
   int (*cmp)(const nir_variable *, const nir_variable *);
};

static int
var_sort_cmp(const void *_a, const void *_b, void *_cmp)
{
   const struct var_cmp *a = _a;
   const struct var_cmp *b = _b;
   assert(a->cmp == b->cmp);
   return a->cmp(a->var, b->var);
}

void
nir_sort_variables_with_modes(nir_shader *shader,
                              int (*cmp)(const nir_variable *,
                                         const nir_variable *),
                              nir_variable_mode modes)
{
   unsigned num_vars = 0;
   nir_foreach_variable_with_modes(var, shader, modes) {
      ++num_vars;
   }
   struct var_cmp *vars = ralloc_array(shader, struct var_cmp, num_vars);
   unsigned i = 0;
   nir_foreach_variable_with_modes_safe(var, shader, modes) {
      exec_node_remove(&var->node);
      vars[i++] = (struct var_cmp){
         .var = var,
         .cmp = cmp,
      };
   }
   assert(i == num_vars);

   util_qsort_r(vars, num_vars, sizeof(*vars), var_sort_cmp, cmp);

   for (i = 0; i < num_vars; i++)
      exec_list_push_tail(&shader->variables, &vars[i].var->node);

   ralloc_free(vars);
}
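
/* Sketch: sorting uniforms by location with a caller-supplied comparator:
 *
 *    static int
 *    cmp_loc(const nir_variable *a, const nir_variable *b)
 *    {
 *       return a->data.location - b->data.location;
 *    }
 *
 *    nir_sort_variables_with_modes(shader, cmp_loc, nir_var_uniform);
 */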

nir_function *
nir_function_create(nir_shader *shader, const char *name)
{
   nir_function *func = ralloc(shader, nir_function);

   exec_list_push_tail(&shader->functions, &func->node);

   func->name = ralloc_strdup(func, name);
   func->shader = shader;
   func->num_params = 0;
   func->params = NULL;
   func->impl = NULL;
   func->is_entrypoint = false;
   func->is_preamble = false;
   func->dont_inline = false;
   func->should_inline = false;
   func->driver_attributes = 0;
   func->is_subroutine = false;
   func->is_tmp_globals_wrapper = false;
   func->subroutine_index = 0;
   func->num_subroutine_types = 0;
   func->subroutine_types = NULL;
   func->workgroup_size[0] = 0;
   func->workgroup_size[1] = 0;
   func->workgroup_size[2] = 0;

   /* Only meaningful for shader libraries, so don't export by default. */
   func->is_exported = false;

   return func;
}

void
nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src)
{
   dest->src = nir_src_for_ssa(src->src.ssa);
   for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
      dest->swizzle[i] = src->swizzle[i];
}

bool
nir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, unsigned srcn)
{
   static uint8_t trivial_swizzle[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
   STATIC_ASSERT(ARRAY_SIZE(trivial_swizzle) == NIR_MAX_VEC_COMPONENTS);

   const nir_alu_src *src = &alu->src[srcn];
   unsigned num_components = nir_ssa_alu_instr_src_components(alu, srcn);

   return (src->src.ssa->num_components == num_components) &&
          (memcmp(src->swizzle, trivial_swizzle, num_components) == 0);
}

static void
cf_init(nir_cf_node *node, nir_cf_node_type type)
{
   exec_node_init(&node->node);
   node->parent = NULL;
   node->type = type;
}

nir_function_impl *
nir_function_impl_create_bare(nir_shader *shader)
{
   nir_function_impl *impl = ralloc(shader, nir_function_impl);

   impl->function = NULL;
   impl->preamble = NULL;

   cf_init(&impl->cf_node, nir_cf_node_function);

   exec_list_make_empty(&impl->body);
   exec_list_make_empty(&impl->locals);
   impl->ssa_alloc = 0;
   impl->num_blocks = 0;
   impl->valid_metadata = nir_metadata_none;
   impl->structured = true;

   /* create start & end blocks */
   nir_block *start_block = nir_block_create(shader);
   nir_block *end_block = nir_block_create(shader);
   start_block->cf_node.parent = &impl->cf_node;
   end_block->cf_node.parent = &impl->cf_node;
   impl->end_block = end_block;

   exec_list_push_tail(&impl->body, &start_block->cf_node.node);

   start_block->successors[0] = end_block;
   _mesa_set_add(end_block->predecessors, start_block);
   return impl;
}

nir_function_impl *
nir_function_impl_create(nir_function *function)
{
   assert(function->impl == NULL);

   nir_function_impl *impl = nir_function_impl_create_bare(function->shader);
   nir_function_set_impl(function, impl);
   return impl;
}

nir_block *
nir_block_create(nir_shader *shader)
{
   nir_block *block = rzalloc(shader, nir_block);

   cf_init(&block->cf_node, nir_cf_node_block);

   block->successors[0] = block->successors[1] = NULL;
   block->predecessors = _mesa_pointer_set_create(block);
   block->imm_dom = NULL;
   /* XXX maybe it would be worth it to defer allocation?  This
    * way it doesn't get allocated for shader refs that never run
    * nir_calc_dominance?  For example, state-tracker creates an
    * initial IR, clones that, runs appropriate lowering pass, passes
    * to driver which does common lowering/opt, and then stores ref
    * which is later used to do state specific lowering and further
    * opt.  Do any of the references not need dominance metadata?
    */
   block->dom_frontier = _mesa_pointer_set_create(block);

   exec_list_make_empty(&block->instr_list);

   return block;
}

static inline void
src_init(nir_src *src)
{
   src->ssa = NULL;
}

nir_if *
nir_if_create(nir_shader *shader)
{
   nir_if *if_stmt = ralloc(shader, nir_if);

   if_stmt->control = nir_selection_control_none;

   cf_init(&if_stmt->cf_node, nir_cf_node_if);
   src_init(&if_stmt->condition);

   nir_block *then = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->then_list);
   exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
   then->cf_node.parent = &if_stmt->cf_node;

   nir_block *else_stmt = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->else_list);
   exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
   else_stmt->cf_node.parent = &if_stmt->cf_node;

   return if_stmt;
}

nir_loop *
nir_loop_create(nir_shader *shader)
{
   nir_loop *loop = rzalloc(shader, nir_loop);

   cf_init(&loop->cf_node, nir_cf_node_loop);
   /* Assume that loops are divergent until proven otherwise */
   loop->divergent_break = true;
   loop->divergent_continue = true;

   nir_block *body = nir_block_create(shader);
   exec_list_make_empty(&loop->body);
   exec_list_push_tail(&loop->body, &body->cf_node.node);
   body->cf_node.parent = &loop->cf_node;

   body->successors[0] = body;
   _mesa_set_add(body->predecessors, body);

   exec_list_make_empty(&loop->continue_list);

   return loop;
}

static void
instr_init(nir_instr *instr, nir_instr_type type)
{
   instr->type = type;
   instr->block = NULL;
   exec_node_init(&instr->node);
}

static void
alu_src_init(nir_alu_src *src)
{
   src_init(&src->src);
   for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
      src->swizzle[i] = i;
}

nir_alu_instr *
nir_alu_instr_create(nir_shader *shader, nir_op op)
{
   unsigned num_srcs = nir_op_infos[op].num_inputs;
   nir_alu_instr *instr = gc_zalloc_zla(shader->gctx, nir_alu_instr, nir_alu_src, num_srcs);

   instr_init(&instr->instr, nir_instr_type_alu);
   instr->op = op;
   for (unsigned i = 0; i < num_srcs; i++)
      alu_src_init(&instr->src[i]);

   return instr;
}

nir_deref_instr *
nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
{
   nir_deref_instr *instr = gc_zalloc(shader->gctx, nir_deref_instr, 1);

   instr_init(&instr->instr, nir_instr_type_deref);

   instr->deref_type = deref_type;
   if (deref_type != nir_deref_type_var)
      src_init(&instr->parent);

   if (deref_type == nir_deref_type_array ||
       deref_type == nir_deref_type_ptr_as_array)
      src_init(&instr->arr.index);

   return instr;
}

nir_jump_instr *
nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
{
   nir_jump_instr *instr = gc_alloc(shader->gctx, nir_jump_instr, 1);
   instr_init(&instr->instr, nir_instr_type_jump);
   src_init(&instr->condition);
   instr->type = type;
   instr->target = NULL;
   instr->else_target = NULL;

   return instr;
}

nir_load_const_instr *
nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
                            unsigned bit_size)
{
   nir_load_const_instr *instr =
      gc_zalloc_zla(shader->gctx, nir_load_const_instr, nir_const_value, num_components);
   instr_init(&instr->instr, nir_instr_type_load_const);

   nir_def_init(&instr->instr, &instr->def, num_components, bit_size);

   return instr;
}

nir_intrinsic_instr *
nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
{
   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   nir_intrinsic_instr *instr =
      gc_zalloc_zla(shader->gctx, nir_intrinsic_instr, nir_src, num_srcs);

   instr_init(&instr->instr, nir_instr_type_intrinsic);
   instr->intrinsic = op;

   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i]);

   return instr;
}

nir_call_instr *
nir_call_instr_create(nir_shader *shader, nir_function *callee)
{
   const unsigned num_params = callee->num_params;
   nir_call_instr *instr =
      gc_zalloc_zla(shader->gctx, nir_call_instr, nir_src, num_params);

   instr_init(&instr->instr, nir_instr_type_call);
   instr->callee = callee;
   instr->num_params = num_params;
   for (unsigned i = 0; i < num_params; i++)
      src_init(&instr->params[i]);

   return instr;
}

static int8_t default_tg4_offsets[4][2] = {
   { 0, 1 },
   { 1, 1 },
   { 1, 0 },
   { 0, 0 },
};

nir_tex_instr *
nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
{
   nir_tex_instr *instr = gc_zalloc(shader->gctx, nir_tex_instr, 1);
   instr_init(&instr->instr, nir_instr_type_tex);

   instr->num_srcs = num_srcs;
   instr->src = gc_alloc(shader->gctx, nir_tex_src, num_srcs);
   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i].src);

   instr->texture_index = 0;
   instr->sampler_index = 0;
   memcpy(instr->tg4_offsets, default_tg4_offsets, sizeof(instr->tg4_offsets));

   return instr;
}

void
nir_tex_instr_add_src(nir_tex_instr *tex,
                      nir_tex_src_type src_type,
                      nir_def *src)
{
   nir_tex_src *new_srcs = gc_zalloc(gc_get_context(tex), nir_tex_src, tex->num_srcs + 1);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      new_srcs[i].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &new_srcs[i].src,
                         &tex->src[i].src);
   }

   gc_free(tex->src);
   tex->src = new_srcs;

   tex->src[tex->num_srcs].src_type = src_type;
   nir_instr_init_src(&tex->instr, &tex->src[tex->num_srcs].src, src);
   tex->num_srcs++;
}
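
/* Sketch: appending an explicit LOD source to an existing texture
 * instruction (`b` is a hypothetical nir_builder positioned appropriately):
 *
 *    nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_imm_int(&b, 0));
 */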

void
nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx)
{
   assert(src_idx < tex->num_srcs);

   /* First rewrite the source to NIR_SRC_INIT */
   nir_instr_clear_src(&tex->instr, &tex->src[src_idx].src);

   /* Now, move all of the other sources down */
   for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) {
      tex->src[i - 1].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &tex->src[i - 1].src, &tex->src[i].src);
   }
   tex->num_srcs--;
}

bool
nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex)
{
   if (tex->op != nir_texop_tg4)
      return false;
   return memcmp(tex->tg4_offsets, default_tg4_offsets,
                 sizeof(tex->tg4_offsets)) != 0;
}

nir_phi_instr *
nir_phi_instr_create(nir_shader *shader)
{
   nir_phi_instr *instr = gc_alloc(shader->gctx, nir_phi_instr, 1);
   instr_init(&instr->instr, nir_instr_type_phi);

   exec_list_make_empty(&instr->srcs);

   return instr;
}

/**
 * Adds a new source to a NIR instruction.
 *
 * Note that this does not update the def/use relationship for src, assuming
 * that the instr is not in the shader.  If it is, you have to do:
 *
 * list_addtail(&phi_src->src.use_link, &src.ssa->uses);
 */
nir_phi_src *
nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_def *src)
{
   nir_phi_src *phi_src;

   phi_src = gc_zalloc(gc_get_context(instr), nir_phi_src, 1);
   phi_src->pred = pred;
   phi_src->src = nir_src_for_ssa(src);
   nir_src_set_parent_instr(&phi_src->src, &instr->instr);
   exec_list_push_tail(&instr->srcs, &phi_src->node);

   return phi_src;
}
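
/* Sketch: adding a source to a phi that is already in the shader, including
 * the use-list fixup the comment above calls out (`pred_block` and
 * `some_def` are hypothetical):
 *
 *    nir_phi_src *ps = nir_phi_instr_add_src(phi, pred_block, some_def);
 *    list_addtail(&ps->src.use_link, &ps->src.ssa->uses);
 */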

nir_parallel_copy_instr *
nir_parallel_copy_instr_create(nir_shader *shader)
{
   nir_parallel_copy_instr *instr = gc_alloc(shader->gctx, nir_parallel_copy_instr, 1);
   instr_init(&instr->instr, nir_instr_type_parallel_copy);

   exec_list_make_empty(&instr->entries);

   return instr;
}

nir_debug_info_instr *
nir_debug_info_instr_create(nir_shader *shader, nir_debug_info_type type,
                            uint32_t string_length)
{
   uint32_t additional_size = 0;
   if (type == nir_debug_info_string)
      additional_size = string_length + 1;

   nir_debug_info_instr *instr = gc_zalloc_size(
      shader->gctx, sizeof(nir_debug_info_instr) + additional_size, 1);
   instr_init(&instr->instr, nir_instr_type_debug_info);

   instr->type = type;

   if (type == nir_debug_info_string)
      instr->string_length = string_length;

   return instr;
}

nir_undef_instr *
nir_undef_instr_create(nir_shader *shader,
                       unsigned num_components,
                       unsigned bit_size)
{
   nir_undef_instr *instr = gc_alloc(shader->gctx, nir_undef_instr, 1);
   instr_init(&instr->instr, nir_instr_type_undef);

   nir_def_init(&instr->instr, &instr->def, num_components, bit_size);

   return instr;
}

static nir_const_value
const_value_float(double d, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   /* clang-format off */
   switch (bit_size) {
   case 16: v.u16 = _mesa_float_to_half(d);  break;
   case 32: v.f32 = d;                       break;
   case 64: v.f64 = d;                       break;
   default:
      unreachable("Invalid bit size");
   }
   /* clang-format on */

   return v;
}

static nir_const_value
const_value_int(int64_t i, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   /* clang-format off */
   switch (bit_size) {
   case 1:  v.b   = i & 1; break;
   case 8:  v.i8  = i;     break;
   case 16: v.i16 = i;     break;
   case 32: v.i32 = i;     break;
   case 64: v.i64 = i;     break;
   default:
      unreachable("Invalid bit size");
   }
   /* clang-format on */

   return v;
}

nir_const_value
nir_alu_binop_identity(nir_op binop, unsigned bit_size)
{
   const int64_t max_int = (1ull << (bit_size - 1)) - 1;
   const int64_t min_int = -max_int - 1;
   switch (binop) {
   case nir_op_iadd:
      return const_value_int(0, bit_size);
   case nir_op_fadd:
      return const_value_float(0, bit_size);
   case nir_op_imul:
      return const_value_int(1, bit_size);
   case nir_op_fmul:
      return const_value_float(1, bit_size);
   case nir_op_imin:
      return const_value_int(max_int, bit_size);
   case nir_op_umin:
      return const_value_int(~0ull, bit_size);
   case nir_op_fmin:
      return const_value_float(INFINITY, bit_size);
   case nir_op_imax:
      return const_value_int(min_int, bit_size);
   case nir_op_umax:
      return const_value_int(0, bit_size);
   case nir_op_fmax:
      return const_value_float(-INFINITY, bit_size);
   case nir_op_iand:
      return const_value_int(~0ull, bit_size);
   case nir_op_ior:
      return const_value_int(0, bit_size);
   case nir_op_ixor:
      return const_value_int(0, bit_size);
   default:
      unreachable("Invalid reduction operation");
   }
}
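
/* Sketch: identities are what reduction lowering seeds its accumulator
 * with, e.g. for 32-bit integer add and min:
 *
 *    nir_const_value id = nir_alu_binop_identity(nir_op_iadd, 32);
 *    // id.i32 == 0; nir_alu_binop_identity(nir_op_imin, 32).i32 == INT32_MAX
 */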

nir_function_impl *
nir_cf_node_get_function(nir_cf_node *node)
{
   while (node->type != nir_cf_node_function) {
      node = node->parent;
   }

   return nir_cf_node_as_function(node);
}

/* Reduces a cursor by trying to convert everything to after and trying to
 * go up to block granularity when possible.
 */
static nir_cursor
reduce_cursor(nir_cursor cursor)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      if (exec_list_is_empty(&cursor.block->instr_list)) {
         /* Empty block.  After is as good as before. */
         cursor.option = nir_cursor_after_block;
      }
      return cursor;

   case nir_cursor_after_block:
      return cursor;

   case nir_cursor_before_instr: {
      nir_instr *prev_instr = nir_instr_prev(cursor.instr);
      if (prev_instr) {
         /* Before this instruction is after the previous */
         cursor.instr = prev_instr;
         cursor.option = nir_cursor_after_instr;
      } else {
         /* No previous instruction.  Switch to before block */
         cursor.block = cursor.instr->block;
         cursor.option = nir_cursor_before_block;
      }
      return reduce_cursor(cursor);
   }

   case nir_cursor_after_instr:
      if (nir_instr_next(cursor.instr) == NULL) {
         /* This is the last instruction, switch to after block */
         cursor.option = nir_cursor_after_block;
         cursor.block = cursor.instr->block;
      }
      return cursor;

   default:
      unreachable("Invalid cursor option");
   }
}

bool
nir_cursors_equal(nir_cursor a, nir_cursor b)
{
   /* Reduced cursors should be unique */
   a = reduce_cursor(a);
   b = reduce_cursor(b);

   return a.block == b.block && a.option == b.option;
}

static bool
add_use_cb(nir_src *src, void *state)
{
   nir_instr *instr = state;

   nir_src_set_parent_instr(src, instr);
   list_addtail(&src->use_link, &src->ssa->uses);

   return true;
}

static bool
add_ssa_def_cb(nir_def *def, void *state)
{
   nir_instr *instr = state;

   if (instr->block && def->index == UINT_MAX) {
      nir_function_impl *impl =
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;

      impl->valid_metadata &= ~nir_metadata_live_defs;
   }

   return true;
}

static void
add_defs_uses(nir_instr *instr)
{
   nir_foreach_src(instr, add_use_cb, instr);
   nir_foreach_def(instr, add_ssa_def_cb, instr);
}

void
nir_instr_insert(nir_cursor cursor, nir_instr *instr)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      /* Only allow inserting jumps into empty blocks. */
      if (instr->type == nir_instr_type_jump)
         assert(exec_list_is_empty(&cursor.block->instr_list));

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_head(&cursor.block->instr_list, &instr->node);
      break;
   case nir_cursor_after_block: {
      /* Inserting instructions after a jump is illegal. */
      nir_instr *last = nir_block_last_instr(cursor.block);
      assert(last == NULL || last->type != nir_instr_type_jump);
      (void)last;

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_tail(&cursor.block->instr_list, &instr->node);
      break;
   }
   case nir_cursor_before_instr:
      assert(instr->type != nir_instr_type_jump);
      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_node_before(&cursor.instr->node, &instr->node);
      break;
   case nir_cursor_after_instr:
      /* Inserting instructions after a jump is illegal. */
      assert(cursor.instr->type != nir_instr_type_jump);

      /* Only allow inserting jumps at the end of the block. */
      if (instr->type == nir_instr_type_jump)
         assert(cursor.instr == nir_block_last_instr(cursor.instr->block));

      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_after(&cursor.instr->node, &instr->node);
      break;
   }

   if (instr->type == nir_instr_type_jump)
      nir_handle_add_jump(instr->block);

   nir_function_impl *impl = nir_cf_node_get_function(&instr->block->cf_node);
   impl->valid_metadata &= ~nir_metadata_instr_index;
}
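
/* Sketch: the cursor helpers from nir.h pick the insertion point, e.g.
 * placing a freshly built instruction right after an existing ALU:
 *
 *    nir_instr_insert(nir_after_instr(&alu->instr), &new_instr->instr);
 */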

bool
nir_instr_move(nir_cursor cursor, nir_instr *instr)
{
   /* If the cursor happens to refer to this instruction (either before or
    * after), don't do anything.
    */
   switch (cursor.option) {
   case nir_cursor_before_instr:
      if (cursor.instr == instr || nir_instr_prev(cursor.instr) == instr)
         return false;
      break;
   case nir_cursor_after_instr:
      if (cursor.instr == instr || nir_instr_next(cursor.instr) == instr)
         return false;
      break;
   case nir_cursor_before_block:
      if (cursor.block == instr->block && nir_instr_is_first(instr))
         return false;
      break;
   case nir_cursor_after_block:
      if (cursor.block == instr->block && nir_instr_is_last(instr))
         return false;
      break;
   }

   nir_instr_remove(instr);
   nir_instr_insert(cursor, instr);
   return true;
}

static bool
src_is_valid(const nir_src *src)
{
   return (src->ssa != NULL);
}

static bool
remove_use_cb(nir_src *src, void *state)
{
   (void)state;

   if (src_is_valid(src))
      list_del(&src->use_link);

   return true;
}

static void
remove_defs_uses(nir_instr *instr)
{
   nir_foreach_src(instr, remove_use_cb, instr);
}

void
nir_instr_remove_v(nir_instr *instr)
{
   remove_defs_uses(instr);
   exec_node_remove(&instr->node);

   if (instr->type == nir_instr_type_jump) {
      nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
      nir_handle_remove_jump(instr->block, jump_instr->type);
   }
}

void
nir_instr_free(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_tex:
      gc_free(nir_instr_as_tex(instr)->src);
      break;

   case nir_instr_type_phi: {
      nir_phi_instr *phi = nir_instr_as_phi(instr);
      nir_foreach_phi_src_safe(phi_src, phi)
         gc_free(phi_src);
      break;
   }

   default:
      break;
   }

   gc_free(instr);
}

void
nir_instr_free_list(struct exec_list *list)
{
   struct exec_node *node;
   while ((node = exec_list_pop_head(list))) {
      nir_instr *removed_instr = exec_node_data(nir_instr, node, node);
      nir_instr_free(removed_instr);
   }
}

static bool
nir_instr_free_and_dce_live_cb(nir_def *def, void *state)
{
   bool *live = state;

   if (!nir_def_is_unused(def)) {
      *live = true;
      return false;
   } else {
      return true;
   }
}

static bool
nir_instr_free_and_dce_is_live(nir_instr *instr)
{
   /* Note: don't have to worry about jumps because they don't have dests to
    * become unused.
    */
   if (instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
      if (!(info->flags & NIR_INTRINSIC_CAN_ELIMINATE))
         return true;
   }

   bool live = false;
   nir_foreach_def(instr, nir_instr_free_and_dce_live_cb, &live);
   return live;
}

static bool
nir_instr_dce_add_dead_srcs_cb(nir_src *src, void *state)
{
   nir_instr_worklist *wl = state;

   list_del(&src->use_link);
   if (!nir_instr_free_and_dce_is_live(src->ssa->parent_instr))
      nir_instr_worklist_push_tail(wl, src->ssa->parent_instr);

   /* Stop nir_instr_remove from trying to delete the link again. */
   src->ssa = NULL;

   return true;
}

static void
nir_instr_dce_add_dead_ssa_srcs(nir_instr_worklist *wl, nir_instr *instr)
{
   nir_foreach_src(instr, nir_instr_dce_add_dead_srcs_cb, wl);
}

/**
 * Frees an instruction and any SSA defs that it used that are now dead,
 * returning a nir_cursor where the instruction previously was.
 */
nir_cursor
nir_instr_free_and_dce(nir_instr *instr)
{
   nir_instr_worklist *worklist = nir_instr_worklist_create();

   nir_instr_dce_add_dead_ssa_srcs(worklist, instr);
   nir_cursor c = nir_instr_remove(instr);

   struct exec_list to_free;
   exec_list_make_empty(&to_free);

   nir_instr *dce_instr;
   while ((dce_instr = nir_instr_worklist_pop_head(worklist))) {
      nir_instr_dce_add_dead_ssa_srcs(worklist, dce_instr);

      /* If we're removing the instr where our cursor is, then we have to
       * point the cursor elsewhere.
       */
      if ((c.option == nir_cursor_before_instr ||
           c.option == nir_cursor_after_instr) &&
          c.instr == dce_instr)
         c = nir_instr_remove(dce_instr);
      else
         nir_instr_remove(dce_instr);
      exec_list_push_tail(&to_free, &dce_instr->node);
   }

   nir_instr_free_list(&to_free);

   nir_instr_worklist_destroy(worklist);

   return c;
}

/*@}*/

nir_def *
nir_instr_def(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return &nir_instr_as_alu(instr)->def;

   case nir_instr_type_deref:
      return &nir_instr_as_deref(instr)->def;

   case nir_instr_type_tex:
      return &nir_instr_as_tex(instr)->def;

   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
         return &intrin->def;
      } else {
         return NULL;
      }
   }

   case nir_instr_type_phi:
      return &nir_instr_as_phi(instr)->def;

   case nir_instr_type_parallel_copy:
      unreachable("Parallel copies are unsupported by this function");

   case nir_instr_type_load_const:
      return &nir_instr_as_load_const(instr)->def;

   case nir_instr_type_undef:
      return &nir_instr_as_undef(instr)->def;

   case nir_instr_type_debug_info:
      return &nir_instr_as_debug_info(instr)->def;

   case nir_instr_type_call:
   case nir_instr_type_jump:
      return NULL;
   }

   unreachable("Invalid instruction type");
}

bool
nir_foreach_phi_src_leaving_block(nir_block *block,
                                  nir_foreach_src_cb cb,
                                  void *state)
{
   for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) {
      if (block->successors[i] == NULL)
         continue;

      nir_foreach_phi(phi, block->successors[i]) {
         nir_foreach_phi_src(phi_src, phi) {
            if (phi_src->pred == block) {
               if (!cb(&phi_src->src, state))
                  return false;
            }
         }
      }
   }

   return true;
}

nir_const_value
nir_const_value_for_float(double f, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   /* clang-format off */
   switch (bit_size) {
   case 16: v.u16 = _mesa_float_to_half(f);  break;
   case 32: v.f32 = f;                       break;
   case 64: v.f64 = f;                       break;
   default: unreachable("Invalid bit size");
   }
   /* clang-format on */

   return v;
}

double
nir_const_value_as_float(nir_const_value value, unsigned bit_size)
{
   /* clang-format off */
   switch (bit_size) {
   case 16: return _mesa_half_to_float(value.u16);
   case 32: return value.f32;
   case 64: return value.f64;
   default: unreachable("Invalid bit size");
   }
   /* clang-format on */
}

nir_const_value *
nir_src_as_const_value(nir_src src)
{
   if (src.ssa->parent_instr->type != nir_instr_type_load_const)
      return NULL;

   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   return load->value;
}

const char *
nir_src_as_string(nir_src src)
{
   nir_debug_info_instr *di = nir_src_as_debug_info(src);
   if (di && di->type == nir_debug_info_string)
      return di->string;

   return NULL;
}

/**
 * Returns true if the source is known to be always uniform. Otherwise it
 * returns false which means it may or may not be uniform but it can't be
 * determined.
 *
 * For a more precise analysis of uniform values, use nir_divergence_analysis.
 */
bool
nir_src_is_always_uniform(nir_src src)
{
   /* Constants are trivially uniform */
   if (src.ssa->parent_instr->type == nir_instr_type_load_const)
      return true;

   if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);
      /* As are uniform variables */
      if (intr->intrinsic == nir_intrinsic_load_uniform &&
          nir_src_is_always_uniform(intr->src[0]))
         return true;
      /* From the Vulkan specification 15.6.1. Push Constant Interface:
       * "Any member of a push constant block that is declared as an array must
       * only be accessed with dynamically uniform indices."
       */
      if (intr->intrinsic == nir_intrinsic_load_push_constant)
         return true;
      if (intr->intrinsic == nir_intrinsic_load_deref &&
          nir_deref_mode_is(nir_src_as_deref(intr->src[0]), nir_var_mem_push_const))
         return true;
   }

   /* Operating together uniform expressions produces a uniform result */
   if (src.ssa->parent_instr->type == nir_instr_type_alu) {
      nir_alu_instr *alu = nir_instr_as_alu(src.ssa->parent_instr);
      for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
         if (!nir_src_is_always_uniform(alu->src[i].src))
            return false;
      }

      return true;
   }

   /* XXX: this could have many more tests, such as when a sampler function is
    * called with uniform arguments.
    */
   return false;
}

nir_block *
nir_src_get_block(nir_src *src)
{
   if (nir_src_is_if(src))
      return nir_cf_node_cf_tree_prev(&nir_src_parent_if(src)->cf_node);
   else if (nir_src_parent_instr(src)->type == nir_instr_type_phi)
      return list_entry(src, nir_phi_src, src)->pred;
   else
      return nir_src_parent_instr(src)->block;
}

static void
src_remove_all_uses(nir_src *src)
{
   if (src && src_is_valid(src))
      list_del(&src->use_link);
}

static void
src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
{
   if (!src)
      return;

   if (!src_is_valid(src))
      return;

   if (parent_instr) {
      nir_src_set_parent_instr(src, parent_instr);
   } else {
      assert(parent_if);
      nir_src_set_parent_if(src, parent_if);
   }

   list_addtail(&src->use_link, &src->ssa->uses);
}

void
nir_instr_init_src(nir_instr *instr, nir_src *src, nir_def *def)
{
   *src = nir_src_for_ssa(def);
   src_add_all_uses(src, instr, NULL);
}

void
nir_instr_clear_src(nir_instr *instr, nir_src *src)
{
   src_remove_all_uses(src);
   *src = NIR_SRC_INIT;
}

void
nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
{
   assert(!src_is_valid(dest) || nir_src_parent_instr(dest) == dest_instr);

   src_remove_all_uses(dest);
   src_remove_all_uses(src);
   *dest = *src;
   *src = NIR_SRC_INIT;
   src_add_all_uses(dest, dest_instr, NULL);
}

void
nir_def_init(nir_instr *instr, nir_def *def,
             unsigned num_components,
             unsigned bit_size)
{
   def->parent_instr = instr;
   list_inithead(&def->uses);
   def->num_components = num_components;
   def->bit_size = bit_size;
   def->divergent = true; /* This is the safer default */
   def->loop_invariant = false;

   if (instr->block) {
      nir_function_impl *impl =
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;

      impl->valid_metadata &= ~nir_metadata_live_defs;
   } else {
      def->index = UINT_MAX;
   }
}

void
nir_def_rewrite_uses(nir_def *def, nir_def *new_ssa)
{
   assert(def != new_ssa);
   nir_foreach_use_including_if_safe(use_src, def) {
      nir_src_rewrite(use_src, new_ssa);
   }
}
1598 
1599 void
nir_def_rewrite_uses_src(nir_def * def,nir_src new_src)1600 nir_def_rewrite_uses_src(nir_def *def, nir_src new_src)
1601 {
1602    nir_def_rewrite_uses(def, new_src.ssa);
1603 }
1604 
1605 bool
nir_instr_is_before(nir_instr * first,nir_instr * second)1606 nir_instr_is_before(nir_instr *first, nir_instr *second)
1607 {
1608    if (first->block != second->block)
1609       return false;
1610 
1611    /* Search backwards looking for "first" */
1612    while (second != nir_block_first_instr(second->block)) {
1613       second = nir_instr_prev(second);
1614       assert(second);
1615 
1616       if (first == second)
1617          return true;
1618    }
1619 
1620    return false;
1621 }
1622 
1623 static bool
is_instr_between(nir_instr * start,nir_instr * end,nir_instr * between)1624 is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
1625 {
1626    assert(start->block == end->block);
1627 
1628    if (between->block != start->block)
1629       return false;
1630 
1631    /* Search backwards looking for "between" */
1632    while (start != end) {
1633       if (between == end)
1634          return true;
1635 
1636       end = nir_instr_prev(end);
1637       assert(end);
1638    }
1639 
1640    return false;
1641 }
1642 
1643 /* Replaces all uses of the given SSA def with the given source but only if
1644  * the use comes after the after_me instruction.  This can be useful if you
1645  * are emitting code to fix up the result of some instruction: you can freely
1646  * use the result in that code and then call rewrite_uses_after and pass the
1647  * last fixup instruction as after_me and it will replace all of the uses you
1648  * want without touching the fixup code.
1649  *
1650  * This function assumes that after_me is in the same block as
1651  * def->parent_instr and that after_me comes after def->parent_instr.
1652  */
1653 void
nir_def_rewrite_uses_after(nir_def * def,nir_def * new_ssa,nir_instr * after_me)1654 nir_def_rewrite_uses_after(nir_def *def, nir_def *new_ssa,
1655                            nir_instr *after_me)
1656 {
1657    if (def == new_ssa)
1658       return;
1659 
1660    nir_foreach_use_including_if_safe(use_src, def) {
1661       if (!nir_src_is_if(use_src)) {
1662          assert(nir_src_parent_instr(use_src) != def->parent_instr);
1663 
1664          /* Since def already dominates all of its uses, the only way a use can
1665           * not be dominated by after_me is if it is between def and after_me in
1666           * the instruction list.
1667           */
1668          if (is_instr_between(def->parent_instr, after_me, nir_src_parent_instr(use_src)))
1669             continue;
1670       }
1671 
1672       nir_src_rewrite(use_src, new_ssa);
1673    }
1674 }
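
/* Usage sketch (illustrative, not part of the original source): clamping the
 * result of an existing instruction without disturbing the fixup code itself,
 * where "def" is the nir_def being fixed up and "b" a nir_builder:
 *
 *    b->cursor = nir_after_instr(def->parent_instr);
 *    nir_def *clamped = nir_fsat(b, def);
 *    nir_def_rewrite_uses_after(def, clamped, clamped->parent_instr);
 *
 * Every use after the fsat now reads the clamped value while the fsat itself
 * keeps reading the original def.
 */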

static nir_def *
get_store_value(nir_intrinsic_instr *intrin)
{
   assert(nir_intrinsic_has_write_mask(intrin));
   /* deref stores have the deref in src[0] and the store value in src[1] */
   if (intrin->intrinsic == nir_intrinsic_store_deref ||
       intrin->intrinsic == nir_intrinsic_store_deref_block_intel)
      return intrin->src[1].ssa;

   /* all other stores have the store value in src[0] */
   return intrin->src[0].ssa;
}

nir_component_mask_t
nir_src_components_read(const nir_src *src)
{
   assert(nir_src_parent_instr(src));

   if (nir_src_parent_instr(src)->type == nir_instr_type_alu) {
      nir_alu_instr *alu = nir_instr_as_alu(nir_src_parent_instr(src));
      nir_alu_src *alu_src = exec_node_data(nir_alu_src, src, src);
      int src_idx = alu_src - &alu->src[0];
      assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs);
      return nir_alu_instr_src_read_mask(alu, src_idx);
   } else if (nir_src_parent_instr(src)->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(nir_src_parent_instr(src));
      if (nir_intrinsic_has_write_mask(intrin) && src->ssa == get_store_value(intrin))
         return nir_intrinsic_write_mask(intrin);
      else
         return (1 << src->ssa->num_components) - 1;
   } else {
      return (1 << src->ssa->num_components) - 1;
   }
}

nir_component_mask_t
nir_def_components_read(const nir_def *def)
{
   nir_component_mask_t read_mask = 0;

   nir_foreach_use_including_if(use, def) {
      read_mask |= nir_src_is_if(use) ? 1 : nir_src_components_read(use);

      if (read_mask == (1 << def->num_components) - 1)
         return read_mask;
   }

   return read_mask;
}
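
/* Worked example (illustrative, not part of the original source): for a vec4
 * def %v whose only uses are
 *
 *    %y  = mov %v.y
 *    %zw = mov %v.zw
 *
 * nir_def_components_read() returns 0b0010 | 0b1100 = 0b1110, so component x
 * is never read and could be shrunk away.  A use as an if-condition counts as
 * reading component x only (mask 0x1).
 */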

bool
nir_def_all_uses_are_fsat(const nir_def *def)
{
   nir_foreach_use(src, def) {
      if (nir_src_is_if(src))
         return false;

      nir_instr *use = nir_src_parent_instr(src);
      if (use->type != nir_instr_type_alu)
         return false;

      nir_alu_instr *alu = nir_instr_as_alu(use);
      if (alu->op != nir_op_fsat)
         return false;
   }

   return true;
}

bool
nir_def_all_uses_ignore_sign_bit(const nir_def *def)
{
   nir_foreach_use(use, def) {
      if (nir_src_is_if(use))
         return false;
      nir_instr *instr = nir_src_parent_instr(use);

      if (instr->type != nir_instr_type_alu)
         return false;

      nir_alu_instr *alu = nir_instr_as_alu(instr);
      if (alu->op == nir_op_fabs) {
         continue;
      } else if (alu->op == nir_op_fmul || alu->op == nir_op_ffma) {
         nir_alu_src *alu_src = list_entry(use, nir_alu_src, src);
         unsigned src_index = alu_src - alu->src;
         /* a * a doesn't care about sign of a. */
         if (src_index < 2 && nir_alu_srcs_equal(alu, alu, 0, 1))
            continue;
      }

      return false;
   }
   return true;
}

nir_block *
nir_block_unstructured_next(nir_block *block)
{
   if (block == NULL) {
      /* nir_foreach_block_unstructured_safe() will call this function on a
       * NULL block after the last iteration, but it won't use the result so
       * just return NULL here.
       */
      return NULL;
   }

   nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
   if (cf_next == NULL && block->cf_node.parent->type == nir_cf_node_function)
      return NULL;

   if (cf_next && cf_next->type == nir_cf_node_block)
      return nir_cf_node_as_block(cf_next);

   return nir_block_cf_tree_next(block);
}

nir_block *
nir_unstructured_start_block(nir_function_impl *impl)
{
   return nir_start_block(impl);
}

nir_block *
nir_block_cf_tree_next(nir_block *block)
{
   if (block == NULL) {
      /* nir_foreach_block_safe() will call this function on a NULL block
       * after the last iteration, but it won't use the result so just return
       * NULL here.
       */
      return NULL;
   }

   assert(nir_cf_node_get_function(&block->cf_node)->structured);

   nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
   if (cf_next)
      return nir_cf_node_cf_tree_first(cf_next);

   nir_cf_node *parent = block->cf_node.parent;
   if (parent->type == nir_cf_node_function)
      return NULL;

   /* Is this the last block of a cf_node? Return the following block */
   if (block == nir_cf_node_cf_tree_last(parent))
      return nir_cf_node_as_block(nir_cf_node_next(parent));

   switch (parent->type) {
   case nir_cf_node_if: {
      /* We are at the end of the if. Go to the beginning of the else */
      nir_if *if_stmt = nir_cf_node_as_if(parent);
      assert(block == nir_if_last_then_block(if_stmt));
      return nir_if_first_else_block(if_stmt);
   }

   case nir_cf_node_loop: {
      /* We are at the end of the body and there is a continue construct */
      nir_loop *loop = nir_cf_node_as_loop(parent);
      assert(block == nir_loop_last_block(loop) &&
             nir_loop_has_continue_construct(loop));
      return nir_loop_first_continue_block(loop);
   }

   default:
      unreachable("unknown cf node type");
   }
}

nir_block *
nir_block_cf_tree_prev(nir_block *block)
{
   if (block == NULL) {
      /* do this for consistency with nir_block_cf_tree_next() */
      return NULL;
   }

   assert(nir_cf_node_get_function(&block->cf_node)->structured);

   nir_cf_node *cf_prev = nir_cf_node_prev(&block->cf_node);
   if (cf_prev)
      return nir_cf_node_cf_tree_last(cf_prev);

   nir_cf_node *parent = block->cf_node.parent;
   if (parent->type == nir_cf_node_function)
      return NULL;

   /* Is this the first block of a cf_node? Return the previous block */
   if (block == nir_cf_node_cf_tree_first(parent))
      return nir_cf_node_as_block(nir_cf_node_prev(parent));

   switch (parent->type) {
   case nir_cf_node_if: {
      /* We are at the beginning of the else. Go to the end of the if */
      nir_if *if_stmt = nir_cf_node_as_if(parent);
      assert(block == nir_if_first_else_block(if_stmt));
      return nir_if_last_then_block(if_stmt);
   }
   case nir_cf_node_loop: {
      /* We are at the beginning of the continue construct. */
      nir_loop *loop = nir_cf_node_as_loop(parent);
      assert(nir_loop_has_continue_construct(loop) &&
             block == nir_loop_first_continue_block(loop));
      return nir_loop_last_block(loop);
   }

   default:
      unreachable("unknown cf node type");
   }
}

nir_block *
nir_cf_node_cf_tree_first(nir_cf_node *node)
{
   switch (node->type) {
   case nir_cf_node_function: {
      nir_function_impl *impl = nir_cf_node_as_function(node);
      return nir_start_block(impl);
   }

   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(node);
      return nir_if_first_then_block(if_stmt);
   }

   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(node);
      return nir_loop_first_block(loop);
   }

   case nir_cf_node_block: {
      return nir_cf_node_as_block(node);
   }

   default:
      unreachable("unknown node type");
   }
}

nir_block *
nir_cf_node_cf_tree_last(nir_cf_node *node)
{
   switch (node->type) {
   case nir_cf_node_function: {
      nir_function_impl *impl = nir_cf_node_as_function(node);
      return nir_impl_last_block(impl);
   }

   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(node);
      return nir_if_last_else_block(if_stmt);
   }

   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(node);
      if (nir_loop_has_continue_construct(loop))
         return nir_loop_last_continue_block(loop);
      else
         return nir_loop_last_block(loop);
   }

   case nir_cf_node_block: {
      return nir_cf_node_as_block(node);
   }

   default:
      unreachable("unknown node type");
   }
}

nir_block *
nir_cf_node_cf_tree_next(nir_cf_node *node)
{
   if (node->type == nir_cf_node_block)
      return nir_block_cf_tree_next(nir_cf_node_as_block(node));
   else if (node->type == nir_cf_node_function)
      return NULL;
   else
      return nir_cf_node_as_block(nir_cf_node_next(node));
}

nir_block *
nir_cf_node_cf_tree_prev(nir_cf_node *node)
{
   if (node->type == nir_cf_node_block)
      return nir_block_cf_tree_prev(nir_cf_node_as_block(node));
   else if (node->type == nir_cf_node_function)
      return NULL;
   else
      return nir_cf_node_as_block(nir_cf_node_prev(node));
}

nir_if *
nir_block_get_following_if(nir_block *block)
{
   if (exec_node_is_tail_sentinel(&block->cf_node.node))
      return NULL;

   if (nir_cf_node_is_last(&block->cf_node))
      return NULL;

   nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);

   if (next_node->type != nir_cf_node_if)
      return NULL;

   return nir_cf_node_as_if(next_node);
}

nir_loop *
nir_block_get_following_loop(nir_block *block)
{
   if (exec_node_is_tail_sentinel(&block->cf_node.node))
      return NULL;

   if (nir_cf_node_is_last(&block->cf_node))
      return NULL;

   nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);

   if (next_node->type != nir_cf_node_loop)
      return NULL;

   return nir_cf_node_as_loop(next_node);
}

static int
compare_block_index(const void *p1, const void *p2)
{
   const nir_block *block1 = *((const nir_block **)p1);
   const nir_block *block2 = *((const nir_block **)p2);

   return (int)block1->index - (int)block2->index;
}

nir_block **
nir_block_get_predecessors_sorted(const nir_block *block, void *mem_ctx)
{
   nir_block **preds =
      ralloc_array(mem_ctx, nir_block *, block->predecessors->entries);

   unsigned i = 0;
   set_foreach(block->predecessors, entry)
      preds[i++] = (nir_block *)entry->key;
   assert(i == block->predecessors->entries);

   qsort(preds, block->predecessors->entries, sizeof(nir_block *),
         compare_block_index);

   return preds;
}

void
nir_index_blocks(nir_function_impl *impl)
{
   unsigned index = 0;

   if (impl->valid_metadata & nir_metadata_block_index)
      return;

   nir_foreach_block_unstructured(block, impl) {
      block->index = index++;
   }

   /* The end_block isn't really part of the program, which is why its index
    * is >= num_blocks.
    */
   impl->num_blocks = impl->end_block->index = index;
}

static bool
index_ssa_def_cb(nir_def *def, void *state)
{
   unsigned *index = (unsigned *)state;
   def->index = (*index)++;

   return true;
}

/**
 * The indices are applied top-to-bottom which has the very nice property
 * that, if A dominates B, then A->index <= B->index.
 */
void
nir_index_ssa_defs(nir_function_impl *impl)
{
   unsigned index = 0;

   impl->valid_metadata &= ~nir_metadata_live_defs;

   nir_foreach_block_unstructured(block, impl) {
      nir_foreach_instr(instr, block)
         nir_foreach_def(instr, index_ssa_def_cb, &index);
   }

   impl->ssa_alloc = index;
}

/**
 * The indices are applied top-to-bottom which has the very nice property
 * that, if A dominates B, then A->index <= B->index.
 */
unsigned
nir_index_instrs(nir_function_impl *impl)
{
   unsigned index = 0;

   nir_foreach_block(block, impl) {
      block->start_ip = index++;

      nir_foreach_instr(instr, block)
         instr->index = index++;

      block->end_ip = index++;
   }

   return index;
}
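
/* Worked example (illustrative, not part of the original source): a block
 * containing two instructions is numbered
 *
 *    block->start_ip = 0
 *    instr0->index   = 1
 *    instr1->index   = 2
 *    block->end_ip   = 3
 *
 * so start_ip/end_ip bracket the instructions and comparing indices gives a
 * cheap "comes before" test across the whole impl, e.g. for live ranges.
 */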

void
nir_shader_clear_pass_flags(nir_shader *shader)
{
   nir_foreach_function_impl(impl, shader) {
      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            instr->pass_flags = 0;
         }
      }
   }
}

unsigned
nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes)
{
   unsigned count = 0;
   nir_foreach_variable_with_modes(var, shader, modes)
      var->index = count++;
   return count;
}

unsigned
nir_function_impl_index_vars(nir_function_impl *impl)
{
   unsigned count = 0;
   nir_foreach_function_temp_variable(var, impl)
      var->index = count++;
   return count;
}

static nir_instr *
cursor_next_instr(nir_cursor cursor)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      for (nir_block *block = cursor.block; block;
           block = nir_block_cf_tree_next(block)) {
         nir_instr *instr = nir_block_first_instr(block);
         if (instr)
            return instr;
      }
      return NULL;

   case nir_cursor_after_block:
      cursor.block = nir_block_cf_tree_next(cursor.block);
      if (cursor.block == NULL)
         return NULL;

      cursor.option = nir_cursor_before_block;
      return cursor_next_instr(cursor);

   case nir_cursor_before_instr:
      return cursor.instr;

   case nir_cursor_after_instr:
      if (nir_instr_next(cursor.instr))
         return nir_instr_next(cursor.instr);

      cursor.option = nir_cursor_after_block;
      cursor.block = cursor.instr->block;
      return cursor_next_instr(cursor);
   }

   unreachable("Invalid cursor option");
}

bool
nir_function_impl_lower_instructions(nir_function_impl *impl,
                                     nir_instr_filter_cb filter,
                                     nir_lower_instr_cb lower,
                                     void *cb_data)
{
   nir_builder b = nir_builder_create(impl);

   nir_metadata preserved = nir_metadata_control_flow;

   bool progress = false;
   nir_cursor iter = nir_before_impl(impl);
   nir_instr *instr;
   while ((instr = cursor_next_instr(iter)) != NULL) {
      if (filter && !filter(instr, cb_data)) {
         iter = nir_after_instr(instr);
         continue;
      }

      nir_def *old_def = nir_instr_def(instr);
      struct list_head old_uses;
      if (old_def != NULL) {
         /* We're about to ask the callback to generate a replacement for instr.
          * Save off the uses from instr's SSA def so we know what uses to
          * rewrite later.  If we use nir_def_rewrite_uses, it fails in the
          * case where the generated replacement code uses the result of instr
          * itself.  If we use nir_def_rewrite_uses_after (which is the
          * normal solution to this problem), it doesn't work well if control-
          * flow is inserted as part of the replacement, doesn't handle cases
          * where the replacement is something consumed by instr, and suffers
          * from performance issues.  This is the only way to 100% guarantee
          * that we rewrite the correct set efficiently.
          */

         list_replace(&old_def->uses, &old_uses);
         list_inithead(&old_def->uses);
      }

      b.cursor = nir_after_instr(instr);
      nir_def *new_def = lower(&b, instr, cb_data);
      if (new_def && new_def != NIR_LOWER_INSTR_PROGRESS &&
          new_def != NIR_LOWER_INSTR_PROGRESS_REPLACE) {
         assert(old_def != NULL);
         if (new_def->parent_instr->block != instr->block)
            preserved = nir_metadata_none;

         list_for_each_entry_safe(nir_src, use_src, &old_uses, use_link)
            nir_src_rewrite(use_src, new_def);

         if (nir_def_is_unused(old_def)) {
            iter = nir_instr_free_and_dce(instr);
         } else {
            iter = nir_after_instr(instr);
         }
         progress = true;
      } else {
         /* We didn't end up lowering after all.  Put the uses back */
         if (old_def)
            list_replace(&old_uses, &old_def->uses);

         if (new_def == NIR_LOWER_INSTR_PROGRESS_REPLACE) {
            /* Only instructions without a return value can be removed like this */
            assert(!old_def);
            iter = nir_instr_free_and_dce(instr);
            progress = true;
         } else
            iter = nir_after_instr(instr);

         if (new_def == NIR_LOWER_INSTR_PROGRESS)
            progress = true;
      }
   }

   if (progress) {
      nir_metadata_preserve(impl, preserved);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}

bool
nir_shader_lower_instructions(nir_shader *shader,
                              nir_instr_filter_cb filter,
                              nir_lower_instr_cb lower,
                              void *cb_data)
{
   bool progress = false;

   nir_foreach_function_impl(impl, shader) {
      if (nir_function_impl_lower_instructions(impl, filter, lower, cb_data))
         progress = true;
   }

   return progress;
}
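
/* Usage sketch (illustrative, not part of the original source): a minimal
 * pass built on this helper.  The filter selects fsin ALU instructions and
 * the lower callback returns the replacement def; the helper rewrites all
 * uses itself.  nir_op_fsin_amd takes its input in cycles, hence the
 * 1/(2*pi) scale.
 *
 *    static bool
 *    is_fsin(const nir_instr *instr, const void *data)
 *    {
 *       return instr->type == nir_instr_type_alu &&
 *              nir_instr_as_alu(instr)->op == nir_op_fsin;
 *    }
 *
 *    static nir_def *
 *    lower_fsin(nir_builder *b, nir_instr *instr, void *data)
 *    {
 *       nir_alu_instr *alu = nir_instr_as_alu(instr);
 *       nir_def *x = nir_ssa_for_alu_src(b, alu, 0);
 *       return nir_fsin_amd(b, nir_fmul_imm(b, x, 0.15915494309189535));
 *    }
 *
 *    ...
 *    nir_shader_lower_instructions(shader, is_fsin, lower_fsin, NULL);
 */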

/**
 * Returns true if the shader supports quad-based implicit derivatives on
 * texture sampling.
 */
bool
nir_shader_supports_implicit_lod(nir_shader *shader)
{
   return (shader->info.stage == MESA_SHADER_FRAGMENT ||
           (gl_shader_stage_uses_workgroup(shader->info.stage) &&
            shader->info.derivative_group != DERIVATIVE_GROUP_NONE));
}

nir_intrinsic_op
nir_intrinsic_from_system_value(gl_system_value val)
{
   switch (val) {
   case SYSTEM_VALUE_VERTEX_ID:
      return nir_intrinsic_load_vertex_id;
   case SYSTEM_VALUE_INSTANCE_ID:
      return nir_intrinsic_load_instance_id;
   case SYSTEM_VALUE_DRAW_ID:
      return nir_intrinsic_load_draw_id;
   case SYSTEM_VALUE_BASE_INSTANCE:
      return nir_intrinsic_load_base_instance;
   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
      return nir_intrinsic_load_vertex_id_zero_base;
   case SYSTEM_VALUE_IS_INDEXED_DRAW:
      return nir_intrinsic_load_is_indexed_draw;
   case SYSTEM_VALUE_FIRST_VERTEX:
      return nir_intrinsic_load_first_vertex;
   case SYSTEM_VALUE_BASE_VERTEX:
      return nir_intrinsic_load_base_vertex;
   case SYSTEM_VALUE_INVOCATION_ID:
      return nir_intrinsic_load_invocation_id;
   case SYSTEM_VALUE_FRAG_COORD:
      return nir_intrinsic_load_frag_coord;
   case SYSTEM_VALUE_PIXEL_COORD:
      return nir_intrinsic_load_pixel_coord;
   case SYSTEM_VALUE_POINT_COORD:
      return nir_intrinsic_load_point_coord;
   case SYSTEM_VALUE_LINE_COORD:
      return nir_intrinsic_load_line_coord;
   case SYSTEM_VALUE_FRONT_FACE:
      return nir_intrinsic_load_front_face;
   case SYSTEM_VALUE_FRONT_FACE_FSIGN:
      return nir_intrinsic_load_front_face_fsign;
   case SYSTEM_VALUE_SAMPLE_ID:
      return nir_intrinsic_load_sample_id;
   case SYSTEM_VALUE_SAMPLE_POS:
      return nir_intrinsic_load_sample_pos;
   case SYSTEM_VALUE_SAMPLE_POS_OR_CENTER:
      return nir_intrinsic_load_sample_pos_or_center;
   case SYSTEM_VALUE_SAMPLE_MASK_IN:
      return nir_intrinsic_load_sample_mask_in;
   case SYSTEM_VALUE_LAYER_ID:
      return nir_intrinsic_load_layer_id;
   case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
      return nir_intrinsic_load_local_invocation_id;
   case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
      return nir_intrinsic_load_local_invocation_index;
   case SYSTEM_VALUE_WORKGROUP_ID:
      return nir_intrinsic_load_workgroup_id;
   case SYSTEM_VALUE_BASE_WORKGROUP_ID:
      return nir_intrinsic_load_base_workgroup_id;
   case SYSTEM_VALUE_WORKGROUP_INDEX:
      return nir_intrinsic_load_workgroup_index;
   case SYSTEM_VALUE_NUM_WORKGROUPS:
      return nir_intrinsic_load_num_workgroups;
   case SYSTEM_VALUE_PRIMITIVE_ID:
      return nir_intrinsic_load_primitive_id;
   case SYSTEM_VALUE_TESS_COORD:
      return nir_intrinsic_load_tess_coord;
   case SYSTEM_VALUE_TESS_LEVEL_OUTER:
      return nir_intrinsic_load_tess_level_outer;
   case SYSTEM_VALUE_TESS_LEVEL_INNER:
      return nir_intrinsic_load_tess_level_inner;
   case SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT:
      return nir_intrinsic_load_tess_level_outer_default;
   case SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT:
      return nir_intrinsic_load_tess_level_inner_default;
   case SYSTEM_VALUE_VERTICES_IN:
      return nir_intrinsic_load_patch_vertices_in;
   case SYSTEM_VALUE_HELPER_INVOCATION:
      return nir_intrinsic_load_helper_invocation;
   case SYSTEM_VALUE_COLOR0:
      return nir_intrinsic_load_color0;
   case SYSTEM_VALUE_COLOR1:
      return nir_intrinsic_load_color1;
   case SYSTEM_VALUE_VIEW_INDEX:
      return nir_intrinsic_load_view_index;
   case SYSTEM_VALUE_SUBGROUP_SIZE:
      return nir_intrinsic_load_subgroup_size;
   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
      return nir_intrinsic_load_subgroup_invocation;
   case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
      return nir_intrinsic_load_subgroup_eq_mask;
   case SYSTEM_VALUE_SUBGROUP_GE_MASK:
      return nir_intrinsic_load_subgroup_ge_mask;
   case SYSTEM_VALUE_SUBGROUP_GT_MASK:
      return nir_intrinsic_load_subgroup_gt_mask;
   case SYSTEM_VALUE_SUBGROUP_LE_MASK:
      return nir_intrinsic_load_subgroup_le_mask;
   case SYSTEM_VALUE_SUBGROUP_LT_MASK:
      return nir_intrinsic_load_subgroup_lt_mask;
   case SYSTEM_VALUE_NUM_SUBGROUPS:
      return nir_intrinsic_load_num_subgroups;
   case SYSTEM_VALUE_SUBGROUP_ID:
      return nir_intrinsic_load_subgroup_id;
   case SYSTEM_VALUE_WORKGROUP_SIZE:
      return nir_intrinsic_load_workgroup_size;
   case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
      return nir_intrinsic_load_global_invocation_id;
   case SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID:
      return nir_intrinsic_load_base_global_invocation_id;
   case SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX:
      return nir_intrinsic_load_global_invocation_index;
   case SYSTEM_VALUE_GLOBAL_GROUP_SIZE:
      return nir_intrinsic_load_global_size;
   case SYSTEM_VALUE_WORK_DIM:
      return nir_intrinsic_load_work_dim;
   case SYSTEM_VALUE_USER_DATA_AMD:
      return nir_intrinsic_load_user_data_amd;
   case SYSTEM_VALUE_RAY_LAUNCH_ID:
      return nir_intrinsic_load_ray_launch_id;
   case SYSTEM_VALUE_RAY_LAUNCH_SIZE:
      return nir_intrinsic_load_ray_launch_size;
   case SYSTEM_VALUE_RAY_WORLD_ORIGIN:
      return nir_intrinsic_load_ray_world_origin;
   case SYSTEM_VALUE_RAY_WORLD_DIRECTION:
      return nir_intrinsic_load_ray_world_direction;
   case SYSTEM_VALUE_RAY_OBJECT_ORIGIN:
      return nir_intrinsic_load_ray_object_origin;
   case SYSTEM_VALUE_RAY_OBJECT_DIRECTION:
      return nir_intrinsic_load_ray_object_direction;
   case SYSTEM_VALUE_RAY_T_MIN:
      return nir_intrinsic_load_ray_t_min;
   case SYSTEM_VALUE_RAY_T_MAX:
      return nir_intrinsic_load_ray_t_max;
   case SYSTEM_VALUE_RAY_OBJECT_TO_WORLD:
      return nir_intrinsic_load_ray_object_to_world;
   case SYSTEM_VALUE_RAY_WORLD_TO_OBJECT:
      return nir_intrinsic_load_ray_world_to_object;
   case SYSTEM_VALUE_RAY_HIT_KIND:
      return nir_intrinsic_load_ray_hit_kind;
   case SYSTEM_VALUE_RAY_FLAGS:
      return nir_intrinsic_load_ray_flags;
   case SYSTEM_VALUE_RAY_GEOMETRY_INDEX:
      return nir_intrinsic_load_ray_geometry_index;
   case SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX:
      return nir_intrinsic_load_ray_instance_custom_index;
   case SYSTEM_VALUE_CULL_MASK:
      return nir_intrinsic_load_cull_mask;
   case SYSTEM_VALUE_RAY_TRIANGLE_VERTEX_POSITIONS:
      return nir_intrinsic_load_ray_triangle_vertex_positions;
   case SYSTEM_VALUE_MESH_VIEW_COUNT:
      return nir_intrinsic_load_mesh_view_count;
   case SYSTEM_VALUE_FRAG_SHADING_RATE:
      return nir_intrinsic_load_frag_shading_rate;
   case SYSTEM_VALUE_FULLY_COVERED:
      return nir_intrinsic_load_fully_covered;
   case SYSTEM_VALUE_FRAG_SIZE:
      return nir_intrinsic_load_frag_size;
   case SYSTEM_VALUE_FRAG_INVOCATION_COUNT:
      return nir_intrinsic_load_frag_invocation_count;
   case SYSTEM_VALUE_SHADER_INDEX:
      return nir_intrinsic_load_shader_index;
   case SYSTEM_VALUE_COALESCED_INPUT_COUNT:
      return nir_intrinsic_load_coalesced_input_count;
   case SYSTEM_VALUE_WARPS_PER_SM_NV:
      return nir_intrinsic_load_warps_per_sm_nv;
   case SYSTEM_VALUE_SM_COUNT_NV:
      return nir_intrinsic_load_sm_count_nv;
   case SYSTEM_VALUE_WARP_ID_NV:
      return nir_intrinsic_load_warp_id_nv;
   case SYSTEM_VALUE_SM_ID_NV:
      return nir_intrinsic_load_sm_id_nv;
   default:
      return nir_num_intrinsics;
   }
}

gl_system_value
nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
{
   switch (intrin) {
   case nir_intrinsic_load_vertex_id:
      return SYSTEM_VALUE_VERTEX_ID;
   case nir_intrinsic_load_instance_id:
      return SYSTEM_VALUE_INSTANCE_ID;
   case nir_intrinsic_load_draw_id:
      return SYSTEM_VALUE_DRAW_ID;
   case nir_intrinsic_load_base_instance:
      return SYSTEM_VALUE_BASE_INSTANCE;
   case nir_intrinsic_load_vertex_id_zero_base:
      return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
   case nir_intrinsic_load_first_vertex:
      return SYSTEM_VALUE_FIRST_VERTEX;
   case nir_intrinsic_load_is_indexed_draw:
      return SYSTEM_VALUE_IS_INDEXED_DRAW;
   case nir_intrinsic_load_base_vertex:
      return SYSTEM_VALUE_BASE_VERTEX;
   case nir_intrinsic_load_invocation_id:
      return SYSTEM_VALUE_INVOCATION_ID;
   case nir_intrinsic_load_frag_coord:
      return SYSTEM_VALUE_FRAG_COORD;
   case nir_intrinsic_load_pixel_coord:
      return SYSTEM_VALUE_PIXEL_COORD;
   case nir_intrinsic_load_point_coord:
      return SYSTEM_VALUE_POINT_COORD;
   case nir_intrinsic_load_line_coord:
      return SYSTEM_VALUE_LINE_COORD;
   case nir_intrinsic_load_front_face:
      return SYSTEM_VALUE_FRONT_FACE;
   case nir_intrinsic_load_front_face_fsign:
      return SYSTEM_VALUE_FRONT_FACE_FSIGN;
   case nir_intrinsic_load_sample_id:
      return SYSTEM_VALUE_SAMPLE_ID;
   case nir_intrinsic_load_sample_pos:
      return SYSTEM_VALUE_SAMPLE_POS;
   case nir_intrinsic_load_sample_pos_or_center:
      return SYSTEM_VALUE_SAMPLE_POS_OR_CENTER;
   case nir_intrinsic_load_sample_mask_in:
      return SYSTEM_VALUE_SAMPLE_MASK_IN;
   case nir_intrinsic_load_layer_id:
      return SYSTEM_VALUE_LAYER_ID;
   case nir_intrinsic_load_local_invocation_id:
      return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
   case nir_intrinsic_load_local_invocation_index:
      return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
   case nir_intrinsic_load_num_workgroups:
      return SYSTEM_VALUE_NUM_WORKGROUPS;
   case nir_intrinsic_load_workgroup_id:
      return SYSTEM_VALUE_WORKGROUP_ID;
   case nir_intrinsic_load_base_workgroup_id:
      return SYSTEM_VALUE_BASE_WORKGROUP_ID;
   case nir_intrinsic_load_workgroup_index:
      return SYSTEM_VALUE_WORKGROUP_INDEX;
   case nir_intrinsic_load_primitive_id:
      return SYSTEM_VALUE_PRIMITIVE_ID;
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_tess_coord_xy:
      return SYSTEM_VALUE_TESS_COORD;
   case nir_intrinsic_load_tess_level_outer:
      return SYSTEM_VALUE_TESS_LEVEL_OUTER;
   case nir_intrinsic_load_tess_level_inner:
      return SYSTEM_VALUE_TESS_LEVEL_INNER;
   case nir_intrinsic_load_tess_level_outer_default:
      return SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT;
   case nir_intrinsic_load_tess_level_inner_default:
      return SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT;
   case nir_intrinsic_load_patch_vertices_in:
      return SYSTEM_VALUE_VERTICES_IN;
   case nir_intrinsic_load_helper_invocation:
      return SYSTEM_VALUE_HELPER_INVOCATION;
   case nir_intrinsic_load_color0:
      return SYSTEM_VALUE_COLOR0;
   case nir_intrinsic_load_color1:
      return SYSTEM_VALUE_COLOR1;
   case nir_intrinsic_load_view_index:
      return SYSTEM_VALUE_VIEW_INDEX;
   case nir_intrinsic_load_subgroup_size:
      return SYSTEM_VALUE_SUBGROUP_SIZE;
   case nir_intrinsic_load_subgroup_invocation:
      return SYSTEM_VALUE_SUBGROUP_INVOCATION;
   case nir_intrinsic_load_subgroup_eq_mask:
      return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
   case nir_intrinsic_load_subgroup_ge_mask:
      return SYSTEM_VALUE_SUBGROUP_GE_MASK;
   case nir_intrinsic_load_subgroup_gt_mask:
      return SYSTEM_VALUE_SUBGROUP_GT_MASK;
   case nir_intrinsic_load_subgroup_le_mask:
      return SYSTEM_VALUE_SUBGROUP_LE_MASK;
   case nir_intrinsic_load_subgroup_lt_mask:
      return SYSTEM_VALUE_SUBGROUP_LT_MASK;
   case nir_intrinsic_load_num_subgroups:
      return SYSTEM_VALUE_NUM_SUBGROUPS;
   case nir_intrinsic_load_subgroup_id:
      return SYSTEM_VALUE_SUBGROUP_ID;
   case nir_intrinsic_load_workgroup_size:
      return SYSTEM_VALUE_WORKGROUP_SIZE;
   case nir_intrinsic_load_global_invocation_id:
      return SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
   case nir_intrinsic_load_base_global_invocation_id:
      return SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID;
   case nir_intrinsic_load_global_invocation_index:
      return SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX;
   case nir_intrinsic_load_global_size:
      return SYSTEM_VALUE_GLOBAL_GROUP_SIZE;
   case nir_intrinsic_load_work_dim:
      return SYSTEM_VALUE_WORK_DIM;
   case nir_intrinsic_load_user_data_amd:
      return SYSTEM_VALUE_USER_DATA_AMD;
   case nir_intrinsic_load_barycentric_model:
      return SYSTEM_VALUE_BARYCENTRIC_PULL_MODEL;
   case nir_intrinsic_load_gs_header_ir3:
      return SYSTEM_VALUE_GS_HEADER_IR3;
   case nir_intrinsic_load_tcs_header_ir3:
      return SYSTEM_VALUE_TCS_HEADER_IR3;
   case nir_intrinsic_load_ray_launch_id:
      return SYSTEM_VALUE_RAY_LAUNCH_ID;
   case nir_intrinsic_load_ray_launch_size:
      return SYSTEM_VALUE_RAY_LAUNCH_SIZE;
   case nir_intrinsic_load_ray_world_origin:
      return SYSTEM_VALUE_RAY_WORLD_ORIGIN;
   case nir_intrinsic_load_ray_world_direction:
      return SYSTEM_VALUE_RAY_WORLD_DIRECTION;
   case nir_intrinsic_load_ray_object_origin:
      return SYSTEM_VALUE_RAY_OBJECT_ORIGIN;
   case nir_intrinsic_load_ray_object_direction:
      return SYSTEM_VALUE_RAY_OBJECT_DIRECTION;
   case nir_intrinsic_load_ray_t_min:
      return SYSTEM_VALUE_RAY_T_MIN;
   case nir_intrinsic_load_ray_t_max:
      return SYSTEM_VALUE_RAY_T_MAX;
   case nir_intrinsic_load_ray_object_to_world:
      return SYSTEM_VALUE_RAY_OBJECT_TO_WORLD;
   case nir_intrinsic_load_ray_world_to_object:
      return SYSTEM_VALUE_RAY_WORLD_TO_OBJECT;
   case nir_intrinsic_load_ray_hit_kind:
      return SYSTEM_VALUE_RAY_HIT_KIND;
   case nir_intrinsic_load_ray_flags:
      return SYSTEM_VALUE_RAY_FLAGS;
   case nir_intrinsic_load_ray_geometry_index:
      return SYSTEM_VALUE_RAY_GEOMETRY_INDEX;
   case nir_intrinsic_load_ray_instance_custom_index:
      return SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX;
   case nir_intrinsic_load_cull_mask:
      return SYSTEM_VALUE_CULL_MASK;
   case nir_intrinsic_load_ray_triangle_vertex_positions:
      return SYSTEM_VALUE_RAY_TRIANGLE_VERTEX_POSITIONS;
   case nir_intrinsic_load_frag_shading_rate:
      return SYSTEM_VALUE_FRAG_SHADING_RATE;
   case nir_intrinsic_load_mesh_view_count:
      return SYSTEM_VALUE_MESH_VIEW_COUNT;
   case nir_intrinsic_load_fully_covered:
      return SYSTEM_VALUE_FULLY_COVERED;
   case nir_intrinsic_load_frag_size:
      return SYSTEM_VALUE_FRAG_SIZE;
   case nir_intrinsic_load_frag_invocation_count:
      return SYSTEM_VALUE_FRAG_INVOCATION_COUNT;
   case nir_intrinsic_load_shader_index:
      return SYSTEM_VALUE_SHADER_INDEX;
   case nir_intrinsic_load_coalesced_input_count:
      return SYSTEM_VALUE_COALESCED_INPUT_COUNT;
   case nir_intrinsic_load_warps_per_sm_nv:
      return SYSTEM_VALUE_WARPS_PER_SM_NV;
   case nir_intrinsic_load_sm_count_nv:
      return SYSTEM_VALUE_SM_COUNT_NV;
   case nir_intrinsic_load_warp_id_nv:
      return SYSTEM_VALUE_WARP_ID_NV;
   case nir_intrinsic_load_sm_id_nv:
      return SYSTEM_VALUE_SM_ID_NV;
   default:
      unreachable("intrinsic doesn't produce a system value");
   }
}

void
nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_def *src,
                            bool bindless)
{
   enum gl_access_qualifier access = nir_intrinsic_access(intrin);

   /* Image intrinsics only have one of these */
   assert(!nir_intrinsic_has_src_type(intrin) ||
          !nir_intrinsic_has_dest_type(intrin));

   nir_alu_type data_type = nir_type_invalid;
   if (nir_intrinsic_has_src_type(intrin))
      data_type = nir_intrinsic_src_type(intrin);
   if (nir_intrinsic_has_dest_type(intrin))
      data_type = nir_intrinsic_dest_type(intrin);

   nir_atomic_op atomic_op = 0;
   if (nir_intrinsic_has_atomic_op(intrin))
      atomic_op = nir_intrinsic_atomic_op(intrin);

   switch (intrin->intrinsic) {
#define CASE(op)                                                       \
   case nir_intrinsic_image_deref_##op:                                \
      intrin->intrinsic = bindless ? nir_intrinsic_bindless_image_##op \
                                   : nir_intrinsic_image_##op;         \
      break;
      CASE(load)
      CASE(sparse_load)
      CASE(store)
      CASE(atomic)
      CASE(atomic_swap)
      CASE(size)
      CASE(samples)
      CASE(load_raw_intel)
      CASE(store_raw_intel)
      CASE(fragment_mask_load_amd)
      CASE(store_block_agx)
#undef CASE
   default:
      unreachable("Unhandled image intrinsic");
   }

   nir_variable *var = nir_intrinsic_get_var(intrin, 0);

   /* Only update the format if the intrinsic doesn't have one set */
   if (nir_intrinsic_format(intrin) == PIPE_FORMAT_NONE)
      nir_intrinsic_set_format(intrin, var->data.image.format);

   nir_intrinsic_set_access(intrin, access | var->data.access);
   if (nir_intrinsic_has_src_type(intrin))
      nir_intrinsic_set_src_type(intrin, data_type);
   if (nir_intrinsic_has_dest_type(intrin))
      nir_intrinsic_set_dest_type(intrin, data_type);

   if (nir_intrinsic_has_atomic_op(intrin))
      nir_intrinsic_set_atomic_op(intrin, atomic_op);

   nir_src_rewrite(&intrin->src[0], src);
}

unsigned
nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr)
{
   enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
   int coords = glsl_get_sampler_dim_coordinate_components(dim);
   if (dim == GLSL_SAMPLER_DIM_CUBE)
      return coords;
   else
      return coords + nir_intrinsic_image_array(instr);
}

bool
nir_intrinsic_can_reorder(nir_intrinsic_instr *instr)
{
   if (nir_intrinsic_has_access(instr)) {
      enum gl_access_qualifier access = nir_intrinsic_access(instr);
      if (access & ACCESS_VOLATILE)
         return false;
      if (access & ACCESS_CAN_REORDER)
         return true;
   }

   const nir_intrinsic_info *info;
   if (instr->intrinsic == nir_intrinsic_load_deref) {
      nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
      if (nir_deref_mode_is_in_set(deref, nir_var_system_value)) {
         nir_variable *var = nir_deref_instr_get_variable(deref);
         if (!var)
            return false;

         nir_intrinsic_op sysval_op =
            nir_intrinsic_from_system_value((gl_system_value)var->data.location);
         if (sysval_op == nir_num_intrinsics)
            return true;

         info = &nir_intrinsic_infos[sysval_op];
      } else {
         return nir_deref_mode_is_in_set(deref, nir_var_read_only_modes);
      }
   } else {
      info = &nir_intrinsic_infos[instr->intrinsic];
   }

   return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
          (info->flags & NIR_INTRINSIC_CAN_REORDER);
}
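
/* Example (illustrative, not part of the original source): a UBO load is
 * CAN_ELIMINATE | CAN_REORDER by intrinsic info and returns true, while a
 * plain SSBO load returns false unless the driver tags it explicitly:
 *
 *    nir_intrinsic_set_access(load,
 *                             nir_intrinsic_access(load) | ACCESS_CAN_REORDER);
 *
 * ACCESS_VOLATILE forces false regardless of the other flags.
 */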

nir_src *
nir_get_shader_call_payload_src(nir_intrinsic_instr *call)
{
   switch (call->intrinsic) {
   case nir_intrinsic_trace_ray:
   case nir_intrinsic_rt_trace_ray:
      return &call->src[10];
   case nir_intrinsic_execute_callable:
   case nir_intrinsic_rt_execute_callable:
      return &call->src[1];
   default:
      unreachable("Not a call intrinsic");
      return NULL;
   }
}

nir_binding
nir_chase_binding(nir_src rsrc)
{
   nir_binding res = { 0 };
   if (rsrc.ssa->parent_instr->type == nir_instr_type_deref) {
      const struct glsl_type *type = glsl_without_array(nir_src_as_deref(rsrc)->type);
      bool is_image = glsl_type_is_image(type) || glsl_type_is_sampler(type);
      while (rsrc.ssa->parent_instr->type == nir_instr_type_deref) {
         nir_deref_instr *deref = nir_src_as_deref(rsrc);

         if (deref->deref_type == nir_deref_type_var) {
            res.success = true;
            res.var = deref->var;
            res.desc_set = deref->var->data.descriptor_set;
            res.binding = deref->var->data.binding;
            return res;
         } else if (deref->deref_type == nir_deref_type_array && is_image) {
            if (res.num_indices == ARRAY_SIZE(res.indices))
               return (nir_binding){ 0 };
            res.indices[res.num_indices++] = deref->arr.index;
         }

         rsrc = deref->parent;
      }
   }
   /* Skip copies and trimming. Trimming can appear as nir_op_mov instructions
    * when removing the offset from addresses. We also chase vec instructions
    * (nir_op_is_vec()) to skip trimming of vec2_index_32bit_offset addresses
    * after lowering ALU to scalar.
    */
   unsigned num_components = nir_src_num_components(rsrc);
   while (true) {
      nir_alu_instr *alu = nir_src_as_alu_instr(rsrc);
      nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc);
      if (alu && alu->op == nir_op_mov) {
         for (unsigned i = 0; i < num_components; i++) {
            if (alu->src[0].swizzle[i] != i)
               return (nir_binding){ 0 };
         }
         rsrc = alu->src[0].src;
      } else if (alu && nir_op_is_vec(alu->op)) {
         for (unsigned i = 0; i < num_components; i++) {
            if (alu->src[i].swizzle[0] != i || alu->src[i].src.ssa != alu->src[0].src.ssa)
               return (nir_binding){ 0 };
         }
         rsrc = alu->src[0].src;
      } else if (intrin && intrin->intrinsic == nir_intrinsic_read_first_invocation) {
         /* The caller might want to know that only the first invocation's
          * indices are used.
          */
         res.read_first_invocation = true;
         rsrc = intrin->src[0];
      } else {
         break;
      }
   }

   if (nir_src_is_const(rsrc)) {
      /* GL binding model after deref lowering */
      res.success = true;
      /* Can't use just nir_src_as_uint. Vulkan resource index produces a
       * vec2. Some drivers lower it to vec1 (to handle get_ssbo_size for
       * example) but others just keep it around as a vec2 (v3dv).
       */
      res.binding = nir_src_comp_as_uint(rsrc, 0);
      return res;
   }

   /* otherwise, must be Vulkan binding model after deref lowering or GL bindless */

   nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc);
   if (!intrin)
      return (nir_binding){ 0 };

   /* Intel resource, similar to load_vulkan_descriptor after it has been
    * lowered.
    */
   if (intrin->intrinsic == nir_intrinsic_resource_intel) {
      res.success = true;
      res.desc_set = nir_intrinsic_desc_set(intrin);
      res.binding = nir_intrinsic_binding(intrin);
      /* nir_intrinsic_resource_intel has 3 sources, but src[2] is included in
       * src[1]; it is only kept around for other purposes.
       */
      res.num_indices = 2;
      res.indices[0] = intrin->src[0];
      res.indices[1] = intrin->src[1];
      return res;
   }

   /* skip load_vulkan_descriptor */
   if (intrin->intrinsic == nir_intrinsic_load_vulkan_descriptor) {
      intrin = nir_src_as_intrinsic(intrin->src[0]);
      if (!intrin)
         return (nir_binding){ 0 };
   }

   if (intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
      return (nir_binding){ 0 };

   assert(res.num_indices == 0);
   res.success = true;
   res.desc_set = nir_intrinsic_desc_set(intrin);
   res.binding = nir_intrinsic_binding(intrin);
   res.num_indices = 1;
   res.indices[0] = intrin->src[0];
   return res;
}
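
/* Usage sketch (illustrative, not part of the original source): deciding
 * whether an SSBO access targets a known binding, where MY_FAST_BINDING is a
 * hypothetical driver constant:
 *
 *    nir_binding binding = nir_chase_binding(intrin->src[0]);
 *    if (binding.success && binding.desc_set == 0 &&
 *        binding.binding == MY_FAST_BINDING) {
 *       ...
 *    }
 *
 * binding.num_indices/indices[] must still be checked when the descriptor
 * binding is arrayed.
 */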

nir_variable *
nir_get_binding_variable(nir_shader *shader, nir_binding binding)
{
   nir_variable *binding_var = NULL;
   unsigned count = 0;

   if (!binding.success)
      return NULL;

   if (binding.var)
      return binding.var;

   nir_foreach_variable_with_modes(var, shader, nir_var_mem_ubo | nir_var_mem_ssbo) {
      if (var->data.descriptor_set == binding.desc_set && var->data.binding == binding.binding) {
         binding_var = var;
         count++;
      }
   }

   /* Be conservative if another variable is using the same binding/desc_set
    * because the access mask might be different and we can't get it reliably.
    */
   if (count > 1)
      return NULL;

   return binding_var;
}

nir_scalar
nir_scalar_chase_movs(nir_scalar s)
{
   while (nir_scalar_is_alu(s)) {
      nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
      if (alu->op == nir_op_mov) {
         s.def = alu->src[0].src.ssa;
         s.comp = alu->src[0].swizzle[s.comp];
      } else if (nir_op_is_vec(alu->op)) {
         s.def = alu->src[s.comp].src.ssa;
         s.comp = alu->src[s.comp].swizzle[0];
      } else {
         break;
      }
   }

   return s;
}
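
/* Worked example (illustrative, not part of the original source): with
 *
 *    %v = vec4 %a.x, %b.y, %c.z, %d.w
 *
 * chasing component 2 resolves through the vec to its third operand:
 *
 *    nir_scalar s = nir_get_scalar(v, 2);
 *    s = nir_scalar_chase_movs(s);   // now s.def is %c and s.comp is 2 (.z)
 */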

nir_alu_type
nir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type)
{
   switch (base_type) {
   /* clang-format off */
   case GLSL_TYPE_BOOL:    return nir_type_bool1;
   case GLSL_TYPE_UINT:    return nir_type_uint32;
   case GLSL_TYPE_INT:     return nir_type_int32;
   case GLSL_TYPE_UINT16:  return nir_type_uint16;
   case GLSL_TYPE_INT16:   return nir_type_int16;
   case GLSL_TYPE_UINT8:   return nir_type_uint8;
   case GLSL_TYPE_INT8:    return nir_type_int8;
   case GLSL_TYPE_UINT64:  return nir_type_uint64;
   case GLSL_TYPE_INT64:   return nir_type_int64;
   case GLSL_TYPE_FLOAT:   return nir_type_float32;
   case GLSL_TYPE_FLOAT16: return nir_type_float16;
   case GLSL_TYPE_DOUBLE:  return nir_type_float64;
      /* clang-format on */

   case GLSL_TYPE_COOPERATIVE_MATRIX:
   case GLSL_TYPE_SAMPLER:
   case GLSL_TYPE_TEXTURE:
   case GLSL_TYPE_IMAGE:
   case GLSL_TYPE_ATOMIC_UINT:
   case GLSL_TYPE_STRUCT:
   case GLSL_TYPE_INTERFACE:
   case GLSL_TYPE_ARRAY:
   case GLSL_TYPE_VOID:
   case GLSL_TYPE_SUBROUTINE:
   case GLSL_TYPE_ERROR:
      return nir_type_invalid;
   }

   unreachable("unknown type");
}

enum glsl_base_type
nir_get_glsl_base_type_for_nir_type(nir_alu_type base_type)
{
   /* clang-format off */
   switch (base_type) {
   case nir_type_bool1:    return GLSL_TYPE_BOOL;
   case nir_type_uint32:   return GLSL_TYPE_UINT;
   case nir_type_int32:    return GLSL_TYPE_INT;
   case nir_type_uint16:   return GLSL_TYPE_UINT16;
   case nir_type_int16:    return GLSL_TYPE_INT16;
   case nir_type_uint8:    return GLSL_TYPE_UINT8;
   case nir_type_int8:     return GLSL_TYPE_INT8;
   case nir_type_uint64:   return GLSL_TYPE_UINT64;
   case nir_type_int64:    return GLSL_TYPE_INT64;
   case nir_type_float32:  return GLSL_TYPE_FLOAT;
   case nir_type_float16:  return GLSL_TYPE_FLOAT16;
   case nir_type_float64:  return GLSL_TYPE_DOUBLE;
   default: unreachable("Not a sized nir_alu_type");
   }
   /* clang-format on */
}

nir_op
nir_op_vec(unsigned num_components)
{
   /* clang-format off */
   switch (num_components) {
   case  1: return nir_op_mov;
   case  2: return nir_op_vec2;
   case  3: return nir_op_vec3;
   case  4: return nir_op_vec4;
   case  5: return nir_op_vec5;
   case  8: return nir_op_vec8;
   case 16: return nir_op_vec16;
   default: unreachable("bad component count");
   }
   /* clang-format on */
}

bool
nir_op_is_vec(nir_op op)
{
   switch (op) {
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
   case nir_op_vec5:
   case nir_op_vec8:
   case nir_op_vec16:
      return true;
   default:
      return false;
   }
}

nir_component_mask_t
nir_alu_instr_src_read_mask(const nir_alu_instr *instr, unsigned src)
{
   nir_component_mask_t read_mask = 0;
   for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) {
      if (!nir_alu_instr_channel_used(instr, src, c))
         continue;

      read_mask |= (1 << instr->src[src].swizzle[c]);
   }
   return read_mask;
}
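
/* Worked example (illustrative, not part of the original source): for
 *
 *    %r = fdot3 %v.zzx, %w.xyz
 *
 * source 0 uses channels 0..2 with swizzle {2, 2, 0}, so its read mask is
 * (1 << 2) | (1 << 0) = 0b0101: only %v.x and %v.z are read.
 */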
2999 
3000 unsigned
nir_ssa_alu_instr_src_components(const nir_alu_instr * instr,unsigned src)3001 nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src)
3002 {
3003    if (nir_op_infos[instr->op].input_sizes[src] > 0)
3004       return nir_op_infos[instr->op].input_sizes[src];
3005 
3006    return instr->def.num_components;
3007 }
3008 
3009 #define CASE_ALL_SIZES(op) \
3010    case op:                \
3011    case op##8:             \
3012    case op##16:            \
3013    case op##32:
3014 
3015 bool
nir_alu_instr_is_comparison(const nir_alu_instr * instr)3016 nir_alu_instr_is_comparison(const nir_alu_instr *instr)
3017 {
3018    switch (instr->op) {
3019       CASE_ALL_SIZES(nir_op_flt)
3020       CASE_ALL_SIZES(nir_op_fge)
3021       CASE_ALL_SIZES(nir_op_fltu)
3022       CASE_ALL_SIZES(nir_op_fgeu)
3023       CASE_ALL_SIZES(nir_op_feq)
3024       CASE_ALL_SIZES(nir_op_fneu)
3025       CASE_ALL_SIZES(nir_op_fequ)
3026       CASE_ALL_SIZES(nir_op_fneo)
3027       CASE_ALL_SIZES(nir_op_funord)
3028       CASE_ALL_SIZES(nir_op_ford)
3029       CASE_ALL_SIZES(nir_op_ilt)
3030       CASE_ALL_SIZES(nir_op_ult)
3031       CASE_ALL_SIZES(nir_op_ige)
3032       CASE_ALL_SIZES(nir_op_uge)
3033       CASE_ALL_SIZES(nir_op_ieq)
3034       CASE_ALL_SIZES(nir_op_ine)
3035       CASE_ALL_SIZES(nir_op_bitz)
3036       CASE_ALL_SIZES(nir_op_bitnz)
3037    case nir_op_inot:
3038       return true;
3039    default:
3040       return false;
3041    }
3042 }
3043 
3044 #undef CASE_ALL_SIZES
3045 
unsigned
nir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn)
{
   const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
   assert(srcn < info->num_srcs);
   if (info->src_components[srcn] > 0)
      return info->src_components[srcn];
   else if (info->src_components[srcn] == 0)
      return intr->num_components;
   else
      return nir_src_num_components(intr->src[srcn]);
}

unsigned
nir_intrinsic_dest_components(nir_intrinsic_instr *intr)
{
   const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
   if (!info->has_dest)
      return 0;
   else if (info->dest_components)
      return info->dest_components;
   else
      return intr->num_components;
}

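/* Both helpers follow the nir_intrinsic_infos conventions visible above:
 * src_components[n] > 0 means a fixed vector size, == 0 means the size comes
 * from intr->num_components, and < 0 means it is taken from the SSA source
 * itself.  dest_components works the same way, minus the negative case. */
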
nir_alu_type
nir_intrinsic_instr_src_type(const nir_intrinsic_instr *intrin, unsigned src)
{
   /* We could go nuts here, but we'll just handle a few simple
    * cases and let everything else be untyped.
    */
   switch (intrin->intrinsic) {
   case nir_intrinsic_store_deref: {
      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
      if (src == 1)
         return nir_get_nir_type_for_glsl_type(deref->type);
      break;
   }

   case nir_intrinsic_store_output:
      if (src == 0)
         return nir_intrinsic_src_type(intrin);
      break;

   default:
      break;
   }

   /* For the most part, we leave other intrinsics alone.  Most
    * of them don't matter in OpenGL ES 2.0 drivers anyway.
    * However, we should at least check if this is some sort of
    * IO intrinsic and flag its offset and index sources.
    */
   const int offset_src_idx = nir_get_io_offset_src_number(intrin);
   if (offset_src_idx >= 0 && src == (unsigned)offset_src_idx)
      return nir_type_int;

   return nir_type_invalid;
}

nir_alu_type
nir_intrinsic_instr_dest_type(const nir_intrinsic_instr *intrin)
{
   /* We could go nuts here, but we'll just handle a few simple
    * cases and let everything else be untyped.
    */
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_deref: {
      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
      return nir_get_nir_type_for_glsl_type(deref->type);
   }

   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_primitive_input:
   case nir_intrinsic_load_uniform:
      return nir_intrinsic_dest_type(intrin);

   default:
      break;
   }

   return nir_type_invalid;
}

/**
 * Helper to copy const_index[] from src to dst, without assuming they
 * match in order.
 */
void
nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src)
{
   if (src->intrinsic == dst->intrinsic) {
      memcpy(dst->const_index, src->const_index, sizeof(dst->const_index));
      return;
   }

   const nir_intrinsic_info *src_info = &nir_intrinsic_infos[src->intrinsic];
   const nir_intrinsic_info *dst_info = &nir_intrinsic_infos[dst->intrinsic];

   for (unsigned i = 0; i < NIR_INTRINSIC_NUM_INDEX_FLAGS; i++) {
      if (src_info->index_map[i] == 0)
         continue;

      /* require that dst instruction also uses the same const_index[]: */
      assert(dst_info->index_map[i] > 0);

      dst->const_index[dst_info->index_map[i] - 1] =
         src->const_index[src_info->index_map[i] - 1];
   }
}

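/* Usage sketch (illustrative, not part of this file): when replacing one
 * intrinsic with another, the shared indices (base, write_mask, io
 * semantics, ...) can be carried over in a single call:
 *
 *    nir_intrinsic_instr *new_intr =
 *       nir_intrinsic_instr_create(shader, new_op);
 *    nir_intrinsic_copy_const_indices(new_intr, old_intr);
 *
 * The assert above fires if new_op lacks an index that old_intr's opcode
 * defines.
 */
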
bool
nir_tex_instr_need_sampler(const nir_tex_instr *instr)
{
   switch (instr->op) {
   case nir_texop_txf:
   case nir_texop_txf_ms:
   case nir_texop_txs:
   case nir_texop_query_levels:
   case nir_texop_texture_samples:
   case nir_texop_samples_identical:
   case nir_texop_descriptor_amd:
      return false;
   default:
      return true;
   }
}

unsigned
nir_tex_instr_result_size(const nir_tex_instr *instr)
{
   switch (instr->op) {
   case nir_texop_txs: {
      unsigned ret;
      switch (instr->sampler_dim) {
      case GLSL_SAMPLER_DIM_1D:
      case GLSL_SAMPLER_DIM_BUF:
         ret = 1;
         break;
      case GLSL_SAMPLER_DIM_2D:
      case GLSL_SAMPLER_DIM_CUBE:
      case GLSL_SAMPLER_DIM_MS:
      case GLSL_SAMPLER_DIM_RECT:
      case GLSL_SAMPLER_DIM_EXTERNAL:
      case GLSL_SAMPLER_DIM_SUBPASS:
      case GLSL_SAMPLER_DIM_SUBPASS_MS:
         ret = 2;
         break;
      case GLSL_SAMPLER_DIM_3D:
         ret = 3;
         break;
      default:
         unreachable("not reached");
      }
      if (instr->is_array)
         ret++;
      return ret;
   }

   case nir_texop_lod:
      return 2;

   case nir_texop_texture_samples:
   case nir_texop_query_levels:
   case nir_texop_samples_identical:
   case nir_texop_fragment_mask_fetch_amd:
   case nir_texop_lod_bias_agx:
   case nir_texop_has_custom_border_color_agx:
      return 1;

   case nir_texop_descriptor_amd:
      return instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 4 : 8;

   case nir_texop_sampler_descriptor_amd:
      return 4;

   case nir_texop_hdr_dim_nv:
   case nir_texop_tex_type_nv:
      return 4;

   case nir_texop_custom_border_color_agx:
      return 4;

   default:
      if (instr->is_shadow && instr->is_new_style_shadow)
         return 1;

      return 4;
   }
}

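/* Worked examples: txs on a 2D array texture returns (width, height, layers),
 * i.e. 2 + 1 = 3 components, and a shadow sample with is_new_style_shadow set
 * returns a single scalar comparison result instead of the legacy vec4. */
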
bool
nir_tex_instr_is_query(const nir_tex_instr *instr)
{
   switch (instr->op) {
   case nir_texop_txs:
   case nir_texop_lod:
   case nir_texop_texture_samples:
   case nir_texop_query_levels:
   case nir_texop_descriptor_amd:
   case nir_texop_sampler_descriptor_amd:
   case nir_texop_lod_bias_agx:
   case nir_texop_custom_border_color_agx:
   case nir_texop_has_custom_border_color_agx:
   case nir_texop_hdr_dim_nv:
   case nir_texop_tex_type_nv:
      return true;
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_txl:
   case nir_texop_txd:
   case nir_texop_txf:
   case nir_texop_txf_ms:
   case nir_texop_txf_ms_fb:
   case nir_texop_txf_ms_mcs_intel:
   case nir_texop_tg4:
   case nir_texop_samples_identical:
   case nir_texop_fragment_mask_fetch_amd:
   case nir_texop_fragment_fetch_amd:
      return false;
   default:
      unreachable("Invalid texture opcode");
   }
}

bool
nir_tex_instr_has_implicit_derivative(const nir_tex_instr *instr)
{
   switch (instr->op) {
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_lod:
      return true;
   case nir_texop_tg4:
      return instr->is_gather_implicit_lod;
   default:
      return false;
   }
}

nir_alu_type
nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src)
{
   switch (instr->src[src].src_type) {
   case nir_tex_src_coord:
      switch (instr->op) {
      case nir_texop_txf:
      case nir_texop_txf_ms:
      case nir_texop_txf_ms_fb:
      case nir_texop_txf_ms_mcs_intel:
      case nir_texop_samples_identical:
      case nir_texop_fragment_fetch_amd:
      case nir_texop_fragment_mask_fetch_amd:
         return nir_type_int;

      default:
         return nir_type_float;
      }

   case nir_tex_src_lod:
      switch (instr->op) {
      case nir_texop_txs:
      case nir_texop_txf:
      case nir_texop_txf_ms:
      case nir_texop_fragment_fetch_amd:
      case nir_texop_fragment_mask_fetch_amd:
         return nir_type_int;

      default:
         return nir_type_float;
      }

   case nir_tex_src_projector:
   case nir_tex_src_comparator:
   case nir_tex_src_bias:
   case nir_tex_src_min_lod:
   case nir_tex_src_ddx:
   case nir_tex_src_ddy:
   case nir_tex_src_backend1:
   case nir_tex_src_backend2:
      return nir_type_float;

   case nir_tex_src_offset:
   case nir_tex_src_ms_index:
   case nir_tex_src_plane:
      return nir_type_int;

   case nir_tex_src_sampler_deref_intrinsic:
   case nir_tex_src_texture_deref_intrinsic:
   case nir_tex_src_ms_mcs_intel:
   case nir_tex_src_texture_deref:
   case nir_tex_src_sampler_deref:
   case nir_tex_src_texture_offset:
   case nir_tex_src_sampler_offset:
   case nir_tex_src_texture_handle:
   case nir_tex_src_sampler_handle:
      return nir_type_uint;

   case nir_num_tex_src_types:
      unreachable("nir_num_tex_src_types is not a valid source type");
   }

   unreachable("Invalid texture source type");
}

unsigned
nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src)
{
   if (instr->src[src].src_type == nir_tex_src_coord)
      return instr->coord_components;

   /* The MCS value is expected to be a vec4 returned by a txf_ms_mcs_intel */
   if (instr->src[src].src_type == nir_tex_src_ms_mcs_intel)
      return 4;

   if (instr->src[src].src_type == nir_tex_src_ddx ||
       instr->src[src].src_type == nir_tex_src_ddy) {

      if (instr->is_array && !instr->array_is_lowered_cube)
         return instr->coord_components - 1;
      else
         return instr->coord_components;
   }

   if (instr->src[src].src_type == nir_tex_src_offset) {
      if (instr->is_array)
         return instr->coord_components - 1;
      else
         return instr->coord_components;
   }

   if (instr->src[src].src_type == nir_tex_src_backend1 ||
       instr->src[src].src_type == nir_tex_src_backend2)
      return nir_src_num_components(instr->src[src].src);

   /* For AMD, this can be a vec8/vec4 image/sampler descriptor. */
   if (instr->src[src].src_type == nir_tex_src_texture_handle ||
       instr->src[src].src_type == nir_tex_src_sampler_handle)
      return 0;

   return 1;
}

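/* Worked example: a cube array sample has coord_components == 4 (x, y, z,
 * array layer), but derivatives never apply to the array index, so ddx/ddy
 * are vec3 here, unless the cube was already lowered to a 2D array
 * (array_is_lowered_cube), in which case they match coord_components. */
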
/**
 * Return which components are written into transform feedback buffers.
 * The result is relative to 0, not "component".
 */
unsigned
nir_instr_xfb_write_mask(nir_intrinsic_instr *instr)
{
   unsigned mask = 0;

   if (nir_intrinsic_has_io_xfb(instr)) {
      unsigned wr_mask = nir_intrinsic_write_mask(instr) << nir_intrinsic_component(instr);
      assert((wr_mask & ~0xf) == 0); /* only 4 components allowed */

      unsigned iter_mask = wr_mask;
      while (iter_mask) {
         unsigned i = u_bit_scan(&iter_mask);
         nir_io_xfb xfb = i < 2 ? nir_intrinsic_io_xfb(instr) : nir_intrinsic_io_xfb2(instr);
         if (xfb.out[i % 2].num_components)
            mask |= BITFIELD_RANGE(i, xfb.out[i % 2].num_components) & wr_mask;
      }
   }

   return mask;
}

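/* Worked example: a store with write_mask 0x3 and component base 2 yields
 * wr_mask = 0xc.  Components 0-1 are described by io_xfb and components 2-3
 * by io_xfb2, so bit 2 consults io_xfb2.out[0]; if that slot captures 2
 * components, the function returns BITFIELD_RANGE(2, 2) & 0xc = 0xc. */
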
/**
 * Whether an output slot is consumed by fixed-function logic.
 */
bool
nir_slot_is_sysval_output(gl_varying_slot slot, gl_shader_stage next_shader)
{
   switch (next_shader) {
   case MESA_SHADER_FRAGMENT:
      return slot == VARYING_SLOT_POS ||
             slot == VARYING_SLOT_PSIZ ||
             slot == VARYING_SLOT_EDGE ||
             slot == VARYING_SLOT_CLIP_VERTEX ||
             slot == VARYING_SLOT_CLIP_DIST0 ||
             slot == VARYING_SLOT_CLIP_DIST1 ||
             slot == VARYING_SLOT_CULL_DIST0 ||
             slot == VARYING_SLOT_CULL_DIST1 ||
             slot == VARYING_SLOT_LAYER ||
             slot == VARYING_SLOT_VIEWPORT ||
             slot == VARYING_SLOT_VIEW_INDEX ||
             slot == VARYING_SLOT_VIEWPORT_MASK ||
             slot == VARYING_SLOT_PRIMITIVE_SHADING_RATE ||
             /* NV_mesh_shader only */
             slot == VARYING_SLOT_PRIMITIVE_COUNT ||
             slot == VARYING_SLOT_PRIMITIVE_INDICES;

   case MESA_SHADER_TESS_EVAL:
      return slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
             slot == VARYING_SLOT_TESS_LEVEL_INNER ||
             slot == VARYING_SLOT_BOUNDING_BOX0 ||
             slot == VARYING_SLOT_BOUNDING_BOX1;

   case MESA_SHADER_MESH:
      /* NV_mesh_shader only */
      return slot == VARYING_SLOT_TASK_COUNT;

   case MESA_SHADER_NONE:
      /* NONE means unknown. Check all possibilities. */
      return nir_slot_is_sysval_output(slot, MESA_SHADER_FRAGMENT) ||
             nir_slot_is_sysval_output(slot, MESA_SHADER_TESS_EVAL) ||
             nir_slot_is_sysval_output(slot, MESA_SHADER_MESH);

   default:
      /* No other shaders have preceding shaders with sysval outputs. */
      return false;
   }
}

/**
 * Whether an input/output slot is consumed by the next shader stage,
 * or written by the previous shader stage.
 *
 * Pass MESA_SHADER_NONE if the next shader is unknown.
 */
bool
nir_slot_is_varying(gl_varying_slot slot, gl_shader_stage next_shader)
{
   bool unknown = next_shader == MESA_SHADER_NONE;
   bool exactly_before_fs = next_shader == MESA_SHADER_FRAGMENT || unknown;
   bool at_most_before_gs = next_shader <= MESA_SHADER_GEOMETRY || unknown;

   return slot >= VARYING_SLOT_VAR0 ||
          (slot == VARYING_SLOT_POS && at_most_before_gs) ||
          slot == VARYING_SLOT_COL0 ||
          slot == VARYING_SLOT_COL1 ||
          slot == VARYING_SLOT_BFC0 ||
          slot == VARYING_SLOT_BFC1 ||
          slot == VARYING_SLOT_FOGC ||
          (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) ||
          slot == VARYING_SLOT_PNTC ||
          (slot == VARYING_SLOT_CLIP_VERTEX && at_most_before_gs) ||
          slot == VARYING_SLOT_CLIP_DIST0 ||
          slot == VARYING_SLOT_CLIP_DIST1 ||
          slot == VARYING_SLOT_CULL_DIST0 ||
          slot == VARYING_SLOT_CULL_DIST1 ||
          slot == VARYING_SLOT_PRIMITIVE_ID ||
          slot == VARYING_SLOT_LAYER ||
          slot == VARYING_SLOT_VIEWPORT ||
          slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
          slot == VARYING_SLOT_TESS_LEVEL_INNER ||
          (slot == VARYING_SLOT_VIEW_INDEX && exactly_before_fs);
}

bool
nir_slot_is_sysval_output_and_varying(gl_varying_slot slot,
                                      gl_shader_stage next_shader)
{
   return nir_slot_is_sysval_output(slot, next_shader) &&
          nir_slot_is_varying(slot, next_shader);
}

/**
 * This marks the output store instruction as not feeding the next shader
 * stage. If the instruction has no other use, it's removed.
 */
bool
nir_remove_varying(nir_intrinsic_instr *intr, gl_shader_stage next_shader)
{
   nir_io_semantics sem = nir_intrinsic_io_semantics(intr);

   if ((!sem.no_sysval_output &&
        nir_slot_is_sysval_output(sem.location, next_shader)) ||
       nir_instr_xfb_write_mask(intr)) {
      /* Demote the store instruction. */
      sem.no_varying = true;
      nir_intrinsic_set_io_semantics(intr, sem);
      return false;
   } else {
      nir_instr_remove(&intr->instr);
      return true;
   }
}

/**
 * This marks the output store instruction as not feeding fixed-function
 * logic. If the instruction has no other use, it's removed.
 */
bool
nir_remove_sysval_output(nir_intrinsic_instr *intr, gl_shader_stage next_shader)
{
   nir_io_semantics sem = nir_intrinsic_io_semantics(intr);

   if ((!sem.no_varying && nir_slot_is_varying(sem.location, next_shader)) ||
       nir_instr_xfb_write_mask(intr)) {
      /* Demote the store instruction. */
      sem.no_sysval_output = true;
      nir_intrinsic_set_io_semantics(intr, sem);
      return false;
   } else {
      nir_instr_remove(&intr->instr);
      return true;
   }
}

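/* Usage sketch (illustrative, not part of this file): linking passes call
 * these once they know the consumer stage, e.g. for an output the fragment
 * shader never reads:
 *
 *    if (nir_remove_varying(intr, MESA_SHADER_FRAGMENT))
 *       progress = true;   // store was fully dead and has been removed
 *
 * A store still consumed by fixed-function logic or transform feedback is
 * only demoted via the no_varying/no_sysval_output semantic bits, never
 * removed outright.
 */
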
void
nir_remove_non_entrypoints(nir_shader *nir)
{
   nir_foreach_function_safe(func, nir) {
      if (!func->is_entrypoint)
         exec_node_remove(&func->node);
   }
   assert(exec_list_length(&nir->functions) == 1);
}

void
nir_remove_non_exported(nir_shader *nir)
{
   nir_foreach_function_safe(func, nir) {
      if (!func->is_exported)
         exec_node_remove(&func->node);
   }
}

/*
 * After precompiling entrypoints from a kernel library, we want to garbage
 * collect the NIR entrypoints but leave the exported library functions. This
 * helper does that.
 */
void
nir_remove_entrypoints(nir_shader *nir)
{
   nir_foreach_entrypoint_safe(func, nir) {
      exec_node_remove(&func->node);
   }
}

unsigned
nir_static_workgroup_size(const nir_shader *s)
{
   return s->info.workgroup_size[0] * s->info.workgroup_size[1] *
          s->info.workgroup_size[2];
}

bool
nir_block_contains_work(nir_block *block)
{
   if (!nir_cf_node_is_last(&block->cf_node))
      return true;

   nir_foreach_instr(instr, block) {
      if (instr->type == nir_instr_type_phi)
         continue;
      if (instr->type != nir_instr_type_alu ||
          !nir_op_is_vec_or_mov(nir_instr_as_alu(instr)->op))
         return true;
   }

   return false;
}

nir_op
nir_atomic_op_to_alu(nir_atomic_op op)
{
   switch (op) {
   case nir_atomic_op_iadd:
      return nir_op_iadd;
   case nir_atomic_op_imin:
      return nir_op_imin;
   case nir_atomic_op_umin:
      return nir_op_umin;
   case nir_atomic_op_imax:
      return nir_op_imax;
   case nir_atomic_op_umax:
      return nir_op_umax;
   case nir_atomic_op_iand:
      return nir_op_iand;
   case nir_atomic_op_ior:
      return nir_op_ior;
   case nir_atomic_op_ixor:
      return nir_op_ixor;
   case nir_atomic_op_fadd:
      return nir_op_fadd;
   case nir_atomic_op_fmin:
      return nir_op_fmin;
   case nir_atomic_op_fmax:
      return nir_op_fmax;

   /* We don't handle exchanges or wraps */
   case nir_atomic_op_xchg:
   case nir_atomic_op_cmpxchg:
   case nir_atomic_op_fcmpxchg:
   case nir_atomic_op_inc_wrap:
   case nir_atomic_op_dec_wrap:
   case nir_atomic_op_ordered_add_gfx12_amd:
      return nir_num_opcodes;
   }

   unreachable("Invalid nir_atomic_op");
}

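/* Usage sketch (illustrative, not part of this file): a lowering pass that
 * emulates an atomic as load + ALU + store can map the atomic op like this:
 *
 *    nir_op alu_op = nir_atomic_op_to_alu(nir_intrinsic_atomic_op(intr));
 *    if (alu_op == nir_num_opcodes)
 *       return false;
 *    nir_def *new_val = nir_build_alu2(b, alu_op, old_val, intr->src[1].ssa);
 *
 * Here old_val is the value previously loaded from memory, intr->src[1] is
 * assumed to hold the atomic operand, and the nir_num_opcodes check skips
 * the exchange/wrap cases, which have no single ALU equivalent.
 */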