/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_control_flow_private.h"
#include "nir_worklist.h"
#include "util/half_float.h"
#include <limits.h>
#include <assert.h>
#include <math.h>
#include "util/u_math.h"
#include "util/u_qsort.h"

#include "main/menums.h" /* BITFIELD64_MASK */

#ifndef NDEBUG
uint32_t nir_debug = 0;
bool nir_debug_print_shader[MESA_SHADER_KERNEL + 1] = { 0 };

static const struct debug_named_value nir_debug_control[] = {
   { "clone", NIR_DEBUG_CLONE,
     "Test cloning a shader at each successful lowering/optimization call" },
   { "serialize", NIR_DEBUG_SERIALIZE,
     "Test serialize and deserialize shader at each successful lowering/optimization call" },
   { "novalidate", NIR_DEBUG_NOVALIDATE,
     "Disable shader validation at each successful lowering/optimization call" },
   { "validate_ssa_dominance", NIR_DEBUG_VALIDATE_SSA_DOMINANCE,
     "Validate SSA dominance in shader at each successful lowering/optimization call" },
   { "validate_gc_list", NIR_DEBUG_VALIDATE_GC_LIST,
     "Validate the instruction GC list at each successful lowering/optimization call" },
   { "tgsi", NIR_DEBUG_TGSI,
     "Dump NIR/TGSI shaders when doing a NIR<->TGSI translation" },
   { "print", NIR_DEBUG_PRINT,
     "Dump resulting shader after each successful lowering/optimization call" },
   { "print_vs", NIR_DEBUG_PRINT_VS,
     "Dump resulting vertex shader after each successful lowering/optimization call" },
   { "print_tcs", NIR_DEBUG_PRINT_TCS,
     "Dump resulting tessellation control shader after each successful lowering/optimization call" },
   { "print_tes", NIR_DEBUG_PRINT_TES,
     "Dump resulting tessellation evaluation shader after each successful lowering/optimization call" },
   { "print_gs", NIR_DEBUG_PRINT_GS,
     "Dump resulting geometry shader after each successful lowering/optimization call" },
   { "print_fs", NIR_DEBUG_PRINT_FS,
     "Dump resulting fragment shader after each successful lowering/optimization call" },
   { "print_cs", NIR_DEBUG_PRINT_CS,
     "Dump resulting compute shader after each successful lowering/optimization call" },
   { "print_ts", NIR_DEBUG_PRINT_TS,
     "Dump resulting task shader after each successful lowering/optimization call" },
   { "print_ms", NIR_DEBUG_PRINT_MS,
     "Dump resulting mesh shader after each successful lowering/optimization call" },
   { "print_rgs", NIR_DEBUG_PRINT_RGS,
     "Dump resulting raygen shader after each successful lowering/optimization call" },
   { "print_ahs", NIR_DEBUG_PRINT_AHS,
     "Dump resulting any-hit shader after each successful lowering/optimization call" },
   { "print_chs", NIR_DEBUG_PRINT_CHS,
     "Dump resulting closest-hit shader after each successful lowering/optimization call" },
82    { "print_mhs", NIR_DEBUG_PRINT_MHS,
83      "Dump resulting miss-hit shader after each successful lowering/optimization call" },
84    { "print_is", NIR_DEBUG_PRINT_IS,
85      "Dump resulting intersection shader after each successful lowering/optimization call" },
86    { "print_cbs", NIR_DEBUG_PRINT_CBS,
87      "Dump resulting callable shader after each successful lowering/optimization call" },
88    { "print_ks", NIR_DEBUG_PRINT_KS,
89      "Dump resulting kernel shader after each successful lowering/optimization call" },
90    { "print_consts", NIR_DEBUG_PRINT_CONSTS,
91      "Print const value near each use of const SSA variable" },
92    { NULL }
93 };
94 
95 DEBUG_GET_ONCE_FLAGS_OPTION(nir_debug, "NIR_DEBUG", nir_debug_control, 0)
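
/* Usage sketch (an illustration, not part of this file's API): on a debug
 * build, the named flags above combine as a comma-separated list in the
 * NIR_DEBUG environment variable, e.g.
 *
 *    NIR_DEBUG=clone,validate_ssa_dominance,print_fs ./app
 */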

static void
nir_process_debug_variable_once(void)
{
   nir_debug = debug_get_option_nir_debug();
   nir_debug_print_shader[MESA_SHADER_VERTEX]       = NIR_DEBUG(PRINT_VS);
   nir_debug_print_shader[MESA_SHADER_TESS_CTRL]    = NIR_DEBUG(PRINT_TCS);
   nir_debug_print_shader[MESA_SHADER_TESS_EVAL]    = NIR_DEBUG(PRINT_TES);
   nir_debug_print_shader[MESA_SHADER_GEOMETRY]     = NIR_DEBUG(PRINT_GS);
   nir_debug_print_shader[MESA_SHADER_FRAGMENT]     = NIR_DEBUG(PRINT_FS);
   nir_debug_print_shader[MESA_SHADER_COMPUTE]      = NIR_DEBUG(PRINT_CS);
   nir_debug_print_shader[MESA_SHADER_TASK]         = NIR_DEBUG(PRINT_TS);
   nir_debug_print_shader[MESA_SHADER_MESH]         = NIR_DEBUG(PRINT_MS);
   nir_debug_print_shader[MESA_SHADER_RAYGEN]       = NIR_DEBUG(PRINT_RGS);
   nir_debug_print_shader[MESA_SHADER_ANY_HIT]      = NIR_DEBUG(PRINT_AHS);
   nir_debug_print_shader[MESA_SHADER_CLOSEST_HIT]  = NIR_DEBUG(PRINT_CHS);
   nir_debug_print_shader[MESA_SHADER_MISS]         = NIR_DEBUG(PRINT_MHS);
   nir_debug_print_shader[MESA_SHADER_INTERSECTION] = NIR_DEBUG(PRINT_IS);
   nir_debug_print_shader[MESA_SHADER_CALLABLE]     = NIR_DEBUG(PRINT_CBS);
   nir_debug_print_shader[MESA_SHADER_KERNEL]       = NIR_DEBUG(PRINT_KS);
}

void
nir_process_debug_variable(void)
{
   static once_flag flag = ONCE_FLAG_INIT;
   call_once(&flag, nir_process_debug_variable_once);
}
#endif

/** Return true if the component mask "mask" with bit size "old_bit_size" can
 * be re-interpreted to be used with "new_bit_size".
 */
bool
nir_component_mask_can_reinterpret(nir_component_mask_t mask,
                                   unsigned old_bit_size,
                                   unsigned new_bit_size)
{
   assert(util_is_power_of_two_nonzero(old_bit_size));
   assert(util_is_power_of_two_nonzero(new_bit_size));

   if (old_bit_size == new_bit_size)
      return true;

   if (old_bit_size == 1 || new_bit_size == 1)
      return false;

   if (old_bit_size > new_bit_size) {
      unsigned ratio = old_bit_size / new_bit_size;
      return util_last_bit(mask) * ratio <= NIR_MAX_VEC_COMPONENTS;
   }

   unsigned iter = mask;
   while (iter) {
      int start, count;
      u_bit_scan_consecutive_range(&iter, &start, &count);
      start *= old_bit_size;
      count *= old_bit_size;
      if (start % new_bit_size != 0)
         return false;
      if (count % new_bit_size != 0)
         return false;
   }
   return true;
}

/** Re-interprets a component mask "mask" with bit size "old_bit_size" so that
 * it can be used with "new_bit_size".
 */
nir_component_mask_t
nir_component_mask_reinterpret(nir_component_mask_t mask,
                               unsigned old_bit_size,
                               unsigned new_bit_size)
{
   assert(nir_component_mask_can_reinterpret(mask, old_bit_size, new_bit_size));

   if (old_bit_size == new_bit_size)
      return mask;

   nir_component_mask_t new_mask = 0;
   unsigned iter = mask;
   while (iter) {
      int start, count;
      u_bit_scan_consecutive_range(&iter, &start, &count);
      start = start * old_bit_size / new_bit_size;
      count = count * old_bit_size / new_bit_size;
      new_mask |= BITFIELD_RANGE(start, count);
   }
   return new_mask;
}
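
/* A worked example (illustrative, not from the original source): a mask of
 * the first two 32-bit components, 0x3, covers bytes 0-7, so at 16 bits it
 * becomes a mask of the first four components:
 *
 *    nir_component_mask_t m = nir_component_mask_reinterpret(0x3, 32, 16);
 *    assert(m == 0xf);
 */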

static void
nir_shader_destructor(void *ptr)
{
   nir_shader *shader = ptr;

   /* Free all instrs from the shader, since they're not ralloced. */
   list_for_each_entry_safe(nir_instr, instr, &shader->gc_list, gc_node) {
      nir_instr_free(instr);
   }
}

nir_shader *
nir_shader_create(void *mem_ctx,
                  gl_shader_stage stage,
                  const nir_shader_compiler_options *options,
                  shader_info *si)
{
   nir_shader *shader = rzalloc(mem_ctx, nir_shader);
   ralloc_set_destructor(shader, nir_shader_destructor);

#ifndef NDEBUG
   nir_process_debug_variable();
#endif

   exec_list_make_empty(&shader->variables);

   shader->options = options;

   if (si) {
      assert(si->stage == stage);
      shader->info = *si;
   } else {
      shader->info.stage = stage;
   }

   exec_list_make_empty(&shader->functions);

   list_inithead(&shader->gc_list);

   shader->num_inputs = 0;
   shader->num_outputs = 0;
   shader->num_uniforms = 0;

   return shader;
}
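
/* A minimal sketch of creating a shader container, assuming the driver's
 * compiler options live in a hypothetical <options> variable:
 *
 *    nir_shader *s =
 *       nir_shader_create(NULL, MESA_SHADER_FRAGMENT, options, NULL);
 */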

static nir_register *
reg_create(void *mem_ctx, struct exec_list *list)
{
   nir_register *reg = ralloc(mem_ctx, nir_register);

   list_inithead(&reg->uses);
   list_inithead(&reg->defs);
   list_inithead(&reg->if_uses);

   reg->num_components = 0;
   reg->bit_size = 32;
   reg->num_array_elems = 0;
   reg->divergent = false;

   exec_list_push_tail(list, &reg->node);

   return reg;
}

nir_register *
nir_local_reg_create(nir_function_impl *impl)
{
   nir_register *reg = reg_create(ralloc_parent(impl), &impl->registers);
   reg->index = impl->reg_alloc++;

   return reg;
}

void
nir_reg_remove(nir_register *reg)
{
   exec_node_remove(&reg->node);
}

void
nir_shader_add_variable(nir_shader *shader, nir_variable *var)
{
   switch (var->data.mode) {
   case nir_var_function_temp:
      assert(!"nir_shader_add_variable cannot be used for local variables");
      return;

   case nir_var_shader_temp:
   case nir_var_shader_in:
   case nir_var_shader_out:
   case nir_var_uniform:
   case nir_var_mem_ubo:
   case nir_var_mem_ssbo:
   case nir_var_image:
   case nir_var_mem_shared:
   case nir_var_system_value:
   case nir_var_mem_push_const:
   case nir_var_mem_constant:
   case nir_var_shader_call_data:
   case nir_var_ray_hit_attrib:
   case nir_var_mem_task_payload:
   case nir_var_mem_global:
      break;

   default:
      assert(!"invalid mode");
      return;
   }

   exec_list_push_tail(&shader->variables, &var->node);
}

nir_variable *
nir_variable_create(nir_shader *shader, nir_variable_mode mode,
                    const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = mode;
   var->data.how_declared = nir_var_declared_normally;

   if ((mode == nir_var_shader_in &&
        shader->info.stage != MESA_SHADER_VERTEX &&
        shader->info.stage != MESA_SHADER_KERNEL) ||
       (mode == nir_var_shader_out &&
        shader->info.stage != MESA_SHADER_FRAGMENT))
      var->data.interpolation = INTERP_MODE_SMOOTH;

   if (mode == nir_var_shader_in || mode == nir_var_uniform)
      var->data.read_only = true;

   nir_shader_add_variable(shader, var);

   return var;
}
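
/* Example sketch (assumes a fragment shader <s>): declaring a vec4 color
 * output; glsl_vec4_type() comes from the GLSL type helpers:
 *
 *    nir_variable *color =
 *       nir_variable_create(s, nir_var_shader_out,
 *                           glsl_vec4_type(), "out_color");
 *    color->data.location = FRAG_RESULT_DATA0;
 */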

nir_variable *
nir_local_variable_create(nir_function_impl *impl,
                          const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(impl->function->shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = nir_var_function_temp;

   nir_function_impl_add_variable(impl, var);

   return var;
}

nir_variable *
nir_find_variable_with_location(nir_shader *shader,
                                nir_variable_mode mode,
                                unsigned location)
{
   assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp);
   nir_foreach_variable_with_modes(var, shader, mode) {
      if (var->data.location == location)
         return var;
   }
   return NULL;
}

nir_variable *
nir_find_variable_with_driver_location(nir_shader *shader,
                                       nir_variable_mode mode,
                                       unsigned location)
{
   assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp);
   nir_foreach_variable_with_modes(var, shader, mode) {
      if (var->data.driver_location == location)
         return var;
   }
   return NULL;
}

/* Annoyingly, qsort_r is not in the C standard library and, in particular, we
 * can't count on it on MSVC and Android.  So we stuff the cmp function into
 * each array element.  It's a bit messy and burns more memory but the list of
 * variables should never be all that long.
 */
struct var_cmp {
   nir_variable *var;
   int (*cmp)(const nir_variable *, const nir_variable *);
};

static int
var_sort_cmp(const void *_a, const void *_b, void *_cmp)
{
   const struct var_cmp *a = _a;
   const struct var_cmp *b = _b;
   assert(a->cmp == b->cmp);
   return a->cmp(a->var, b->var);
}

void
nir_sort_variables_with_modes(nir_shader *shader,
                              int (*cmp)(const nir_variable *,
                                         const nir_variable *),
                              nir_variable_mode modes)
{
   unsigned num_vars = 0;
   nir_foreach_variable_with_modes(var, shader, modes) {
      ++num_vars;
   }
   struct var_cmp *vars = ralloc_array(shader, struct var_cmp, num_vars);
   unsigned i = 0;
   nir_foreach_variable_with_modes_safe(var, shader, modes) {
      exec_node_remove(&var->node);
      vars[i++] = (struct var_cmp){
         .var = var,
         .cmp = cmp,
      };
   }
   assert(i == num_vars);

   util_qsort_r(vars, num_vars, sizeof(*vars), var_sort_cmp, cmp);

   for (i = 0; i < num_vars; i++)
      exec_list_push_tail(&shader->variables, &vars[i].var->node);

   ralloc_free(vars);
}
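
/* Example comparator (a sketch, not from the original source): sort shader
 * inputs by location before assigning driver locations:
 *
 *    static int
 *    cmp_by_location(const nir_variable *a, const nir_variable *b)
 *    {
 *       return (int)a->data.location - (int)b->data.location;
 *    }
 *
 *    nir_sort_variables_with_modes(shader, cmp_by_location,
 *                                  nir_var_shader_in);
 */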

nir_function *
nir_function_create(nir_shader *shader, const char *name)
{
   nir_function *func = ralloc(shader, nir_function);

   exec_list_push_tail(&shader->functions, &func->node);

   func->name = ralloc_strdup(func, name);
   func->shader = shader;
   func->num_params = 0;
   func->params = NULL;
   func->impl = NULL;
   func->is_entrypoint = false;
   func->is_preamble = false;

   return func;
}

static bool src_has_indirect(nir_src *src)
{
   return !src->is_ssa && src->reg.indirect;
}

static void src_free_indirects(nir_src *src)
{
   if (src_has_indirect(src)) {
      assert(src->reg.indirect->is_ssa || !src->reg.indirect->reg.indirect);
      free(src->reg.indirect);
      src->reg.indirect = NULL;
   }
}

static void dest_free_indirects(nir_dest *dest)
{
   if (!dest->is_ssa && dest->reg.indirect) {
      assert(dest->reg.indirect->is_ssa || !dest->reg.indirect->reg.indirect);
      free(dest->reg.indirect);
      dest->reg.indirect = NULL;
   }
}

/* NOTE: if the instruction you are copying a src to is already added
 * to the IR, use nir_instr_rewrite_src() instead.
 */
void nir_src_copy(nir_src *dest, const nir_src *src)
{
   src_free_indirects(dest);

   dest->is_ssa = src->is_ssa;
   if (src->is_ssa) {
      dest->ssa = src->ssa;
   } else {
      dest->reg.base_offset = src->reg.base_offset;
      dest->reg.reg = src->reg.reg;
      if (src->reg.indirect) {
         dest->reg.indirect = calloc(1, sizeof(nir_src));
         nir_src_copy(dest->reg.indirect, src->reg.indirect);
      } else {
         dest->reg.indirect = NULL;
      }
   }
}

void nir_dest_copy(nir_dest *dest, const nir_dest *src)
{
   /* Copying an SSA definition makes no sense whatsoever. */
   assert(!src->is_ssa);

   dest_free_indirects(dest);

   dest->is_ssa = false;

   dest->reg.base_offset = src->reg.base_offset;
   dest->reg.reg = src->reg.reg;
   if (src->reg.indirect) {
      dest->reg.indirect = calloc(1, sizeof(nir_src));
      nir_src_copy(dest->reg.indirect, src->reg.indirect);
   } else {
      dest->reg.indirect = NULL;
   }
}

void
nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src)
{
   nir_src_copy(&dest->src, &src->src);
   dest->abs = src->abs;
   dest->negate = src->negate;
   for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
      dest->swizzle[i] = src->swizzle[i];
}

void
nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src)
{
   nir_dest_copy(&dest->dest, &src->dest);
   dest->write_mask = src->write_mask;
   dest->saturate = src->saturate;
}

bool
nir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, unsigned srcn)
{
   static uint8_t trivial_swizzle[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
   STATIC_ASSERT(ARRAY_SIZE(trivial_swizzle) == NIR_MAX_VEC_COMPONENTS);

   const nir_alu_src *src = &alu->src[srcn];
   unsigned num_components = nir_ssa_alu_instr_src_components(alu, srcn);

   return src->src.is_ssa && (src->src.ssa->num_components == num_components) &&
          !src->abs && !src->negate &&
          (memcmp(src->swizzle, trivial_swizzle, num_components) == 0);
}


static void
cf_init(nir_cf_node *node, nir_cf_node_type type)
{
   exec_node_init(&node->node);
   node->parent = NULL;
   node->type = type;
}

nir_function_impl *
nir_function_impl_create_bare(nir_shader *shader)
{
   nir_function_impl *impl = ralloc(shader, nir_function_impl);

   impl->function = NULL;
   impl->preamble = NULL;

   cf_init(&impl->cf_node, nir_cf_node_function);

   exec_list_make_empty(&impl->body);
   exec_list_make_empty(&impl->registers);
   exec_list_make_empty(&impl->locals);
   impl->reg_alloc = 0;
   impl->ssa_alloc = 0;
   impl->num_blocks = 0;
   impl->valid_metadata = nir_metadata_none;
   impl->structured = true;

   /* create start & end blocks */
   nir_block *start_block = nir_block_create(shader);
   nir_block *end_block = nir_block_create(shader);
   start_block->cf_node.parent = &impl->cf_node;
   end_block->cf_node.parent = &impl->cf_node;
   impl->end_block = end_block;

   exec_list_push_tail(&impl->body, &start_block->cf_node.node);

   start_block->successors[0] = end_block;
   _mesa_set_add(end_block->predecessors, start_block);
   return impl;
}

nir_function_impl *
nir_function_impl_create(nir_function *function)
{
   assert(function->impl == NULL);

   nir_function_impl *impl = nir_function_impl_create_bare(function->shader);

   function->impl = impl;
   impl->function = function;

   return impl;
}

nir_block *
nir_block_create(nir_shader *shader)
{
   nir_block *block = rzalloc(shader, nir_block);

   cf_init(&block->cf_node, nir_cf_node_block);

   block->successors[0] = block->successors[1] = NULL;
   block->predecessors = _mesa_pointer_set_create(block);
   block->imm_dom = NULL;
   /* XXX maybe it would be worth it to defer allocation?  This
    * way it doesn't get allocated for shader refs that never run
    * nir_calc_dominance?  For example, state-tracker creates an
    * initial IR, clones that, runs appropriate lowering pass, passes
    * to driver which does common lowering/opt, and then stores ref
    * which is later used to do state specific lowering and further
    * opt.  Do any of the references not need dominance metadata?
    */
   block->dom_frontier = _mesa_pointer_set_create(block);

   exec_list_make_empty(&block->instr_list);

   return block;
}

static inline void
src_init(nir_src *src)
{
   src->is_ssa = false;
   src->reg.reg = NULL;
   src->reg.indirect = NULL;
   src->reg.base_offset = 0;
}

nir_if *
nir_if_create(nir_shader *shader)
{
   nir_if *if_stmt = ralloc(shader, nir_if);

   if_stmt->control = nir_selection_control_none;

   cf_init(&if_stmt->cf_node, nir_cf_node_if);
   src_init(&if_stmt->condition);

   nir_block *then = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->then_list);
   exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
   then->cf_node.parent = &if_stmt->cf_node;

   nir_block *else_stmt = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->else_list);
   exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
   else_stmt->cf_node.parent = &if_stmt->cf_node;

   return if_stmt;
}

nir_loop *
nir_loop_create(nir_shader *shader)
{
   nir_loop *loop = rzalloc(shader, nir_loop);

   cf_init(&loop->cf_node, nir_cf_node_loop);
   /* Assume that loops are divergent until proven otherwise */
   loop->divergent = true;

   nir_block *body = nir_block_create(shader);
   exec_list_make_empty(&loop->body);
   exec_list_push_tail(&loop->body, &body->cf_node.node);
   body->cf_node.parent = &loop->cf_node;

   body->successors[0] = body;
   _mesa_set_add(body->predecessors, body);

   return loop;
}

static void
instr_init(nir_instr *instr, nir_instr_type type)
{
   instr->type = type;
   instr->block = NULL;
   exec_node_init(&instr->node);
}

static void
dest_init(nir_dest *dest)
{
   dest->is_ssa = false;
   dest->reg.reg = NULL;
   dest->reg.indirect = NULL;
   dest->reg.base_offset = 0;
}

static void
alu_dest_init(nir_alu_dest *dest)
{
   dest_init(&dest->dest);
   dest->saturate = false;
   dest->write_mask = 0xf;
}

static void
alu_src_init(nir_alu_src *src)
{
   src_init(&src->src);
   src->abs = src->negate = false;
   for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
      src->swizzle[i] = i;
}

nir_alu_instr *
nir_alu_instr_create(nir_shader *shader, nir_op op)
{
   unsigned num_srcs = nir_op_infos[op].num_inputs;
   /* TODO: don't use calloc */
   nir_alu_instr *instr = calloc(1, sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));

   instr_init(&instr->instr, nir_instr_type_alu);
   instr->op = op;
   alu_dest_init(&instr->dest);
   for (unsigned i = 0; i < num_srcs; i++)
      alu_src_init(&instr->src[i]);

   list_add(&instr->instr.gc_node, &shader->gc_list);

   return instr;
}
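
/* In practice most passes build ALU instructions through nir_builder rather
 * than calling this directly; a sketch, assuming a nir_builder <b> and SSA
 * defs <x> and <y>:
 *
 *    nir_ssa_def *sum = nir_fadd(&b, x, y);
 */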

nir_deref_instr *
nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
{
   nir_deref_instr *instr = calloc(1, sizeof(*instr));

   instr_init(&instr->instr, nir_instr_type_deref);

   instr->deref_type = deref_type;
   if (deref_type != nir_deref_type_var)
      src_init(&instr->parent);

   if (deref_type == nir_deref_type_array ||
       deref_type == nir_deref_type_ptr_as_array)
      src_init(&instr->arr.index);

   dest_init(&instr->dest);

   list_add(&instr->instr.gc_node, &shader->gc_list);

   return instr;
}

nir_jump_instr *
nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
{
   nir_jump_instr *instr = malloc(sizeof(*instr));
   instr_init(&instr->instr, nir_instr_type_jump);
   src_init(&instr->condition);
   instr->type = type;
   instr->target = NULL;
   instr->else_target = NULL;

   list_add(&instr->instr.gc_node, &shader->gc_list);

   return instr;
}

nir_load_const_instr *
nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
                            unsigned bit_size)
{
   nir_load_const_instr *instr =
      calloc(1, sizeof(*instr) + num_components * sizeof(*instr->value));
   instr_init(&instr->instr, nir_instr_type_load_const);

   nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size);

   list_add(&instr->instr.gc_node, &shader->gc_list);

   return instr;
}

nir_intrinsic_instr *
nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
{
   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   /* TODO: don't use calloc */
   nir_intrinsic_instr *instr =
      calloc(1, sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));

   instr_init(&instr->instr, nir_instr_type_intrinsic);
   instr->intrinsic = op;

   if (nir_intrinsic_infos[op].has_dest)
      dest_init(&instr->dest);

   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i]);

   list_add(&instr->instr.gc_node, &shader->gc_list);

   return instr;
}

nir_call_instr *
nir_call_instr_create(nir_shader *shader, nir_function *callee)
{
   const unsigned num_params = callee->num_params;
   nir_call_instr *instr =
      calloc(1, sizeof(*instr) + num_params * sizeof(instr->params[0]));

   instr_init(&instr->instr, nir_instr_type_call);
   instr->callee = callee;
   instr->num_params = num_params;
   for (unsigned i = 0; i < num_params; i++)
      src_init(&instr->params[i]);

   list_add(&instr->instr.gc_node, &shader->gc_list);

   return instr;
}

static int8_t default_tg4_offsets[4][2] =
{
   { 0, 1 },
   { 1, 1 },
   { 1, 0 },
   { 0, 0 },
};

nir_tex_instr *
nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
{
   nir_tex_instr *instr = calloc(1, sizeof(*instr));
   instr_init(&instr->instr, nir_instr_type_tex);

   dest_init(&instr->dest);

   instr->num_srcs = num_srcs;
   instr->src = malloc(sizeof(nir_tex_src) * num_srcs);
   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i].src);

   instr->texture_index = 0;
   instr->sampler_index = 0;
   memcpy(instr->tg4_offsets, default_tg4_offsets, sizeof(instr->tg4_offsets));

   list_add(&instr->instr.gc_node, &shader->gc_list);

   return instr;
}

void
nir_tex_instr_add_src(nir_tex_instr *tex,
                      nir_tex_src_type src_type,
                      nir_src src)
{
   nir_tex_src *new_srcs = calloc(sizeof(*new_srcs),
                                  tex->num_srcs + 1);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      new_srcs[i].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &new_srcs[i].src,
                         &tex->src[i].src);
   }

   free(tex->src);
   tex->src = new_srcs;

   tex->src[tex->num_srcs].src_type = src_type;
   nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs].src, src);
   tex->num_srcs++;
}

void
nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx)
{
   assert(src_idx < tex->num_srcs);

   /* First rewrite the source to NIR_SRC_INIT */
   nir_instr_rewrite_src(&tex->instr, &tex->src[src_idx].src, NIR_SRC_INIT);

   /* Now, move all of the other sources down */
   for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) {
      tex->src[i-1].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
   }
   tex->num_srcs--;
}
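
/* Example sketch (assuming a nir_builder <b> and a nir_tex_instr <tex>):
 * forcing a fetch to LOD 0 by appending an LOD source:
 *
 *    nir_tex_instr_add_src(tex, nir_tex_src_lod,
 *                          nir_src_for_ssa(nir_imm_int(&b, 0)));
 */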

bool
nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex)
{
   if (tex->op != nir_texop_tg4)
      return false;
   return memcmp(tex->tg4_offsets, default_tg4_offsets,
                 sizeof(tex->tg4_offsets)) != 0;
}

nir_phi_instr *
nir_phi_instr_create(nir_shader *shader)
{
   nir_phi_instr *instr = malloc(sizeof(*instr));
   instr_init(&instr->instr, nir_instr_type_phi);

   dest_init(&instr->dest);
   exec_list_make_empty(&instr->srcs);

   list_add(&instr->instr.gc_node, &shader->gc_list);

   return instr;
}

/**
 * Adds a new source to a NIR instruction.
 *
 * Note that this does not update the def/use relationship for src, assuming
 * that the instr is not in the shader.  If it is, you have to do:
 *
 * list_addtail(&phi_src->src.use_link, &src.ssa->uses);
 */
nir_phi_src *
nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_src src)
{
   nir_phi_src *phi_src;

   phi_src = calloc(1, sizeof(nir_phi_src));
   phi_src->pred = pred;
   phi_src->src = src;
   phi_src->src.parent_instr = &instr->instr;
   exec_list_push_tail(&instr->srcs, &phi_src->node);

   return phi_src;
}

nir_parallel_copy_instr *
nir_parallel_copy_instr_create(nir_shader *shader)
{
   nir_parallel_copy_instr *instr = malloc(sizeof(*instr));
   instr_init(&instr->instr, nir_instr_type_parallel_copy);

   exec_list_make_empty(&instr->entries);

   list_add(&instr->instr.gc_node, &shader->gc_list);

   return instr;
}

nir_ssa_undef_instr *
nir_ssa_undef_instr_create(nir_shader *shader,
                           unsigned num_components,
                           unsigned bit_size)
{
   nir_ssa_undef_instr *instr = malloc(sizeof(*instr));
   instr_init(&instr->instr, nir_instr_type_ssa_undef);

   nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size);

   list_add(&instr->instr.gc_node, &shader->gc_list);

   return instr;
}

static nir_const_value
const_value_float(double d, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));
   switch (bit_size) {
   case 16: v.u16 = _mesa_float_to_half(d);  break;
   case 32: v.f32 = d;                       break;
   case 64: v.f64 = d;                       break;
   default:
      unreachable("Invalid bit size");
   }
   return v;
}

static nir_const_value
const_value_int(int64_t i, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));
   switch (bit_size) {
   case 1:  v.b   = i & 1;  break;
   case 8:  v.i8  = i;  break;
   case 16: v.i16 = i;  break;
   case 32: v.i32 = i;  break;
   case 64: v.i64 = i;  break;
   default:
      unreachable("Invalid bit size");
   }
   return v;
}

nir_const_value
nir_alu_binop_identity(nir_op binop, unsigned bit_size)
{
   const int64_t max_int = (1ull << (bit_size - 1)) - 1;
   const int64_t min_int = -max_int - 1;
   switch (binop) {
   case nir_op_iadd:
      return const_value_int(0, bit_size);
   case nir_op_fadd:
      return const_value_float(0, bit_size);
   case nir_op_imul:
      return const_value_int(1, bit_size);
   case nir_op_fmul:
      return const_value_float(1, bit_size);
   case nir_op_imin:
      return const_value_int(max_int, bit_size);
   case nir_op_umin:
      return const_value_int(~0ull, bit_size);
   case nir_op_fmin:
      return const_value_float(INFINITY, bit_size);
   case nir_op_imax:
      return const_value_int(min_int, bit_size);
   case nir_op_umax:
      return const_value_int(0, bit_size);
   case nir_op_fmax:
      return const_value_float(-INFINITY, bit_size);
   case nir_op_iand:
      return const_value_int(~0ull, bit_size);
   case nir_op_ior:
      return const_value_int(0, bit_size);
   case nir_op_ixor:
      return const_value_int(0, bit_size);
   default:
      unreachable("Invalid reduction operation");
   }
}
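
/* For instance (an illustrative note), the umin identity is the all-ones
 * value: nir_alu_binop_identity(nir_op_umin, 32) yields 0xffffffff, and
 * umin(0xffffffff, x) == x for any 32-bit x, so a umin reduction can safely
 * start from it.
 */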

nir_function_impl *
nir_cf_node_get_function(nir_cf_node *node)
{
   while (node->type != nir_cf_node_function) {
      node = node->parent;
   }

   return nir_cf_node_as_function(node);
}

/* Reduces a cursor by trying to convert everything to after and trying to
 * go up to block granularity when possible.
 */
static nir_cursor
reduce_cursor(nir_cursor cursor)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      if (exec_list_is_empty(&cursor.block->instr_list)) {
         /* Empty block.  After is as good as before. */
         cursor.option = nir_cursor_after_block;
      }
      return cursor;

   case nir_cursor_after_block:
      return cursor;

   case nir_cursor_before_instr: {
      nir_instr *prev_instr = nir_instr_prev(cursor.instr);
      if (prev_instr) {
         /* Before this instruction is after the previous */
         cursor.instr = prev_instr;
         cursor.option = nir_cursor_after_instr;
      } else {
         /* No previous instruction.  Switch to before block */
         cursor.block = cursor.instr->block;
         cursor.option = nir_cursor_before_block;
      }
      return reduce_cursor(cursor);
   }

   case nir_cursor_after_instr:
      if (nir_instr_next(cursor.instr) == NULL) {
         /* This is the last instruction, switch to after block */
         cursor.option = nir_cursor_after_block;
         cursor.block = cursor.instr->block;
      }
      return cursor;

   default:
1063       unreachable("Inavlid cursor option");
   }
}

bool
nir_cursors_equal(nir_cursor a, nir_cursor b)
{
   /* Reduced cursors should be unique */
   a = reduce_cursor(a);
   b = reduce_cursor(b);

   return a.block == b.block && a.option == b.option;
}
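
/* E.g. (an illustration): nir_after_instr() on the last instruction of a
 * block and nir_after_block() on that block both reduce to the same
 * after-block cursor, so they compare equal here.
 */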

static bool
add_use_cb(nir_src *src, void *state)
{
   nir_instr *instr = state;

   src->parent_instr = instr;
   list_addtail(&src->use_link,
                src->is_ssa ? &src->ssa->uses : &src->reg.reg->uses);

   return true;
}

static bool
add_ssa_def_cb(nir_ssa_def *def, void *state)
{
   nir_instr *instr = state;

   if (instr->block && def->index == UINT_MAX) {
      nir_function_impl *impl =
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;

      impl->valid_metadata &= ~nir_metadata_live_ssa_defs;
   }

   return true;
}

static bool
add_reg_def_cb(nir_dest *dest, void *state)
{
   nir_instr *instr = state;

   if (!dest->is_ssa) {
      dest->reg.parent_instr = instr;
      list_addtail(&dest->reg.def_link, &dest->reg.reg->defs);
   }

   return true;
}

static void
add_defs_uses(nir_instr *instr)
{
   nir_foreach_src(instr, add_use_cb, instr);
   nir_foreach_dest(instr, add_reg_def_cb, instr);
   nir_foreach_ssa_def(instr, add_ssa_def_cb, instr);
}

void
nir_instr_insert(nir_cursor cursor, nir_instr *instr)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      /* Only allow inserting jumps into empty blocks. */
      if (instr->type == nir_instr_type_jump)
         assert(exec_list_is_empty(&cursor.block->instr_list));

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_head(&cursor.block->instr_list, &instr->node);
      break;
   case nir_cursor_after_block: {
      /* Inserting instructions after a jump is illegal. */
      nir_instr *last = nir_block_last_instr(cursor.block);
      assert(last == NULL || last->type != nir_instr_type_jump);
      (void) last;

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_tail(&cursor.block->instr_list, &instr->node);
      break;
   }
   case nir_cursor_before_instr:
      assert(instr->type != nir_instr_type_jump);
      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_node_before(&cursor.instr->node, &instr->node);
      break;
   case nir_cursor_after_instr:
      /* Inserting instructions after a jump is illegal. */
      assert(cursor.instr->type != nir_instr_type_jump);

      /* Only allow inserting jumps at the end of the block. */
      if (instr->type == nir_instr_type_jump)
         assert(cursor.instr == nir_block_last_instr(cursor.instr->block));

      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_after(&cursor.instr->node, &instr->node);
      break;
   }

   if (instr->type == nir_instr_type_jump)
      nir_handle_add_jump(instr->block);

   nir_function_impl *impl = nir_cf_node_get_function(&instr->block->cf_node);
   impl->valid_metadata &= ~nir_metadata_instr_index;
}

bool
nir_instr_move(nir_cursor cursor, nir_instr *instr)
{
   /* If the cursor happens to refer to this instruction (either before or
    * after), don't do anything.
    */
   if ((cursor.option == nir_cursor_before_instr ||
        cursor.option == nir_cursor_after_instr) &&
       cursor.instr == instr)
      return false;

   nir_instr_remove(instr);
   nir_instr_insert(cursor, instr);
   return true;
}

static bool
src_is_valid(const nir_src *src)
{
   return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL);
}

static bool
remove_use_cb(nir_src *src, void *state)
{
   (void) state;

   if (src_is_valid(src))
      list_del(&src->use_link);

   return true;
}

static bool
remove_def_cb(nir_dest *dest, void *state)
{
   (void) state;

   if (!dest->is_ssa)
      list_del(&dest->reg.def_link);

   return true;
}

static void
remove_defs_uses(nir_instr *instr)
{
   nir_foreach_dest(instr, remove_def_cb, instr);
   nir_foreach_src(instr, remove_use_cb, instr);
}

void nir_instr_remove_v(nir_instr *instr)
{
   remove_defs_uses(instr);
   exec_node_remove(&instr->node);

   if (instr->type == nir_instr_type_jump) {
      nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
      nir_handle_remove_jump(instr->block, jump_instr->type);
   }
}

static bool free_src_indirects_cb(nir_src *src, void *state)
{
   src_free_indirects(src);
   return true;
}

static bool free_dest_indirects_cb(nir_dest *dest, void *state)
{
   dest_free_indirects(dest);
   return true;
}

void nir_instr_free(nir_instr *instr)
{
   nir_foreach_src(instr, free_src_indirects_cb, NULL);
   nir_foreach_dest(instr, free_dest_indirects_cb, NULL);

   switch (instr->type) {
   case nir_instr_type_tex:
      free(nir_instr_as_tex(instr)->src);
      break;

   case nir_instr_type_phi: {
      nir_phi_instr *phi = nir_instr_as_phi(instr);
      nir_foreach_phi_src_safe(phi_src, phi) {
         free(phi_src);
      }
      break;
   }

   default:
      break;
   }

   list_del(&instr->gc_node);
   free(instr);
}

void
nir_instr_free_list(struct exec_list *list)
{
   struct exec_node *node;
   while ((node = exec_list_pop_head(list))) {
      nir_instr *removed_instr = exec_node_data(nir_instr, node, node);
      nir_instr_free(removed_instr);
   }
}

static bool nir_instr_free_and_dce_live_cb(nir_ssa_def *def, void *state)
{
   bool *live = state;

   if (!nir_ssa_def_is_unused(def)) {
      *live = true;
      return false;
   } else {
      return true;
   }
}

static bool nir_instr_free_and_dce_is_live(nir_instr *instr)
{
   /* Note: don't have to worry about jumps because they don't have dests to
    * become unused.
    */
   if (instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
      if (!(info->flags & NIR_INTRINSIC_CAN_ELIMINATE))
         return true;
   }

   bool live = false;
   nir_foreach_ssa_def(instr, nir_instr_free_and_dce_live_cb, &live);
   return live;
}

static bool
nir_instr_dce_add_dead_srcs_cb(nir_src *src, void *state)
{
   nir_instr_worklist *wl = state;

   if (src->is_ssa) {
      list_del(&src->use_link);
      if (!nir_instr_free_and_dce_is_live(src->ssa->parent_instr))
         nir_instr_worklist_push_tail(wl, src->ssa->parent_instr);

      /* Stop nir_instr_remove from trying to delete the link again. */
      src->ssa = NULL;
   }

   return true;
}

static void
nir_instr_dce_add_dead_ssa_srcs(nir_instr_worklist *wl, nir_instr *instr)
{
   nir_foreach_src(instr, nir_instr_dce_add_dead_srcs_cb, wl);
}

/**
 * Frees an instruction and any SSA defs that it used that are now dead,
 * returning a nir_cursor where the instruction previously was.
 */
nir_cursor
nir_instr_free_and_dce(nir_instr *instr)
{
   nir_instr_worklist *worklist = nir_instr_worklist_create();

   nir_instr_dce_add_dead_ssa_srcs(worklist, instr);
   nir_cursor c = nir_instr_remove(instr);

   struct exec_list to_free;
   exec_list_make_empty(&to_free);

   nir_instr *dce_instr;
   while ((dce_instr = nir_instr_worklist_pop_head(worklist))) {
      nir_instr_dce_add_dead_ssa_srcs(worklist, dce_instr);

      /* If we're removing the instr where our cursor is, then we have to
       * point the cursor elsewhere.
       */
      if ((c.option == nir_cursor_before_instr ||
           c.option == nir_cursor_after_instr) &&
          c.instr == dce_instr)
         c = nir_instr_remove(dce_instr);
      else
         nir_instr_remove(dce_instr);
      exec_list_push_tail(&to_free, &dce_instr->node);
   }

   nir_instr_free_list(&to_free);

   nir_instr_worklist_destroy(worklist);

   return c;
}
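
/* Typical use (a sketch): after rewriting all uses of an instruction's def,
 * free it and anything it transitively kept alive, then keep building at the
 * returned cursor (assuming a nir_builder <b>):
 *
 *    b.cursor = nir_instr_free_and_dce(instr);
 */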

/*@}*/

void
nir_index_local_regs(nir_function_impl *impl)
{
   unsigned index = 0;
   foreach_list_typed(nir_register, reg, node, &impl->registers) {
      reg->index = index++;
   }
   impl->reg_alloc = index;
}

struct foreach_ssa_def_state {
   nir_foreach_ssa_def_cb cb;
   void *client_state;
};

static inline bool
nir_ssa_def_visitor(nir_dest *dest, void *void_state)
{
   struct foreach_ssa_def_state *state = void_state;

   if (dest->is_ssa)
      return state->cb(&dest->ssa, state->client_state);
   else
      return true;
}

bool
nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
   case nir_instr_type_deref:
   case nir_instr_type_tex:
   case nir_instr_type_intrinsic:
   case nir_instr_type_phi:
   case nir_instr_type_parallel_copy: {
      struct foreach_ssa_def_state foreach_state = {cb, state};
      return nir_foreach_dest(instr, nir_ssa_def_visitor, &foreach_state);
   }

   case nir_instr_type_load_const:
      return cb(&nir_instr_as_load_const(instr)->def, state);
   case nir_instr_type_ssa_undef:
      return cb(&nir_instr_as_ssa_undef(instr)->def, state);
   case nir_instr_type_call:
   case nir_instr_type_jump:
      return true;
   default:
      unreachable("Invalid instruction type");
   }
}

nir_ssa_def *
nir_instr_ssa_def(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      assert(nir_instr_as_alu(instr)->dest.dest.is_ssa);
      return &nir_instr_as_alu(instr)->dest.dest.ssa;

   case nir_instr_type_deref:
      assert(nir_instr_as_deref(instr)->dest.is_ssa);
      return &nir_instr_as_deref(instr)->dest.ssa;

   case nir_instr_type_tex:
      assert(nir_instr_as_tex(instr)->dest.is_ssa);
      return &nir_instr_as_tex(instr)->dest.ssa;

   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
         assert(intrin->dest.is_ssa);
         return &intrin->dest.ssa;
      } else {
         return NULL;
      }
   }

   case nir_instr_type_phi:
      assert(nir_instr_as_phi(instr)->dest.is_ssa);
      return &nir_instr_as_phi(instr)->dest.ssa;

   case nir_instr_type_parallel_copy:
      unreachable("Parallel copies are unsupported by this function");

   case nir_instr_type_load_const:
      return &nir_instr_as_load_const(instr)->def;

   case nir_instr_type_ssa_undef:
      return &nir_instr_as_ssa_undef(instr)->def;

   case nir_instr_type_call:
   case nir_instr_type_jump:
      return NULL;
   }

   unreachable("Invalid instruction type");
}

bool
nir_instr_def_is_register(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return !nir_instr_as_alu(instr)->dest.dest.is_ssa;

   case nir_instr_type_deref:
      return !nir_instr_as_deref(instr)->dest.is_ssa;

   case nir_instr_type_tex:
      return !nir_instr_as_tex(instr)->dest.is_ssa;

   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      return nir_intrinsic_infos[intrin->intrinsic].has_dest &&
             !intrin->dest.is_ssa;
   }

   case nir_instr_type_phi:
      return !nir_instr_as_phi(instr)->dest.is_ssa;

   case nir_instr_type_parallel_copy:
      unreachable("Parallel copies are unsupported by this function");

   case nir_instr_type_load_const:
   case nir_instr_type_ssa_undef:
      return false;

   case nir_instr_type_call:
   case nir_instr_type_jump:
      return false;
   }

   unreachable("Invalid instruction type");
}

bool
nir_foreach_phi_src_leaving_block(nir_block *block,
                                  nir_foreach_src_cb cb,
                                  void *state)
{
   for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) {
      if (block->successors[i] == NULL)
         continue;

      nir_foreach_instr(instr, block->successors[i]) {
         if (instr->type != nir_instr_type_phi)
            break;

         nir_phi_instr *phi = nir_instr_as_phi(instr);
         nir_foreach_phi_src(phi_src, phi) {
            if (phi_src->pred == block) {
               if (!cb(&phi_src->src, state))
                  return false;
            }
         }
      }
   }

   return true;
}

nir_const_value
nir_const_value_for_float(double f, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   switch (bit_size) {
   case 16:
      v.u16 = _mesa_float_to_half(f);
      break;
   case 32:
      v.f32 = f;
      break;
   case 64:
      v.f64 = f;
      break;
   default:
      unreachable("Invalid bit size");
   }

   return v;
}

double
nir_const_value_as_float(nir_const_value value, unsigned bit_size)
{
   switch (bit_size) {
   case 16: return _mesa_half_to_float(value.u16);
   case 32: return value.f32;
   case 64: return value.f64;
   default:
      unreachable("Invalid bit size");
   }
}

nir_const_value *
nir_src_as_const_value(nir_src src)
{
   if (!src.is_ssa)
      return NULL;

   if (src.ssa->parent_instr->type != nir_instr_type_load_const)
      return NULL;

   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   return load->value;
}
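
/* A constant-folding sketch (illustrative): check whether an ALU source is a
 * known constant before folding, assuming a nir_alu_instr <alu>:
 *
 *    nir_const_value *cv = nir_src_as_const_value(alu->src[0].src);
 *    if (cv != NULL) {
 *       // cv[i] holds the constant for component i
 *    }
 */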
1590 
1591 /**
1592  * Returns true if the source is known to be always uniform. Otherwise it
1593  * returns false which means it may or may not be uniform but it can't be
1594  * determined.
1595  *
1596  * For a more precise analysis of uniform values, use nir_divergence_analysis.
1597  */
1598 bool
nir_src_is_always_uniform(nir_src src)1599 nir_src_is_always_uniform(nir_src src)
1600 {
1601    if (!src.is_ssa)
1602       return false;
1603 
1604    /* Constants are trivially uniform */
1605    if (src.ssa->parent_instr->type == nir_instr_type_load_const)
1606       return true;
1607 
1608    if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
1609       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);
1610       /* As are uniform variables */
1611       if (intr->intrinsic == nir_intrinsic_load_uniform &&
1612           nir_src_is_always_uniform(intr->src[0]))
1613          return true;
1614       /* From the Vulkan specification 15.6.1. Push Constant Interface:
1615        * "Any member of a push constant block that is declared as an array must
1616        * only be accessed with dynamically uniform indices."
1617        */
1618       if (intr->intrinsic == nir_intrinsic_load_push_constant)
1619          return true;
1620       if (intr->intrinsic == nir_intrinsic_load_deref &&
1621           nir_deref_mode_is(nir_src_as_deref(intr->src[0]), nir_var_mem_push_const))
1622          return true;
1623    }
1624 
1625    /* Combining uniform expressions produces a uniform result */
1626    if (src.ssa->parent_instr->type == nir_instr_type_alu) {
1627       nir_alu_instr *alu = nir_instr_as_alu(src.ssa->parent_instr);
1628       for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
1629          if (!nir_src_is_always_uniform(alu->src[i].src))
1630             return false;
1631       }
1632 
1633       return true;
1634    }
1635 
1636    /* XXX: this could have many more tests, such as when a sampler function is
1637     * called with uniform arguments.
1638     */
1639    return false;
1640 }
1641 
1642 static void
1643 src_remove_all_uses(nir_src *src)
1644 {
1645    for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
1646       if (!src_is_valid(src))
1647          continue;
1648 
1649       list_del(&src->use_link);
1650    }
1651 }
1652 
1653 static void
1654 src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
1655 {
1656    for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
1657       if (!src_is_valid(src))
1658          continue;
1659 
1660       if (parent_instr) {
1661          src->parent_instr = parent_instr;
1662          if (src->is_ssa)
1663             list_addtail(&src->use_link, &src->ssa->uses);
1664          else
1665             list_addtail(&src->use_link, &src->reg.reg->uses);
1666       } else {
1667          assert(parent_if);
1668          src->parent_if = parent_if;
1669          if (src->is_ssa)
1670             list_addtail(&src->use_link, &src->ssa->if_uses);
1671          else
1672             list_addtail(&src->use_link, &src->reg.reg->if_uses);
1673       }
1674    }
1675 }
1676 
1677 void
1678 nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src)
1679 {
1680    assert(!src_is_valid(src) || src->parent_instr == instr);
1681 
1682    src_remove_all_uses(src);
1683    nir_src_copy(src, &new_src);
1684    src_add_all_uses(src, instr, NULL);
1685 }
1686 
1687 void
1688 nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
1689 {
1690    assert(!src_is_valid(dest) || dest->parent_instr == dest_instr);
1691 
1692    src_remove_all_uses(dest);
1693    src_free_indirects(dest);
1694    src_remove_all_uses(src);
1695    *dest = *src;
1696    *src = NIR_SRC_INIT;
1697    src_add_all_uses(dest, dest_instr, NULL);
1698 }
1699 
1700 void
1701 nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src)
1702 {
1703    nir_src *src = &if_stmt->condition;
1704    assert(!src_is_valid(src) || src->parent_if == if_stmt);
1705 
1706    src_remove_all_uses(src);
1707    nir_src_copy(src, &new_src);
1708    src_add_all_uses(src, NULL, if_stmt);
1709 }
1710 
1711 void
1712 nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest)
1713 {
1714    if (dest->is_ssa) {
1715       /* We can only overwrite an SSA destination if it has no uses. */
1716       assert(nir_ssa_def_is_unused(&dest->ssa));
1717    } else {
1718       list_del(&dest->reg.def_link);
1719       if (dest->reg.indirect)
1720          src_remove_all_uses(dest->reg.indirect);
1721    }
1722 
1723    /* We can't rewrite with an SSA def */
1724    assert(!new_dest.is_ssa);
1725 
1726    nir_dest_copy(dest, &new_dest);
1727 
1728    dest->reg.parent_instr = instr;
1729    list_addtail(&dest->reg.def_link, &new_dest.reg.reg->defs);
1730 
1731    if (dest->reg.indirect)
1732       src_add_all_uses(dest->reg.indirect, instr, NULL);
1733 }
1734 
1736 void
1737 nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
1738                  unsigned num_components,
1739                  unsigned bit_size)
1740 {
1741    def->parent_instr = instr;
1742    list_inithead(&def->uses);
1743    list_inithead(&def->if_uses);
1744    def->num_components = num_components;
1745    def->bit_size = bit_size;
1746    def->divergent = true; /* This is the safer default */
1747 
1748    if (instr->block) {
1749       nir_function_impl *impl =
1750          nir_cf_node_get_function(&instr->block->cf_node);
1751 
1752       def->index = impl->ssa_alloc++;
1753 
1754       impl->valid_metadata &= ~nir_metadata_live_ssa_defs;
1755    } else {
1756       def->index = UINT_MAX;
1757    }
1758 }
1759 
1760 /* note: does *not* take ownership of 'name' */
1761 void
1762 nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
1763                  unsigned num_components, unsigned bit_size,
1764                  const char *name)
1765 {
1766    dest->is_ssa = true;
1767    nir_ssa_def_init(instr, &dest->ssa, num_components, bit_size);
1768 }
1769 
1770 void
1771 nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_ssa_def *new_ssa)
1772 {
1773    assert(def != new_ssa);
1774    nir_foreach_use_safe(use_src, def)
1775       nir_instr_rewrite_src_ssa(use_src->parent_instr, use_src, new_ssa);
1776 
1777    nir_foreach_if_use_safe(use_src, def)
1778       nir_if_rewrite_condition_ssa(use_src->parent_if, use_src, new_ssa);
1779 }
1780 
1781 void
1782 nir_ssa_def_rewrite_uses_src(nir_ssa_def *def, nir_src new_src)
1783 {
1784    if (new_src.is_ssa) {
1785       nir_ssa_def_rewrite_uses(def, new_src.ssa);
1786    } else {
1787       nir_foreach_use_safe(use_src, def)
1788          nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
1789 
1790       nir_foreach_if_use_safe(use_src, def)
1791          nir_if_rewrite_condition(use_src->parent_if, new_src);
1792    }
1793 }
1794 
1795 static bool
1796 is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
1797 {
1798    assert(start->block == end->block);
1799 
1800    if (between->block != start->block)
1801       return false;
1802 
1803    /* Search backwards looking for "between" */
1804    while (start != end) {
1805       if (between == end)
1806          return true;
1807 
1808       end = nir_instr_prev(end);
1809       assert(end);
1810    }
1811 
1812    return false;
1813 }
1814 
1815 /* Replaces all uses of the given SSA def with the given source but only if
1816  * the use comes after the after_me instruction.  This can be useful if you
1817  * are emitting code to fix up the result of some instruction: you can freely
1818  * use the result in that code and then call rewrite_uses_after and pass the
1819  * last fixup instruction as after_me and it will replace all of the uses you
1820  * want without touching the fixup code.
1821  *
1822  * This function assumes that after_me is in the same block as
1823  * def->parent_instr and that after_me comes after def->parent_instr.
1824  */
1825 void
1826 nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_ssa_def *new_ssa,
1827                                nir_instr *after_me)
1828 {
1829    if (def == new_ssa)
1830       return;
1831 
1832    nir_foreach_use_safe(use_src, def) {
1833       assert(use_src->parent_instr != def->parent_instr);
1834       /* Since def already dominates all of its uses, the only way a use can
1835        * not be dominated by after_me is if it is between def and after_me in
1836        * the instruction list.
1837        */
1838       if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr))
1839          nir_instr_rewrite_src_ssa(use_src->parent_instr, use_src, new_ssa);
1840    }
1841 
1842    nir_foreach_if_use_safe(use_src, def) {
1843       nir_if_rewrite_condition_ssa(use_src->parent_if,
1844                                    &use_src->parent_if->condition,
1845                                    new_ssa);
1846    }
1847 }
1848 
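/* Example usage (illustrative sketch; the saturate fixup is hypothetical,
 * and 'instr' is assumed to define an SSA value): clamp the result of
 * 'instr' to [0, 1] and redirect all later uses to the clamped value,
 * without rewriting the clamp's own source.
 */
static void
saturate_result(nir_builder *b, nir_instr *instr)
{
   nir_ssa_def *def = nir_instr_ssa_def(instr);
   b->cursor = nir_after_instr(instr);
   nir_ssa_def *sat = nir_fsat(b, def);
   nir_ssa_def_rewrite_uses_after(def, sat, sat->parent_instr);
}
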
1849 static nir_ssa_def *
1850 get_store_value(nir_intrinsic_instr *intrin)
1851 {
1852    assert(nir_intrinsic_has_write_mask(intrin));
1853    /* deref stores have the deref in src[0] and the store value in src[1] */
1854    if (intrin->intrinsic == nir_intrinsic_store_deref ||
1855        intrin->intrinsic == nir_intrinsic_store_deref_block_intel)
1856       return intrin->src[1].ssa;
1857 
1858    /* all other stores have the store value in src[0] */
1859    return intrin->src[0].ssa;
1860 }
1861 
1862 nir_component_mask_t
1863 nir_src_components_read(const nir_src *src)
1864 {
1865    assert(src->is_ssa && src->parent_instr);
1866 
1867    if (src->parent_instr->type == nir_instr_type_alu) {
1868       nir_alu_instr *alu = nir_instr_as_alu(src->parent_instr);
1869       nir_alu_src *alu_src = exec_node_data(nir_alu_src, src, src);
1870       int src_idx = alu_src - &alu->src[0];
1871       assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs);
1872       return nir_alu_instr_src_read_mask(alu, src_idx);
1873    } else if (src->parent_instr->type == nir_instr_type_intrinsic) {
1874       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(src->parent_instr);
1875       if (nir_intrinsic_has_write_mask(intrin) && src->ssa == get_store_value(intrin))
1876          return nir_intrinsic_write_mask(intrin);
1877       else
1878          return (1 << src->ssa->num_components) - 1;
1879    } else {
1880       return (1 << src->ssa->num_components) - 1;
1881    }
1882 }
1883 
1884 nir_component_mask_t
1885 nir_ssa_def_components_read(const nir_ssa_def *def)
1886 {
1887    nir_component_mask_t read_mask = 0;
1888 
1889    if (!list_is_empty(&def->if_uses))
1890       read_mask |= 1;
1891 
1892    nir_foreach_use(use, def) {
1893       read_mask |= nir_src_components_read(use);
1894       if (read_mask == (1 << def->num_components) - 1)
1895          return read_mask;
1896    }
1897 
1898    return read_mask;
1899 }
1900 
1901 nir_block *
1902 nir_block_unstructured_next(nir_block *block)
1903 {
1904    if (block == NULL) {
1905       /* nir_foreach_block_unstructured_safe() will call this function on a
1906        * NULL block after the last iteration, but it won't use the result so
1907        * just return NULL here.
1908        */
1909       return NULL;
1910    }
1911 
1912    nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
1913    if (cf_next == NULL && block->cf_node.parent->type == nir_cf_node_function)
1914       return NULL;
1915 
1916    if (cf_next && cf_next->type == nir_cf_node_block)
1917       return nir_cf_node_as_block(cf_next);
1918 
1919    return nir_block_cf_tree_next(block);
1920 }
1921 
1922 nir_block *
1923 nir_unstructured_start_block(nir_function_impl *impl)
1924 {
1925    return nir_start_block(impl);
1926 }
1927 
1928 nir_block *
1929 nir_block_cf_tree_next(nir_block *block)
1930 {
1931    if (block == NULL) {
1932       /* nir_foreach_block_safe() will call this function on a NULL block
1933        * after the last iteration, but it won't use the result so just return
1934        * NULL here.
1935        */
1936       return NULL;
1937    }
1938 
1939    assert(nir_cf_node_get_function(&block->cf_node)->structured);
1940 
1941    nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
1942    if (cf_next)
1943       return nir_cf_node_cf_tree_first(cf_next);
1944 
1945    nir_cf_node *parent = block->cf_node.parent;
1946 
1947    switch (parent->type) {
1948    case nir_cf_node_if: {
1949       /* Are we at the end of the if? Go to the beginning of the else */
1950       nir_if *if_stmt = nir_cf_node_as_if(parent);
1951       if (block == nir_if_last_then_block(if_stmt))
1952          return nir_if_first_else_block(if_stmt);
1953 
1954       assert(block == nir_if_last_else_block(if_stmt));
1955    }
1956    FALLTHROUGH;
1957 
1958    case nir_cf_node_loop:
1959       return nir_cf_node_as_block(nir_cf_node_next(parent));
1960 
1961    case nir_cf_node_function:
1962       return NULL;
1963 
1964    default:
1965       unreachable("unknown cf node type");
1966    }
1967 }
1968 
1969 nir_block *
1970 nir_block_cf_tree_prev(nir_block *block)
1971 {
1972    if (block == NULL) {
1973       /* do this for consistency with nir_block_cf_tree_next() */
1974       return NULL;
1975    }
1976 
1977    assert(nir_cf_node_get_function(&block->cf_node)->structured);
1978 
1979    nir_cf_node *cf_prev = nir_cf_node_prev(&block->cf_node);
1980    if (cf_prev)
1981       return nir_cf_node_cf_tree_last(cf_prev);
1982 
1983    nir_cf_node *parent = block->cf_node.parent;
1984 
1985    switch (parent->type) {
1986    case nir_cf_node_if: {
1987       /* Are we at the beginning of the else? Go to the end of the if */
1988       nir_if *if_stmt = nir_cf_node_as_if(parent);
1989       if (block == nir_if_first_else_block(if_stmt))
1990          return nir_if_last_then_block(if_stmt);
1991 
1992       assert(block == nir_if_first_then_block(if_stmt));
1993    }
1994    FALLTHROUGH;
1995 
1996    case nir_cf_node_loop:
1997       return nir_cf_node_as_block(nir_cf_node_prev(parent));
1998 
1999    case nir_cf_node_function:
2000       return NULL;
2001 
2002    default:
2003       unreachable("unknown cf node type");
2004    }
2005 }
2006 
2007 nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node)
2008 {
2009    switch (node->type) {
2010    case nir_cf_node_function: {
2011       nir_function_impl *impl = nir_cf_node_as_function(node);
2012       return nir_start_block(impl);
2013    }
2014 
2015    case nir_cf_node_if: {
2016       nir_if *if_stmt = nir_cf_node_as_if(node);
2017       return nir_if_first_then_block(if_stmt);
2018    }
2019 
2020    case nir_cf_node_loop: {
2021       nir_loop *loop = nir_cf_node_as_loop(node);
2022       return nir_loop_first_block(loop);
2023    }
2024 
2025    case nir_cf_node_block: {
2026       return nir_cf_node_as_block(node);
2027    }
2028 
2029    default:
2030       unreachable("unknown node type");
2031    }
2032 }
2033 
2034 nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node)
2035 {
2036    switch (node->type) {
2037    case nir_cf_node_function: {
2038       nir_function_impl *impl = nir_cf_node_as_function(node);
2039       return nir_impl_last_block(impl);
2040    }
2041 
2042    case nir_cf_node_if: {
2043       nir_if *if_stmt = nir_cf_node_as_if(node);
2044       return nir_if_last_else_block(if_stmt);
2045    }
2046 
2047    case nir_cf_node_loop: {
2048       nir_loop *loop = nir_cf_node_as_loop(node);
2049       return nir_loop_last_block(loop);
2050    }
2051 
2052    case nir_cf_node_block: {
2053       return nir_cf_node_as_block(node);
2054    }
2055 
2056    default:
2057       unreachable("unknown node type");
2058    }
2059 }
2060 
2061 nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node)
2062 {
2063    if (node->type == nir_cf_node_block)
2064       return nir_block_cf_tree_next(nir_cf_node_as_block(node));
2065    else if (node->type == nir_cf_node_function)
2066       return NULL;
2067    else
2068       return nir_cf_node_as_block(nir_cf_node_next(node));
2069 }
2070 
2071 nir_if *
2072 nir_block_get_following_if(nir_block *block)
2073 {
2074    if (exec_node_is_tail_sentinel(&block->cf_node.node))
2075       return NULL;
2076 
2077    if (nir_cf_node_is_last(&block->cf_node))
2078       return NULL;
2079 
2080    nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
2081 
2082    if (next_node->type != nir_cf_node_if)
2083       return NULL;
2084 
2085    return nir_cf_node_as_if(next_node);
2086 }
2087 
2088 nir_loop *
2089 nir_block_get_following_loop(nir_block *block)
2090 {
2091    if (exec_node_is_tail_sentinel(&block->cf_node.node))
2092       return NULL;
2093 
2094    if (nir_cf_node_is_last(&block->cf_node))
2095       return NULL;
2096 
2097    nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
2098 
2099    if (next_node->type != nir_cf_node_loop)
2100       return NULL;
2101 
2102    return nir_cf_node_as_loop(next_node);
2103 }
2104 
2105 static int
2106 compare_block_index(const void *p1, const void *p2)
2107 {
2108    const nir_block *block1 = *((const nir_block **) p1);
2109    const nir_block *block2 = *((const nir_block **) p2);
2110 
2111    return (int) block1->index - (int) block2->index;
2112 }
2113 
2114 nir_block **
2115 nir_block_get_predecessors_sorted(const nir_block *block, void *mem_ctx)
2116 {
2117    nir_block **preds =
2118       ralloc_array(mem_ctx, nir_block *, block->predecessors->entries);
2119 
2120    unsigned i = 0;
2121    set_foreach(block->predecessors, entry)
2122       preds[i++] = (nir_block *) entry->key;
2123    assert(i == block->predecessors->entries);
2124 
2125    qsort(preds, block->predecessors->entries, sizeof(nir_block *),
2126          compare_block_index);
2127 
2128    return preds;
2129 }
2130 
2131 void
2132 nir_index_blocks(nir_function_impl *impl)
2133 {
2134    unsigned index = 0;
2135 
2136    if (impl->valid_metadata & nir_metadata_block_index)
2137       return;
2138 
2139    nir_foreach_block_unstructured(block, impl) {
2140       block->index = index++;
2141    }
2142 
2143    /* The end_block isn't really part of the program, which is why its index
2144     * is >= num_blocks.
2145     */
2146    impl->num_blocks = impl->end_block->index = index;
2147 }
2148 
2149 static bool
2150 index_ssa_def_cb(nir_ssa_def *def, void *state)
2151 {
2152    unsigned *index = (unsigned *) state;
2153    def->index = (*index)++;
2154 
2155    return true;
2156 }
2157 
2158 /**
2159  * The indices are applied top-to-bottom which has the very nice property
2160  * that, if A dominates B, then A->index <= B->index.
2161  */
2162 void
2163 nir_index_ssa_defs(nir_function_impl *impl)
2164 {
2165    unsigned index = 0;
2166 
2167    impl->valid_metadata &= ~nir_metadata_live_ssa_defs;
2168 
2169    nir_foreach_block_unstructured(block, impl) {
2170       nir_foreach_instr(instr, block)
2171          nir_foreach_ssa_def(instr, index_ssa_def_cb, &index);
2172    }
2173 
2174    impl->ssa_alloc = index;
2175 }
2176 
2177 /**
2178  * The indices are applied top-to-bottom which has the very nice property
2179  * that, if A dominates B, then A->index <= B->index.
2180  */
2181 unsigned
2182 nir_index_instrs(nir_function_impl *impl)
2183 {
2184    unsigned index = 0;
2185 
2186    nir_foreach_block(block, impl) {
2187       block->start_ip = index++;
2188 
2189       nir_foreach_instr(instr, block)
2190          instr->index = index++;
2191 
2192       block->end_ip = index++;
2193    }
2194 
2195    return index;
2196 }
2197 
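/* Example usage (illustrative sketch; instr_is_before is hypothetical):
 * once nir_index_instrs() has run, program order within an impl can be
 * compared in O(1) instead of walking the instruction list.  This assumes
 * the indices are still current, i.e. no instructions were inserted since
 * the last call.
 */
static bool
instr_is_before(const nir_instr *a, const nir_instr *b)
{
   return a->index < b->index;
}
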
2198 unsigned
2199 nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes)
2200 {
2201    unsigned count = 0;
2202    nir_foreach_variable_with_modes(var, shader, modes)
2203       var->index = count++;
2204    return count;
2205 }
2206 
2207 unsigned
2208 nir_function_impl_index_vars(nir_function_impl *impl)
2209 {
2210    unsigned count = 0;
2211    nir_foreach_function_temp_variable(var, impl)
2212       var->index = count++;
2213    return count;
2214 }
2215 
2216 static nir_instr *
2217 cursor_next_instr(nir_cursor cursor)
2218 {
2219    switch (cursor.option) {
2220    case nir_cursor_before_block:
2221       for (nir_block *block = cursor.block; block;
2222            block = nir_block_cf_tree_next(block)) {
2223          nir_instr *instr = nir_block_first_instr(block);
2224          if (instr)
2225             return instr;
2226       }
2227       return NULL;
2228 
2229    case nir_cursor_after_block:
2230       cursor.block = nir_block_cf_tree_next(cursor.block);
2231       if (cursor.block == NULL)
2232          return NULL;
2233 
2234       cursor.option = nir_cursor_before_block;
2235       return cursor_next_instr(cursor);
2236 
2237    case nir_cursor_before_instr:
2238       return cursor.instr;
2239 
2240    case nir_cursor_after_instr:
2241       if (nir_instr_next(cursor.instr))
2242          return nir_instr_next(cursor.instr);
2243 
2244       cursor.option = nir_cursor_after_block;
2245       cursor.block = cursor.instr->block;
2246       return cursor_next_instr(cursor);
2247    }
2248 
2249    unreachable("Invalid cursor option");
2250 }
2251 
2252 ASSERTED static bool
2253 dest_is_ssa(nir_dest *dest, void *_state)
2254 {
2255    (void) _state;
2256    return dest->is_ssa;
2257 }
2258 
2259 bool
2260 nir_function_impl_lower_instructions(nir_function_impl *impl,
2261                                      nir_instr_filter_cb filter,
2262                                      nir_lower_instr_cb lower,
2263                                      void *cb_data)
2264 {
2265    nir_builder b;
2266    nir_builder_init(&b, impl);
2267 
2268    nir_metadata preserved = nir_metadata_block_index |
2269                             nir_metadata_dominance;
2270 
2271    bool progress = false;
2272    nir_cursor iter = nir_before_cf_list(&impl->body);
2273    nir_instr *instr;
2274    while ((instr = cursor_next_instr(iter)) != NULL) {
2275       if (filter && !filter(instr, cb_data)) {
2276          iter = nir_after_instr(instr);
2277          continue;
2278       }
2279 
2280       assert(nir_foreach_dest(instr, dest_is_ssa, NULL));
2281       nir_ssa_def *old_def = nir_instr_ssa_def(instr);
2282       struct list_head old_uses, old_if_uses;
2283       if (old_def != NULL) {
2284          /* We're about to ask the callback to generate a replacement for instr.
2285           * Save off the uses from instr's SSA def so we know what uses to
2286           * rewrite later.  If we use nir_ssa_def_rewrite_uses, it fails in the
2287           * case where the generated replacement code uses the result of instr
2288           * itself.  If we use nir_ssa_def_rewrite_uses_after (which is the
2289           * normal solution to this problem), it doesn't work well if control-
2290           * flow is inserted as part of the replacement, doesn't handle cases
2291           * where the replacement is something consumed by instr, and suffers
2292           * from performance issues.  This is the only way to 100% guarantee
2293           * that we rewrite the correct set efficiently.
2294           */
2295 
2296          list_replace(&old_def->uses, &old_uses);
2297          list_inithead(&old_def->uses);
2298          list_replace(&old_def->if_uses, &old_if_uses);
2299          list_inithead(&old_def->if_uses);
2300       }
2301 
2302       b.cursor = nir_after_instr(instr);
2303       nir_ssa_def *new_def = lower(&b, instr, cb_data);
2304       if (new_def && new_def != NIR_LOWER_INSTR_PROGRESS &&
2305           new_def != NIR_LOWER_INSTR_PROGRESS_REPLACE) {
2306          assert(old_def != NULL);
2307          if (new_def->parent_instr->block != instr->block)
2308             preserved = nir_metadata_none;
2309 
2310          nir_src new_src = nir_src_for_ssa(new_def);
2311          list_for_each_entry_safe(nir_src, use_src, &old_uses, use_link)
2312             nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
2313 
2314          list_for_each_entry_safe(nir_src, use_src, &old_if_uses, use_link)
2315             nir_if_rewrite_condition(use_src->parent_if, new_src);
2316 
2317          if (nir_ssa_def_is_unused(old_def)) {
2318             iter = nir_instr_free_and_dce(instr);
2319          } else {
2320             iter = nir_after_instr(instr);
2321          }
2322          progress = true;
2323       } else {
2324          /* We didn't end up lowering after all.  Put the uses back */
2325          if (old_def) {
2326             list_replace(&old_uses, &old_def->uses);
2327             list_replace(&old_if_uses, &old_def->if_uses);
2328          }
2329          if (new_def == NIR_LOWER_INSTR_PROGRESS_REPLACE) {
2330             /* Only instructions without a return value can be removed like this */
2331             assert(!old_def);
2332             iter = nir_instr_free_and_dce(instr);
2333             progress = true;
2334          } else
2335             iter = nir_after_instr(instr);
2336 
2337          if (new_def == NIR_LOWER_INSTR_PROGRESS)
2338             progress = true;
2339       }
2340    }
2341 
2342    if (progress) {
2343       nir_metadata_preserve(impl, preserved);
2344    } else {
2345       nir_metadata_preserve(impl, nir_metadata_all);
2346    }
2347 
2348    return progress;
2349 }
2350 
2351 bool
2352 nir_shader_lower_instructions(nir_shader *shader,
2353                               nir_instr_filter_cb filter,
2354                               nir_lower_instr_cb lower,
2355                               void *cb_data)
2356 {
2357    bool progress = false;
2358 
2359    nir_foreach_function(function, shader) {
2360       if (function->impl &&
2361           nir_function_impl_lower_instructions(function->impl,
2362                                                filter, lower, cb_data))
2363          progress = true;
2364    }
2365 
2366    return progress;
2367 }
2368 
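/* Example usage (illustrative sketch; is_fneg, lower_fneg and
 * nir_lower_fneg_example are hypothetical): replace every fneg ALU
 * instruction with a multiply by -1.0 using the filter/lower callback pair.
 */
static bool
is_fneg(const nir_instr *instr, const void *_data)
{
   (void) _data;
   return instr->type == nir_instr_type_alu &&
          nir_instr_as_alu((nir_instr *)instr)->op == nir_op_fneg;
}

static nir_ssa_def *
lower_fneg(nir_builder *b, nir_instr *instr, void *_data)
{
   (void) _data;
   nir_alu_instr *alu = nir_instr_as_alu(instr);
   /* the returned def replaces all uses of the fneg result */
   return nir_fmul_imm(b, nir_ssa_for_alu_src(b, alu, 0), -1.0);
}

static bool
nir_lower_fneg_example(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader, is_fneg, lower_fneg, NULL);
}
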
2369 /**
2370  * Returns true if the shader supports quad-based implicit derivatives for
2371  * texture sampling.
2372  */
2373 bool nir_shader_supports_implicit_lod(nir_shader *shader)
2374 {
2375    return (shader->info.stage == MESA_SHADER_FRAGMENT ||
2376            (shader->info.stage == MESA_SHADER_COMPUTE &&
2377             shader->info.cs.derivative_group != DERIVATIVE_GROUP_NONE));
2378 }
2379 
2380 nir_intrinsic_op
2381 nir_intrinsic_from_system_value(gl_system_value val)
2382 {
2383    switch (val) {
2384    case SYSTEM_VALUE_VERTEX_ID:
2385       return nir_intrinsic_load_vertex_id;
2386    case SYSTEM_VALUE_INSTANCE_ID:
2387       return nir_intrinsic_load_instance_id;
2388    case SYSTEM_VALUE_DRAW_ID:
2389       return nir_intrinsic_load_draw_id;
2390    case SYSTEM_VALUE_BASE_INSTANCE:
2391       return nir_intrinsic_load_base_instance;
2392    case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
2393       return nir_intrinsic_load_vertex_id_zero_base;
2394    case SYSTEM_VALUE_IS_INDEXED_DRAW:
2395       return nir_intrinsic_load_is_indexed_draw;
2396    case SYSTEM_VALUE_FIRST_VERTEX:
2397       return nir_intrinsic_load_first_vertex;
2398    case SYSTEM_VALUE_BASE_VERTEX:
2399       return nir_intrinsic_load_base_vertex;
2400    case SYSTEM_VALUE_INVOCATION_ID:
2401       return nir_intrinsic_load_invocation_id;
2402    case SYSTEM_VALUE_FRAG_COORD:
2403       return nir_intrinsic_load_frag_coord;
2404    case SYSTEM_VALUE_POINT_COORD:
2405       return nir_intrinsic_load_point_coord;
2406    case SYSTEM_VALUE_LINE_COORD:
2407       return nir_intrinsic_load_line_coord;
2408    case SYSTEM_VALUE_FRONT_FACE:
2409       return nir_intrinsic_load_front_face;
2410    case SYSTEM_VALUE_SAMPLE_ID:
2411       return nir_intrinsic_load_sample_id;
2412    case SYSTEM_VALUE_SAMPLE_POS:
2413       return nir_intrinsic_load_sample_pos;
2414    case SYSTEM_VALUE_SAMPLE_POS_OR_CENTER:
2415       return nir_intrinsic_load_sample_pos_or_center;
2416    case SYSTEM_VALUE_SAMPLE_MASK_IN:
2417       return nir_intrinsic_load_sample_mask_in;
2418    case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
2419       return nir_intrinsic_load_local_invocation_id;
2420    case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
2421       return nir_intrinsic_load_local_invocation_index;
2422    case SYSTEM_VALUE_WORKGROUP_ID:
2423       return nir_intrinsic_load_workgroup_id;
2424    case SYSTEM_VALUE_WORKGROUP_INDEX:
2425       return nir_intrinsic_load_workgroup_index;
2426    case SYSTEM_VALUE_NUM_WORKGROUPS:
2427       return nir_intrinsic_load_num_workgroups;
2428    case SYSTEM_VALUE_PRIMITIVE_ID:
2429       return nir_intrinsic_load_primitive_id;
2430    case SYSTEM_VALUE_TESS_COORD:
2431       return nir_intrinsic_load_tess_coord;
2432    case SYSTEM_VALUE_TESS_LEVEL_OUTER:
2433       return nir_intrinsic_load_tess_level_outer;
2434    case SYSTEM_VALUE_TESS_LEVEL_INNER:
2435       return nir_intrinsic_load_tess_level_inner;
2436    case SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT:
2437       return nir_intrinsic_load_tess_level_outer_default;
2438    case SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT:
2439       return nir_intrinsic_load_tess_level_inner_default;
2440    case SYSTEM_VALUE_VERTICES_IN:
2441       return nir_intrinsic_load_patch_vertices_in;
2442    case SYSTEM_VALUE_HELPER_INVOCATION:
2443       return nir_intrinsic_load_helper_invocation;
2444    case SYSTEM_VALUE_COLOR0:
2445       return nir_intrinsic_load_color0;
2446    case SYSTEM_VALUE_COLOR1:
2447       return nir_intrinsic_load_color1;
2448    case SYSTEM_VALUE_VIEW_INDEX:
2449       return nir_intrinsic_load_view_index;
2450    case SYSTEM_VALUE_SUBGROUP_SIZE:
2451       return nir_intrinsic_load_subgroup_size;
2452    case SYSTEM_VALUE_SUBGROUP_INVOCATION:
2453       return nir_intrinsic_load_subgroup_invocation;
2454    case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
2455       return nir_intrinsic_load_subgroup_eq_mask;
2456    case SYSTEM_VALUE_SUBGROUP_GE_MASK:
2457       return nir_intrinsic_load_subgroup_ge_mask;
2458    case SYSTEM_VALUE_SUBGROUP_GT_MASK:
2459       return nir_intrinsic_load_subgroup_gt_mask;
2460    case SYSTEM_VALUE_SUBGROUP_LE_MASK:
2461       return nir_intrinsic_load_subgroup_le_mask;
2462    case SYSTEM_VALUE_SUBGROUP_LT_MASK:
2463       return nir_intrinsic_load_subgroup_lt_mask;
2464    case SYSTEM_VALUE_NUM_SUBGROUPS:
2465       return nir_intrinsic_load_num_subgroups;
2466    case SYSTEM_VALUE_SUBGROUP_ID:
2467       return nir_intrinsic_load_subgroup_id;
2468    case SYSTEM_VALUE_WORKGROUP_SIZE:
2469       return nir_intrinsic_load_workgroup_size;
2470    case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
2471       return nir_intrinsic_load_global_invocation_id;
2472    case SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID:
2473       return nir_intrinsic_load_base_global_invocation_id;
2474    case SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX:
2475       return nir_intrinsic_load_global_invocation_index;
2476    case SYSTEM_VALUE_WORK_DIM:
2477       return nir_intrinsic_load_work_dim;
2478    case SYSTEM_VALUE_USER_DATA_AMD:
2479       return nir_intrinsic_load_user_data_amd;
2480    case SYSTEM_VALUE_RAY_LAUNCH_ID:
2481       return nir_intrinsic_load_ray_launch_id;
2482    case SYSTEM_VALUE_RAY_LAUNCH_SIZE:
2483       return nir_intrinsic_load_ray_launch_size;
2484    case SYSTEM_VALUE_RAY_LAUNCH_SIZE_ADDR_AMD:
2485       return nir_intrinsic_load_ray_launch_size_addr_amd;
2486    case SYSTEM_VALUE_RAY_WORLD_ORIGIN:
2487       return nir_intrinsic_load_ray_world_origin;
2488    case SYSTEM_VALUE_RAY_WORLD_DIRECTION:
2489       return nir_intrinsic_load_ray_world_direction;
2490    case SYSTEM_VALUE_RAY_OBJECT_ORIGIN:
2491       return nir_intrinsic_load_ray_object_origin;
2492    case SYSTEM_VALUE_RAY_OBJECT_DIRECTION:
2493       return nir_intrinsic_load_ray_object_direction;
2494    case SYSTEM_VALUE_RAY_T_MIN:
2495       return nir_intrinsic_load_ray_t_min;
2496    case SYSTEM_VALUE_RAY_T_MAX:
2497       return nir_intrinsic_load_ray_t_max;
2498    case SYSTEM_VALUE_RAY_OBJECT_TO_WORLD:
2499       return nir_intrinsic_load_ray_object_to_world;
2500    case SYSTEM_VALUE_RAY_WORLD_TO_OBJECT:
2501       return nir_intrinsic_load_ray_world_to_object;
2502    case SYSTEM_VALUE_RAY_HIT_KIND:
2503       return nir_intrinsic_load_ray_hit_kind;
2504    case SYSTEM_VALUE_RAY_FLAGS:
2505       return nir_intrinsic_load_ray_flags;
2506    case SYSTEM_VALUE_RAY_GEOMETRY_INDEX:
2507       return nir_intrinsic_load_ray_geometry_index;
2508    case SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX:
2509       return nir_intrinsic_load_ray_instance_custom_index;
2510    case SYSTEM_VALUE_CULL_MASK:
2511       return nir_intrinsic_load_cull_mask;
2512    case SYSTEM_VALUE_MESH_VIEW_COUNT:
2513       return nir_intrinsic_load_mesh_view_count;
2514    case SYSTEM_VALUE_FRAG_SHADING_RATE:
2515       return nir_intrinsic_load_frag_shading_rate;
2516    default:
2517       unreachable("system value does not directly correspond to intrinsic");
2518    }
2519 }
2520 
2521 gl_system_value
2522 nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
2523 {
2524    switch (intrin) {
2525    case nir_intrinsic_load_vertex_id:
2526       return SYSTEM_VALUE_VERTEX_ID;
2527    case nir_intrinsic_load_instance_id:
2528       return SYSTEM_VALUE_INSTANCE_ID;
2529    case nir_intrinsic_load_draw_id:
2530       return SYSTEM_VALUE_DRAW_ID;
2531    case nir_intrinsic_load_base_instance:
2532       return SYSTEM_VALUE_BASE_INSTANCE;
2533    case nir_intrinsic_load_vertex_id_zero_base:
2534       return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
2535    case nir_intrinsic_load_first_vertex:
2536       return SYSTEM_VALUE_FIRST_VERTEX;
2537    case nir_intrinsic_load_is_indexed_draw:
2538       return SYSTEM_VALUE_IS_INDEXED_DRAW;
2539    case nir_intrinsic_load_base_vertex:
2540       return SYSTEM_VALUE_BASE_VERTEX;
2541    case nir_intrinsic_load_invocation_id:
2542       return SYSTEM_VALUE_INVOCATION_ID;
2543    case nir_intrinsic_load_frag_coord:
2544       return SYSTEM_VALUE_FRAG_COORD;
2545    case nir_intrinsic_load_point_coord:
2546       return SYSTEM_VALUE_POINT_COORD;
2547    case nir_intrinsic_load_line_coord:
2548       return SYSTEM_VALUE_LINE_COORD;
2549    case nir_intrinsic_load_front_face:
2550       return SYSTEM_VALUE_FRONT_FACE;
2551    case nir_intrinsic_load_sample_id:
2552       return SYSTEM_VALUE_SAMPLE_ID;
2553    case nir_intrinsic_load_sample_pos:
2554       return SYSTEM_VALUE_SAMPLE_POS;
2555    case nir_intrinsic_load_sample_pos_or_center:
2556       return SYSTEM_VALUE_SAMPLE_POS_OR_CENTER;
2557    case nir_intrinsic_load_sample_mask_in:
2558       return SYSTEM_VALUE_SAMPLE_MASK_IN;
2559    case nir_intrinsic_load_local_invocation_id:
2560       return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
2561    case nir_intrinsic_load_local_invocation_index:
2562       return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
2563    case nir_intrinsic_load_num_workgroups:
2564       return SYSTEM_VALUE_NUM_WORKGROUPS;
2565    case nir_intrinsic_load_workgroup_id:
2566       return SYSTEM_VALUE_WORKGROUP_ID;
2567    case nir_intrinsic_load_workgroup_index:
2568       return SYSTEM_VALUE_WORKGROUP_INDEX;
2569    case nir_intrinsic_load_primitive_id:
2570       return SYSTEM_VALUE_PRIMITIVE_ID;
2571    case nir_intrinsic_load_tess_coord:
2572       return SYSTEM_VALUE_TESS_COORD;
2573    case nir_intrinsic_load_tess_level_outer:
2574       return SYSTEM_VALUE_TESS_LEVEL_OUTER;
2575    case nir_intrinsic_load_tess_level_inner:
2576       return SYSTEM_VALUE_TESS_LEVEL_INNER;
2577    case nir_intrinsic_load_tess_level_outer_default:
2578       return SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT;
2579    case nir_intrinsic_load_tess_level_inner_default:
2580       return SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT;
2581    case nir_intrinsic_load_patch_vertices_in:
2582       return SYSTEM_VALUE_VERTICES_IN;
2583    case nir_intrinsic_load_helper_invocation:
2584       return SYSTEM_VALUE_HELPER_INVOCATION;
2585    case nir_intrinsic_load_color0:
2586       return SYSTEM_VALUE_COLOR0;
2587    case nir_intrinsic_load_color1:
2588       return SYSTEM_VALUE_COLOR1;
2589    case nir_intrinsic_load_view_index:
2590       return SYSTEM_VALUE_VIEW_INDEX;
2591    case nir_intrinsic_load_subgroup_size:
2592       return SYSTEM_VALUE_SUBGROUP_SIZE;
2593    case nir_intrinsic_load_subgroup_invocation:
2594       return SYSTEM_VALUE_SUBGROUP_INVOCATION;
2595    case nir_intrinsic_load_subgroup_eq_mask:
2596       return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
2597    case nir_intrinsic_load_subgroup_ge_mask:
2598       return SYSTEM_VALUE_SUBGROUP_GE_MASK;
2599    case nir_intrinsic_load_subgroup_gt_mask:
2600       return SYSTEM_VALUE_SUBGROUP_GT_MASK;
2601    case nir_intrinsic_load_subgroup_le_mask:
2602       return SYSTEM_VALUE_SUBGROUP_LE_MASK;
2603    case nir_intrinsic_load_subgroup_lt_mask:
2604       return SYSTEM_VALUE_SUBGROUP_LT_MASK;
2605    case nir_intrinsic_load_num_subgroups:
2606       return SYSTEM_VALUE_NUM_SUBGROUPS;
2607    case nir_intrinsic_load_subgroup_id:
2608       return SYSTEM_VALUE_SUBGROUP_ID;
2609    case nir_intrinsic_load_workgroup_size:
2610       return SYSTEM_VALUE_WORKGROUP_SIZE;
2611    case nir_intrinsic_load_global_invocation_id:
2612       return SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
2613    case nir_intrinsic_load_base_global_invocation_id:
2614       return SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID;
2615    case nir_intrinsic_load_global_invocation_index:
2616       return SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX;
2617    case nir_intrinsic_load_work_dim:
2618       return SYSTEM_VALUE_WORK_DIM;
2619    case nir_intrinsic_load_user_data_amd:
2620       return SYSTEM_VALUE_USER_DATA_AMD;
2621    case nir_intrinsic_load_barycentric_model:
2622       return SYSTEM_VALUE_BARYCENTRIC_PULL_MODEL;
2623    case nir_intrinsic_load_gs_header_ir3:
2624       return SYSTEM_VALUE_GS_HEADER_IR3;
2625    case nir_intrinsic_load_tcs_header_ir3:
2626       return SYSTEM_VALUE_TCS_HEADER_IR3;
2627    case nir_intrinsic_load_ray_launch_id:
2628       return SYSTEM_VALUE_RAY_LAUNCH_ID;
2629    case nir_intrinsic_load_ray_launch_size:
2630       return SYSTEM_VALUE_RAY_LAUNCH_SIZE;
2631    case nir_intrinsic_load_ray_launch_size_addr_amd:
2632       return SYSTEM_VALUE_RAY_LAUNCH_SIZE_ADDR_AMD;
2633    case nir_intrinsic_load_ray_world_origin:
2634       return SYSTEM_VALUE_RAY_WORLD_ORIGIN;
2635    case nir_intrinsic_load_ray_world_direction:
2636       return SYSTEM_VALUE_RAY_WORLD_DIRECTION;
2637    case nir_intrinsic_load_ray_object_origin:
2638       return SYSTEM_VALUE_RAY_OBJECT_ORIGIN;
2639    case nir_intrinsic_load_ray_object_direction:
2640       return SYSTEM_VALUE_RAY_OBJECT_DIRECTION;
2641    case nir_intrinsic_load_ray_t_min:
2642       return SYSTEM_VALUE_RAY_T_MIN;
2643    case nir_intrinsic_load_ray_t_max:
2644       return SYSTEM_VALUE_RAY_T_MAX;
2645    case nir_intrinsic_load_ray_object_to_world:
2646       return SYSTEM_VALUE_RAY_OBJECT_TO_WORLD;
2647    case nir_intrinsic_load_ray_world_to_object:
2648       return SYSTEM_VALUE_RAY_WORLD_TO_OBJECT;
2649    case nir_intrinsic_load_ray_hit_kind:
2650       return SYSTEM_VALUE_RAY_HIT_KIND;
2651    case nir_intrinsic_load_ray_flags:
2652       return SYSTEM_VALUE_RAY_FLAGS;
2653    case nir_intrinsic_load_ray_geometry_index:
2654       return SYSTEM_VALUE_RAY_GEOMETRY_INDEX;
2655    case nir_intrinsic_load_ray_instance_custom_index:
2656       return SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX;
2657    case nir_intrinsic_load_cull_mask:
2658       return SYSTEM_VALUE_CULL_MASK;
2659    case nir_intrinsic_load_frag_shading_rate:
2660       return SYSTEM_VALUE_FRAG_SHADING_RATE;
2661    case nir_intrinsic_load_mesh_view_count:
2662       return SYSTEM_VALUE_MESH_VIEW_COUNT;
2663    default:
2664       unreachable("intrinsic doesn't produce a system value");
2665    }
2666 }
2667 
2668 /* OpenGL utility method that remaps the attribute locations if they are
2669  * doubles. Not needed for Vulkan due to the differences in the input
2670  * location count for doubles between Vulkan and OpenGL.
2671  *
2672  * The bitfield returned in dual_slot is one bit for each double input slot in
2673  * the original OpenGL single-slot input numbering.  The mapping from old
2674  * locations to new locations is as follows:
2675  *
2676  *    new_loc = loc + util_bitcount(dual_slot & BITFIELD64_MASK(loc))
2677  */
2678 void
2679 nir_remap_dual_slot_attributes(nir_shader *shader, uint64_t *dual_slot)
2680 {
2681    assert(shader->info.stage == MESA_SHADER_VERTEX);
2682 
2683    *dual_slot = 0;
2684    nir_foreach_shader_in_variable(var, shader) {
2685       if (glsl_type_is_dual_slot(glsl_without_array(var->type))) {
2686          unsigned slots = glsl_count_attribute_slots(var->type, true);
2687          *dual_slot |= BITFIELD64_MASK(slots) << var->data.location;
2688       }
2689    }
2690 
2691    nir_foreach_shader_in_variable(var, shader) {
2692       var->data.location +=
2693          util_bitcount64(*dual_slot & BITFIELD64_MASK(var->data.location));
2694    }
2695 }
2696 
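/* Worked example (illustrative): suppose dual_slot has bits 1 and 2 set
 * (dual_slot = 0x6), e.g. a dvec4 at location 1 that covers two original
 * slots.  Then:
 *
 *    new_loc(0) = 0 + util_bitcount(0x6 & BITFIELD64_MASK(0)) = 0
 *    new_loc(1) = 1 + util_bitcount(0x6 & BITFIELD64_MASK(1)) = 1
 *    new_loc(3) = 3 + util_bitcount(0x6 & BITFIELD64_MASK(3)) = 5
 *
 * Each dual-slot bit below a location pushes that location up by one.
 */
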
2697 /* Returns an attribute mask that has been re-compacted using the given
2698  * dual_slot mask.
2699  */
2700 uint64_t
2701 nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot)
2702 {
2703    while (dual_slot) {
2704       unsigned loc = u_bit_scan64(&dual_slot);
2705       /* mask of all bits up to and including loc */
2706       uint64_t mask = BITFIELD64_MASK(loc + 1);
2707       attribs = (attribs & mask) | ((attribs & ~mask) >> 1);
2708    }
2709    return attribs;
2710 }
2711 
2712 void
2713 nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_ssa_def *src,
2714                             bool bindless)
2715 {
2716    enum gl_access_qualifier access = nir_intrinsic_access(intrin);
2717 
2718    /* Image intrinsics only have one of these */
2719    assert(!nir_intrinsic_has_src_type(intrin) ||
2720           !nir_intrinsic_has_dest_type(intrin));
2721 
2722    nir_alu_type data_type = nir_type_invalid;
2723    if (nir_intrinsic_has_src_type(intrin))
2724       data_type = nir_intrinsic_src_type(intrin);
2725    if (nir_intrinsic_has_dest_type(intrin))
2726       data_type = nir_intrinsic_dest_type(intrin);
2727 
2728    switch (intrin->intrinsic) {
2729 #define CASE(op) \
2730    case nir_intrinsic_image_deref_##op: \
2731       intrin->intrinsic = bindless ? nir_intrinsic_bindless_image_##op \
2732                                    : nir_intrinsic_image_##op; \
2733       break;
2734    CASE(load)
2735    CASE(sparse_load)
2736    CASE(store)
2737    CASE(atomic_add)
2738    CASE(atomic_imin)
2739    CASE(atomic_umin)
2740    CASE(atomic_imax)
2741    CASE(atomic_umax)
2742    CASE(atomic_and)
2743    CASE(atomic_or)
2744    CASE(atomic_xor)
2745    CASE(atomic_exchange)
2746    CASE(atomic_comp_swap)
2747    CASE(atomic_fadd)
2748    CASE(atomic_fmin)
2749    CASE(atomic_fmax)
2750    CASE(atomic_inc_wrap)
2751    CASE(atomic_dec_wrap)
2752    CASE(size)
2753    CASE(samples)
2754    CASE(load_raw_intel)
2755    CASE(store_raw_intel)
2756 #undef CASE
2757    default:
2758       unreachable("Unhandled image intrinsic");
2759    }
2760 
2761    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2762    nir_variable *var = nir_deref_instr_get_variable(deref);
2763 
2764    /* Only update the format if the intrinsic doesn't have one set */
2765    if (nir_intrinsic_format(intrin) == PIPE_FORMAT_NONE)
2766       nir_intrinsic_set_format(intrin, var->data.image.format);
2767 
2768    nir_intrinsic_set_access(intrin, access | var->data.access);
2769    if (nir_intrinsic_has_src_type(intrin))
2770       nir_intrinsic_set_src_type(intrin, data_type);
2771    if (nir_intrinsic_has_dest_type(intrin))
2772       nir_intrinsic_set_dest_type(intrin, data_type);
2773 
2774    nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
2775                          nir_src_for_ssa(src));
2776 }
2777 
2778 unsigned
2779 nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr)
2780 {
2781    enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
2782    int coords = glsl_get_sampler_dim_coordinate_components(dim);
2783    if (dim == GLSL_SAMPLER_DIM_CUBE)
2784       return coords;
2785    else
2786       return coords + nir_intrinsic_image_array(instr);
2787 }
2788 
2789 nir_src *
2790 nir_get_shader_call_payload_src(nir_intrinsic_instr *call)
2791 {
2792    switch (call->intrinsic) {
2793    case nir_intrinsic_trace_ray:
2794    case nir_intrinsic_rt_trace_ray:
2795       return &call->src[10];
2796    case nir_intrinsic_execute_callable:
2797    case nir_intrinsic_rt_execute_callable:
2798       return &call->src[1];
2799    default:
2800       unreachable("Not a call intrinsic");
2801       return NULL;
2802    }
2803 }
2804 
2805 nir_binding nir_chase_binding(nir_src rsrc)
2806 {
2807    nir_binding res = {0};
2808    if (rsrc.ssa->parent_instr->type == nir_instr_type_deref) {
2809       const struct glsl_type *type = glsl_without_array(nir_src_as_deref(rsrc)->type);
2810       bool is_image = glsl_type_is_image(type) || glsl_type_is_sampler(type);
2811       while (rsrc.ssa->parent_instr->type == nir_instr_type_deref) {
2812          nir_deref_instr *deref = nir_src_as_deref(rsrc);
2813 
2814          if (deref->deref_type == nir_deref_type_var) {
2815             res.success = true;
2816             res.var = deref->var;
2817             res.desc_set = deref->var->data.descriptor_set;
2818             res.binding = deref->var->data.binding;
2819             return res;
2820          } else if (deref->deref_type == nir_deref_type_array && is_image) {
2821             if (res.num_indices == ARRAY_SIZE(res.indices))
2822                return (nir_binding){0};
2823             res.indices[res.num_indices++] = deref->arr.index;
2824          }
2825 
2826          rsrc = deref->parent;
2827       }
2828    }
2829 
2830    /* Skip copies and trimming. Trimming can appear as nir_op_mov instructions
2831     * when removing the offset from addresses. We also consider nir_op_is_vec()
2832     * instructions to skip trimming of vec2_index_32bit_offset addresses after
2833     * lowering ALU to scalar.
2834     */
2835    while (true) {
2836       nir_alu_instr *alu = nir_src_as_alu_instr(rsrc);
2837       nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc);
2838       if (alu && alu->op == nir_op_mov) {
2839          for (unsigned i = 0; i < alu->dest.dest.ssa.num_components; i++) {
2840             if (alu->src[0].swizzle[i] != i)
2841                return (nir_binding){0};
2842          }
2843          rsrc = alu->src[0].src;
2844       } else if (alu && nir_op_is_vec(alu->op)) {
2845          for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
2846             if (alu->src[i].swizzle[0] != i || alu->src[i].src.ssa != alu->src[0].src.ssa)
2847                return (nir_binding){0};
2848          }
2849          rsrc = alu->src[0].src;
2850       } else if (intrin && intrin->intrinsic == nir_intrinsic_read_first_invocation) {
2851          /* The caller might want to be aware that only the first
2852           * invocation's value of the indices is used.
2853           */
2854          res.read_first_invocation = true;
2855          rsrc = intrin->src[0];
2856       } else {
2857          break;
2858       }
2859    }
2860 
2861    if (nir_src_is_const(rsrc)) {
2862       /* GL binding model after deref lowering */
2863       res.success = true;
2864       /* Can't use just nir_src_as_uint. Vulkan resource index produces a
2865        * vec2. Some drivers lower it to vec1 (to handle get_ssbo_size for
2866        * example) but others just keep it around as a vec2 (v3dv).
2867        */
2868       res.binding = nir_src_comp_as_uint(rsrc, 0);
2869       return res;
2870    }
2871 
2872    /* otherwise, must be Vulkan binding model after deref lowering or GL bindless */
2873 
2874    nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc);
2875    if (!intrin)
2876       return (nir_binding){0};
2877 
2878    /* skip load_vulkan_descriptor */
2879    if (intrin->intrinsic == nir_intrinsic_load_vulkan_descriptor) {
2880       intrin = nir_src_as_intrinsic(intrin->src[0]);
2881       if (!intrin)
2882          return (nir_binding){0};
2883    }
2884 
2885    if (intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
2886       return (nir_binding){0};
2887 
2888    assert(res.num_indices == 0);
2889    res.success = true;
2890    res.desc_set = nir_intrinsic_desc_set(intrin);
2891    res.binding = nir_intrinsic_binding(intrin);
2892    res.num_indices = 1;
2893    res.indices[0] = intrin->src[0];
2894    return res;
2895 }
2896 
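/* Example usage (illustrative sketch; get_ssbo_store_binding is
 * hypothetical and assumes a store_ssbo-style intrinsic whose buffer index
 * lives in src[1]): resolve the descriptor set and binding behind the
 * access, if they can be chased statically.
 */
static bool
get_ssbo_store_binding(nir_intrinsic_instr *store,
                       unsigned *set, unsigned *binding)
{
   nir_binding b = nir_chase_binding(store->src[1]);
   if (!b.success)
      return false;

   *set = b.desc_set;
   *binding = b.binding;
   return true;
}
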
2897 nir_variable *nir_get_binding_variable(nir_shader *shader, nir_binding binding)
2898 {
2899    nir_variable *binding_var = NULL;
2900    unsigned count = 0;
2901 
2902    if (!binding.success)
2903       return NULL;
2904 
2905    if (binding.var)
2906       return binding.var;
2907 
2908    nir_foreach_variable_with_modes(var, shader, nir_var_mem_ubo | nir_var_mem_ssbo) {
2909       if (var->data.descriptor_set == binding.desc_set && var->data.binding == binding.binding) {
2910          binding_var = var;
2911          count++;
2912       }
2913    }
2914 
2915    /* Be conservative if another variable is using the same binding/desc_set
2916     * because the access mask might be different and we can't get it reliably.
2917     */
2918    if (count > 1)
2919       return NULL;
2920 
2921    return binding_var;
2922 }
2923 
2924 bool
2925 nir_alu_instr_is_copy(nir_alu_instr *instr)
2926 {
2927    assert(instr->src[0].src.is_ssa);
2928 
2929    if (instr->op == nir_op_mov) {
2930       return !instr->dest.saturate &&
2931              !instr->src[0].abs &&
2932              !instr->src[0].negate;
2933    } else if (nir_op_is_vec(instr->op)) {
2934       for (unsigned i = 0; i < instr->dest.dest.ssa.num_components; i++) {
2935          if (instr->src[i].abs || instr->src[i].negate)
2936             return false;
2937       }
2938       return !instr->dest.saturate;
2939    } else {
2940       return false;
2941    }
2942 }
2943 
2944 nir_ssa_scalar
2945 nir_ssa_scalar_chase_movs(nir_ssa_scalar s)
2946 {
2947    while (nir_ssa_scalar_is_alu(s)) {
2948       nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
2949       if (!nir_alu_instr_is_copy(alu))
2950          break;
2951 
2952       if (alu->op == nir_op_mov) {
2953          s.def = alu->src[0].src.ssa;
2954          s.comp = alu->src[0].swizzle[s.comp];
2955       } else {
2956          assert(nir_op_is_vec(alu->op));
2957          s.def = alu->src[s.comp].src.ssa;
2958          s.comp = alu->src[s.comp].swizzle[0];
2959       }
2960    }
2961 
2962    return s;
2963 }
2964 
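/* Example usage (illustrative sketch; scalar_chases_to_const is
 * hypothetical): look through copies and vec instructions to test whether
 * component 'comp' of 'def' ultimately comes from a load_const.
 */
static bool
scalar_chases_to_const(nir_ssa_def *def, unsigned comp)
{
   nir_ssa_scalar s = nir_ssa_scalar_chase_movs((nir_ssa_scalar){def, comp});
   return s.def->parent_instr->type == nir_instr_type_load_const;
}
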
2965 nir_alu_type
2966 nir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type)
2967 {
2968    switch (base_type) {
2969    case GLSL_TYPE_BOOL:
2970       return nir_type_bool1;
2971       break;
2972    case GLSL_TYPE_UINT:
2973       return nir_type_uint32;
2974       break;
2975    case GLSL_TYPE_INT:
2976       return nir_type_int32;
2977       break;
2978    case GLSL_TYPE_UINT16:
2979       return nir_type_uint16;
2980       break;
2981    case GLSL_TYPE_INT16:
2982       return nir_type_int16;
2983       break;
2984    case GLSL_TYPE_UINT8:
2985       return nir_type_uint8;
2986    case GLSL_TYPE_INT8:
2987       return nir_type_int8;
2988    case GLSL_TYPE_UINT64:
2989       return nir_type_uint64;
2990       break;
2991    case GLSL_TYPE_INT64:
2992       return nir_type_int64;
2993       break;
2994    case GLSL_TYPE_FLOAT:
2995       return nir_type_float32;
2996       break;
2997    case GLSL_TYPE_FLOAT16:
2998       return nir_type_float16;
2999       break;
3000    case GLSL_TYPE_DOUBLE:
3001       return nir_type_float64;
3002       break;
3003 
3004    case GLSL_TYPE_SAMPLER:
3005    case GLSL_TYPE_TEXTURE:
3006    case GLSL_TYPE_IMAGE:
3007    case GLSL_TYPE_ATOMIC_UINT:
3008    case GLSL_TYPE_STRUCT:
3009    case GLSL_TYPE_INTERFACE:
3010    case GLSL_TYPE_ARRAY:
3011    case GLSL_TYPE_VOID:
3012    case GLSL_TYPE_SUBROUTINE:
3013    case GLSL_TYPE_FUNCTION:
3014    case GLSL_TYPE_ERROR:
3015       return nir_type_invalid;
3016    }
3017 
3018    unreachable("unknown type");
3019 }
3020 
3021 enum glsl_base_type
3022 nir_get_glsl_base_type_for_nir_type(nir_alu_type base_type)
3023 {
3024    switch (base_type) {
3025    case nir_type_bool1:
3026       return GLSL_TYPE_BOOL;
3027    case nir_type_uint32:
3028       return GLSL_TYPE_UINT;
3029    case nir_type_int32:
3030       return GLSL_TYPE_INT;
3031    case nir_type_uint16:
3032       return GLSL_TYPE_UINT16;
3033    case nir_type_int16:
3034       return GLSL_TYPE_INT16;
3035    case nir_type_uint8:
3036       return GLSL_TYPE_UINT8;
3037    case nir_type_int8:
3038       return GLSL_TYPE_INT8;
3039    case nir_type_uint64:
3040       return GLSL_TYPE_UINT64;
3041    case nir_type_int64:
3042       return GLSL_TYPE_INT64;
3043    case nir_type_float32:
3044       return GLSL_TYPE_FLOAT;
3045    case nir_type_float16:
3046       return GLSL_TYPE_FLOAT16;
3047    case nir_type_float64:
3048       return GLSL_TYPE_DOUBLE;
3049 
3050    default: unreachable("Not a sized nir_alu_type");
3051    }
3052 }
3053 
3054 nir_op
3055 nir_op_vec(unsigned components)
3056 {
3057    switch (components) {
3058    case  1: return nir_op_mov;
3059    case  2: return nir_op_vec2;
3060    case  3: return nir_op_vec3;
3061    case  4: return nir_op_vec4;
3062    case  5: return nir_op_vec5;
3063    case  8: return nir_op_vec8;
3064    case 16: return nir_op_vec16;
3065    default: unreachable("bad component count");
3066    }
3067 }
3068 
3069 bool
3070 nir_op_is_vec(nir_op op)
3071 {
3072    switch (op) {
3073    case nir_op_mov:
3074    case nir_op_vec2:
3075    case nir_op_vec3:
3076    case nir_op_vec4:
3077    case nir_op_vec5:
3078    case nir_op_vec8:
3079    case nir_op_vec16:
3080       return true;
3081    default:
3082       return false;
3083    }
3084 }
3085 
3086 bool
3087 nir_alu_instr_channel_used(const nir_alu_instr *instr, unsigned src,
3088                            unsigned channel)
3089 {
3090    if (nir_op_infos[instr->op].input_sizes[src] > 0)
3091       return channel < nir_op_infos[instr->op].input_sizes[src];
3092 
3093    return (instr->dest.write_mask >> channel) & 1;
3094 }
3095 
3096 nir_component_mask_t
3097 nir_alu_instr_src_read_mask(const nir_alu_instr *instr, unsigned src)
3098 {
3099    nir_component_mask_t read_mask = 0;
3100    for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) {
3101       if (!nir_alu_instr_channel_used(instr, src, c))
3102          continue;
3103 
3104       read_mask |= (1 << instr->src[src].swizzle[c]);
3105    }
3106    return read_mask;
3107 }
3108 
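/* Worked example (illustrative): for an fadd whose write_mask covers .xy
 * and whose source is swizzled .yzxw, only swizzle entries 0 and 1 matter,
 * so the source reads components y and z and the returned mask is
 * (1 << 1) | (1 << 2) = 0x6.
 */
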
3109 unsigned
3110 nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src)
3111 {
3112    if (nir_op_infos[instr->op].input_sizes[src] > 0)
3113       return nir_op_infos[instr->op].input_sizes[src];
3114 
3115    return nir_dest_num_components(instr->dest.dest);
3116 }

#define CASE_ALL_SIZES(op) \
   case op: \
   case op ## 8: \
   case op ## 16: \
   case op ## 32:

bool
nir_alu_instr_is_comparison(const nir_alu_instr *instr)
{
   switch (instr->op) {
   CASE_ALL_SIZES(nir_op_flt)
   CASE_ALL_SIZES(nir_op_fge)
   CASE_ALL_SIZES(nir_op_feq)
   CASE_ALL_SIZES(nir_op_fneu)
   CASE_ALL_SIZES(nir_op_ilt)
   CASE_ALL_SIZES(nir_op_ult)
   CASE_ALL_SIZES(nir_op_ige)
   CASE_ALL_SIZES(nir_op_uge)
   CASE_ALL_SIZES(nir_op_ieq)
   CASE_ALL_SIZES(nir_op_ine)
   case nir_op_i2b1:
   case nir_op_i2b8:
   case nir_op_i2b16:
   case nir_op_i2b32:
   case nir_op_f2b1:
   case nir_op_f2b8:
   case nir_op_f2b16:
   case nir_op_f2b32:
   case nir_op_inot:
      return true;
   default:
      return false;
   }
}

#undef CASE_ALL_SIZES
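
/* For reference, CASE_ALL_SIZES(nir_op_flt) pastes tokens to expand to:
 *
 *    case nir_op_flt:
 *    case nir_op_flt8:
 *    case nir_op_flt16:
 *    case nir_op_flt32:
 *
 * i.e. the 1-bit-boolean base opcode plus its sized variants.
 */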

unsigned
nir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn)
{
   const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
   assert(srcn < info->num_srcs);
   if (info->src_components[srcn] > 0)
      return info->src_components[srcn];
   else if (info->src_components[srcn] == 0)
      return intr->num_components;
   else
      return nir_src_num_components(intr->src[srcn]);
}

unsigned
nir_intrinsic_dest_components(nir_intrinsic_instr *intr)
{
   const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
   if (!info->has_dest)
      return 0;
   else if (info->dest_components)
      return info->dest_components;
   else
      return intr->num_components;
}
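
/* The src_components[] table behind nir_intrinsic_src_components() is
 * three-valued: positive means a fixed size, zero means "same as
 * intr->num_components", and negative means the source carries its own
 * size.  Sketch of the resulting invariant (holds for validated NIR):
 *
 *    assert(nir_intrinsic_src_components(intr, 0) ==
 *           nir_src_num_components(intr->src[0]));
 */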

/**
 * Helper to copy const_index[] from src to dst, without assuming they
 * match in order.
 */
void
nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src)
{
   if (src->intrinsic == dst->intrinsic) {
      memcpy(dst->const_index, src->const_index, sizeof(dst->const_index));
      return;
   }

   const nir_intrinsic_info *src_info = &nir_intrinsic_infos[src->intrinsic];
   const nir_intrinsic_info *dst_info = &nir_intrinsic_infos[dst->intrinsic];

   for (unsigned i = 0; i < NIR_INTRINSIC_NUM_INDEX_FLAGS; i++) {
      if (src_info->index_map[i] == 0)
         continue;

      /* require that dst instruction also uses the same const_index[]: */
      assert(dst_info->index_map[i] > 0);

      dst->const_index[dst_info->index_map[i] - 1] =
            src->const_index[src_info->index_map[i] - 1];
   }
}
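
/* index_map[] is 1-based: a value of 0 means the intrinsic lacks that
 * index, and a value of k stores it in const_index[k - 1].  So copying
 * indices between two different intrinsics, e.g. from a store_output to
 * a store_per_vertex_output, relocates each shared index (BASE,
 * WRITE_MASK, ...) to wherever the destination intrinsic keeps it,
 * rather than copying const_index[] slots blindly.
 */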


bool
nir_tex_instr_need_sampler(const nir_tex_instr *instr)
{
   switch (instr->op) {
   case nir_texop_txf:
   case nir_texop_txf_ms:
   case nir_texop_txs:
   case nir_texop_query_levels:
   case nir_texop_texture_samples:
   case nir_texop_samples_identical:
      return false;
   default:
      return true;
   }
}

unsigned
nir_tex_instr_result_size(const nir_tex_instr *instr)
{
   switch (instr->op) {
   case nir_texop_txs: {
      unsigned ret;
      switch (instr->sampler_dim) {
         case GLSL_SAMPLER_DIM_1D:
         case GLSL_SAMPLER_DIM_BUF:
            ret = 1;
            break;
         case GLSL_SAMPLER_DIM_2D:
         case GLSL_SAMPLER_DIM_CUBE:
         case GLSL_SAMPLER_DIM_MS:
         case GLSL_SAMPLER_DIM_RECT:
         case GLSL_SAMPLER_DIM_EXTERNAL:
         case GLSL_SAMPLER_DIM_SUBPASS:
            ret = 2;
            break;
         case GLSL_SAMPLER_DIM_3D:
            ret = 3;
            break;
         default:
            unreachable("not reached");
      }
      if (instr->is_array)
         ret++;
      return ret;
   }

   case nir_texop_lod:
      return 2;

   case nir_texop_texture_samples:
   case nir_texop_query_levels:
   case nir_texop_samples_identical:
   case nir_texop_fragment_mask_fetch_amd:
      return 1;

   default:
      if (instr->is_shadow && instr->is_new_style_shadow)
         return 1;

      return 4;
   }
}
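
/* Worked example: txs on a 2D array texture returns width and height
 * (ret == 2) plus one layer-count component from is_array, i.e. a vec3.
 * Conversely, a shadow sample with is_new_style_shadow set returns just
 * the single comparison result instead of a vec4.
 */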

bool
nir_tex_instr_is_query(const nir_tex_instr *instr)
{
   switch (instr->op) {
   case nir_texop_txs:
   case nir_texop_lod:
   case nir_texop_texture_samples:
   case nir_texop_query_levels:
      return true;
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_txl:
   case nir_texop_txd:
   case nir_texop_txf:
   case nir_texop_txf_ms:
   case nir_texop_txf_ms_fb:
   case nir_texop_txf_ms_mcs_intel:
   case nir_texop_tg4:
      return false;
   default:
      unreachable("Invalid texture opcode");
   }
}

bool
nir_tex_instr_has_implicit_derivative(const nir_tex_instr *instr)
{
   switch (instr->op) {
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_lod:
      return true;
   default:
      return false;
   }
}

nir_alu_type
nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src)
{
   switch (instr->src[src].src_type) {
   case nir_tex_src_coord:
      switch (instr->op) {
      case nir_texop_txf:
      case nir_texop_txf_ms:
      case nir_texop_txf_ms_fb:
      case nir_texop_txf_ms_mcs_intel:
      case nir_texop_samples_identical:
      case nir_texop_fragment_fetch_amd:
      case nir_texop_fragment_mask_fetch_amd:
         return nir_type_int;

      default:
         return nir_type_float;
      }

   case nir_tex_src_lod:
      switch (instr->op) {
      case nir_texop_txs:
      case nir_texop_txf:
      case nir_texop_txf_ms:
      case nir_texop_fragment_fetch_amd:
      case nir_texop_fragment_mask_fetch_amd:
         return nir_type_int;

      default:
         return nir_type_float;
      }

   case nir_tex_src_projector:
   case nir_tex_src_comparator:
   case nir_tex_src_bias:
   case nir_tex_src_min_lod:
   case nir_tex_src_ddx:
   case nir_tex_src_ddy:
   case nir_tex_src_backend1:
   case nir_tex_src_backend2:
      return nir_type_float;

   case nir_tex_src_offset:
   case nir_tex_src_ms_index:
   case nir_tex_src_plane:
      return nir_type_int;

   case nir_tex_src_ms_mcs_intel:
   case nir_tex_src_texture_deref:
   case nir_tex_src_sampler_deref:
   case nir_tex_src_texture_offset:
   case nir_tex_src_sampler_offset:
   case nir_tex_src_texture_handle:
   case nir_tex_src_sampler_handle:
      return nir_type_uint;

   case nir_num_tex_src_types:
      unreachable("nir_num_tex_src_types is not a valid source type");
   }

   unreachable("Invalid texture source type");
}

unsigned
nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src)
{
   if (instr->src[src].src_type == nir_tex_src_coord)
      return instr->coord_components;

   /* The MCS value is expected to be a vec4 returned by a txf_ms_mcs_intel */
   if (instr->src[src].src_type == nir_tex_src_ms_mcs_intel)
      return 4;

   if (instr->src[src].src_type == nir_tex_src_ddx ||
       instr->src[src].src_type == nir_tex_src_ddy) {

      if (instr->is_array && !instr->array_is_lowered_cube)
         return instr->coord_components - 1;
      else
         return instr->coord_components;
   }

   if (instr->src[src].src_type == nir_tex_src_offset) {
      if (instr->is_array)
         return instr->coord_components - 1;
      else
         return instr->coord_components;
   }

   if (instr->src[src].src_type == nir_tex_src_backend1 ||
       instr->src[src].src_type == nir_tex_src_backend2)
      return nir_src_num_components(instr->src[src].src);

   /* For AMD, this can be a vec8/vec4 image/sampler descriptor. */
   if (instr->src[src].src_type == nir_tex_src_texture_handle ||
       instr->src[src].src_type == nir_tex_src_sampler_handle)
      return 0;

   return 1;
}
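
/* Example: sampling a 2D array texture has coord_components == 3
 * (x, y, layer), but the ddx/ddy and offset sources are only vec2,
 * since the array layer has no derivative and takes no texel offset.
 */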

/**
 * Return which components are written into transform feedback buffers.
 * The result is relative to 0, not "component".
 */
unsigned
nir_instr_xfb_write_mask(nir_intrinsic_instr *instr)
{
   unsigned mask = 0;

   if (nir_intrinsic_has_io_xfb(instr)) {
      unsigned wr_mask = nir_intrinsic_write_mask(instr) <<
                         nir_intrinsic_component(instr);
      assert((wr_mask & ~0xf) == 0); /* only 4 components allowed */

      unsigned iter_mask = wr_mask;
      while (iter_mask) {
         unsigned i = u_bit_scan(&iter_mask);
         nir_io_xfb xfb = i < 2 ? nir_intrinsic_io_xfb(instr) :
                                  nir_intrinsic_io_xfb2(instr);
         if (xfb.out[i % 2].num_components)
            mask |= BITFIELD_RANGE(i, xfb.out[i % 2].num_components) & wr_mask;
      }
   }

   return mask;
}
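
/* The XFB info for one 4-component slot is split across two indices:
 * io_xfb describes components 0-1 and io_xfb2 components 2-3, hence the
 * `i < 2` selection and out[i % 2] indexing above.  Worked example:
 * write_mask 0x3 with component == 2 gives wr_mask 0xc; both bits fall
 * in io_xfb2, and if its out[0].num_components is 2 the function
 * returns 0xc.
 */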

/**
 * Whether an output slot is consumed by fixed-function logic.
 */
bool
nir_slot_is_sysval_output(gl_varying_slot slot)
{
   return slot == VARYING_SLOT_POS ||
          slot == VARYING_SLOT_PSIZ ||
          slot == VARYING_SLOT_EDGE ||
          slot == VARYING_SLOT_CLIP_VERTEX ||
          slot == VARYING_SLOT_CLIP_DIST0 ||
          slot == VARYING_SLOT_CLIP_DIST1 ||
          slot == VARYING_SLOT_CULL_DIST0 ||
          slot == VARYING_SLOT_CULL_DIST1 ||
          slot == VARYING_SLOT_LAYER ||
          slot == VARYING_SLOT_VIEWPORT ||
          slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
          slot == VARYING_SLOT_TESS_LEVEL_INNER ||
          slot == VARYING_SLOT_BOUNDING_BOX0 ||
          slot == VARYING_SLOT_BOUNDING_BOX1 ||
          slot == VARYING_SLOT_VIEW_INDEX ||
          slot == VARYING_SLOT_VIEWPORT_MASK ||
          slot == VARYING_SLOT_PRIMITIVE_SHADING_RATE ||
          slot == VARYING_SLOT_PRIMITIVE_COUNT ||
          slot == VARYING_SLOT_PRIMITIVE_INDICES ||
          slot == VARYING_SLOT_TASK_COUNT;
}

/**
 * Whether an input/output slot is consumed by the next shader stage,
 * or written by the previous shader stage.
 */
bool
nir_slot_is_varying(gl_varying_slot slot)
{
   return slot >= VARYING_SLOT_VAR0 ||
          slot == VARYING_SLOT_COL0 ||
          slot == VARYING_SLOT_COL1 ||
          slot == VARYING_SLOT_BFC0 ||
          slot == VARYING_SLOT_BFC1 ||
          slot == VARYING_SLOT_FOGC ||
          (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) ||
          slot == VARYING_SLOT_PNTC ||
          slot == VARYING_SLOT_CLIP_DIST0 ||
          slot == VARYING_SLOT_CLIP_DIST1 ||
          slot == VARYING_SLOT_CULL_DIST0 ||
          slot == VARYING_SLOT_CULL_DIST1 ||
          slot == VARYING_SLOT_PRIMITIVE_ID ||
          slot == VARYING_SLOT_LAYER ||
          slot == VARYING_SLOT_VIEWPORT ||
          slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
          slot == VARYING_SLOT_TESS_LEVEL_INNER;
}

bool
nir_slot_is_sysval_output_and_varying(gl_varying_slot slot)
{
   return nir_slot_is_sysval_output(slot) &&
          nir_slot_is_varying(slot);
}
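
/* Example: VARYING_SLOT_LAYER is in both sets, since fixed-function
 * logic uses it to select a framebuffer layer while the next stage can
 * also read it as an input.  VARYING_SLOT_PSIZ is a pure sysval output;
 * VARYING_SLOT_VAR0 is a pure varying.
 */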

/**
 * This marks the output store instruction as not feeding the next shader
 * stage. If the instruction has no other use, it's removed.
 */
void nir_remove_varying(nir_intrinsic_instr *intr)
{
   nir_io_semantics sem = nir_intrinsic_io_semantics(intr);

   if ((!sem.no_sysval_output && nir_slot_is_sysval_output(sem.location)) ||
       nir_instr_xfb_write_mask(intr)) {
      /* Demote the store instruction. */
      sem.no_varying = true;
      nir_intrinsic_set_io_semantics(intr, sem);
   } else {
      nir_instr_remove(&intr->instr);
   }
}

/**
 * This marks the output store instruction as not feeding fixed-function
 * logic. If the instruction has no other use, it's removed.
 */
void nir_remove_sysval_output(nir_intrinsic_instr *intr)
{
   nir_io_semantics sem = nir_intrinsic_io_semantics(intr);

   if ((!sem.no_varying && nir_slot_is_varying(sem.location)) ||
       nir_instr_xfb_write_mask(intr)) {
      /* Demote the store instruction. */
      sem.no_sysval_output = true;
      nir_intrinsic_set_io_semantics(intr, sem);
   } else {
      nir_instr_remove(&intr->instr);
   }
}

void nir_remove_non_entrypoints(nir_shader *nir)
{
   foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
      if (!func->is_entrypoint)
         exec_node_remove(&func->node);
   }
   assert(exec_list_length(&nir->functions) == 1);
}
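
/* Usage sketch (illustrative): drivers typically run this after all
 * other functions have been inlined into the entrypoint, e.g.
 *
 *    NIR_PASS_V(nir, nir_lower_variable_initializers,
 *               nir_var_function_temp);
 *    NIR_PASS_V(nir, nir_lower_returns);
 *    NIR_PASS_V(nir, nir_inline_functions);
 *    nir_remove_non_entrypoints(nir);
 *
 * The final assert checks that exactly one function, the entrypoint,
 * remains; running this before inlining would strip functions that are
 * still called.
 */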