/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_control_flow_private.h"
#include "nir_worklist.h"
#include "util/half_float.h"
#include <limits.h>
#include <assert.h>
#include <math.h>
#include "util/u_math.h"
#include "util/u_qsort.h"

#include "main/menums.h" /* BITFIELD64_MASK */

/** Return true if the component mask "mask" with bit size "old_bit_size" can
 * be re-interpreted to be used with "new_bit_size".
 */
bool
nir_component_mask_can_reinterpret(nir_component_mask_t mask,
                                   unsigned old_bit_size,
                                   unsigned new_bit_size)
{
   assert(util_is_power_of_two_nonzero(old_bit_size));
   assert(util_is_power_of_two_nonzero(new_bit_size));

   if (old_bit_size == new_bit_size)
      return true;

   if (old_bit_size == 1 || new_bit_size == 1)
      return false;

   if (old_bit_size > new_bit_size) {
      unsigned ratio = old_bit_size / new_bit_size;
      return util_last_bit(mask) * ratio <= NIR_MAX_VEC_COMPONENTS;
   }

   unsigned iter = mask;
   while (iter) {
      int start, count;
      u_bit_scan_consecutive_range(&iter, &start, &count);
      start *= old_bit_size;
      count *= old_bit_size;
      if (start % new_bit_size != 0)
         return false;
      if (count % new_bit_size != 0)
         return false;
   }
   return true;
}

/** Re-interprets a component mask "mask" with bit size "old_bit_size" so that
 * it can be used with "new_bit_size".
 */
nir_component_mask_t
nir_component_mask_reinterpret(nir_component_mask_t mask,
                               unsigned old_bit_size,
                               unsigned new_bit_size)
{
   assert(nir_component_mask_can_reinterpret(mask, old_bit_size, new_bit_size));

   if (old_bit_size == new_bit_size)
      return mask;

   nir_component_mask_t new_mask = 0;
   unsigned iter = mask;
   while (iter) {
      int start, count;
      u_bit_scan_consecutive_range(&iter, &start, &count);
      start = start * old_bit_size / new_bit_size;
      count = count * old_bit_size / new_bit_size;
      new_mask |= BITFIELD_RANGE(start, count);
   }
   return new_mask;
}
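
/* Worked example of the mask helpers above (illustrative sketch, not part
 * of the upstream file): a mask covering component x of a 64-bit vector
 * covers components x and y once the same bits are viewed as 32-bit
 * components.
 *
 *    nir_component_mask_t m64 = 0x1;                  // .x at 64 bits
 *    nir_component_mask_t m32 =
 *       nir_component_mask_reinterpret(m64, 64, 32);  // 0x3 == .xy
 */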

static void
nir_shader_destructor(void *ptr)
{
   nir_shader *shader = ptr;

   /* Free all instrs from the shader, since they're not ralloced. */
   list_for_each_entry_safe(nir_instr, instr, &shader->gc_list, gc_node) {
      nir_instr_free(instr);
   }
}

nir_shader *
nir_shader_create(void *mem_ctx,
                  gl_shader_stage stage,
                  const nir_shader_compiler_options *options,
                  shader_info *si)
{
   nir_shader *shader = rzalloc(mem_ctx, nir_shader);
   ralloc_set_destructor(shader, nir_shader_destructor);

   exec_list_make_empty(&shader->variables);

   shader->options = options;

   if (si) {
      assert(si->stage == stage);
      shader->info = *si;
   } else {
      shader->info.stage = stage;
   }

   exec_list_make_empty(&shader->functions);

   list_inithead(&shader->gc_list);

   shader->num_inputs = 0;
   shader->num_outputs = 0;
   shader->num_uniforms = 0;

   return shader;
}
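
/* Usage sketch (assumed caller code, not from this file): create an empty
 * fragment-shader container with no pre-existing shader_info; "options"
 * would come from the driver's compiler options.
 *
 *    nir_shader *s =
 *       nir_shader_create(NULL, MESA_SHADER_FRAGMENT, options, NULL);
 */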

static nir_register *
reg_create(void *mem_ctx, struct exec_list *list)
{
   nir_register *reg = ralloc(mem_ctx, nir_register);

   list_inithead(&reg->uses);
   list_inithead(&reg->defs);
   list_inithead(&reg->if_uses);

   reg->num_components = 0;
   reg->bit_size = 32;
   reg->num_array_elems = 0;
   reg->divergent = false;

   exec_list_push_tail(list, &reg->node);

   return reg;
}

nir_register *
nir_local_reg_create(nir_function_impl *impl)
{
   nir_register *reg = reg_create(ralloc_parent(impl), &impl->registers);
   reg->index = impl->reg_alloc++;

   return reg;
}

void
nir_reg_remove(nir_register *reg)
{
   exec_node_remove(&reg->node);
}

void
nir_shader_add_variable(nir_shader *shader, nir_variable *var)
{
   switch (var->data.mode) {
   case nir_var_function_temp:
      assert(!"nir_shader_add_variable cannot be used for local variables");
      return;

   case nir_var_shader_temp:
   case nir_var_shader_in:
   case nir_var_shader_out:
   case nir_var_uniform:
   case nir_var_mem_ubo:
   case nir_var_mem_ssbo:
   case nir_var_mem_shared:
   case nir_var_system_value:
   case nir_var_mem_push_const:
   case nir_var_mem_constant:
   case nir_var_shader_call_data:
   case nir_var_ray_hit_attrib:
      break;

   case nir_var_mem_global:
      assert(!"nir_shader_add_variable cannot be used for global memory");
      return;

   default:
      assert(!"invalid mode");
      return;
   }

   exec_list_push_tail(&shader->variables, &var->node);
}

nir_variable *
nir_variable_create(nir_shader *shader, nir_variable_mode mode,
                    const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = mode;
   var->data.how_declared = nir_var_declared_normally;

   if ((mode == nir_var_shader_in &&
        shader->info.stage != MESA_SHADER_VERTEX &&
        shader->info.stage != MESA_SHADER_KERNEL) ||
       (mode == nir_var_shader_out &&
        shader->info.stage != MESA_SHADER_FRAGMENT))
      var->data.interpolation = INTERP_MODE_SMOOTH;

   if (mode == nir_var_shader_in || mode == nir_var_uniform)
      var->data.read_only = true;

   nir_shader_add_variable(shader, var);

   return var;
}
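
/* Usage sketch (assumed caller code, not from this file): declare a vec4
 * uniform; nir_variable_create marks it read-only and appends it to
 * shader->variables via nir_shader_add_variable.
 *
 *    nir_variable *color =
 *       nir_variable_create(shader, nir_var_uniform,
 *                           glsl_vec4_type(), "u_color");
 */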

nir_variable *
nir_local_variable_create(nir_function_impl *impl,
                          const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(impl->function->shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = nir_var_function_temp;

   nir_function_impl_add_variable(impl, var);

   return var;
}

nir_variable *
nir_find_variable_with_location(nir_shader *shader,
                                nir_variable_mode mode,
                                unsigned location)
{
   assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp);
   nir_foreach_variable_with_modes(var, shader, mode) {
      if (var->data.location == location)
         return var;
   }
   return NULL;
}

nir_variable *
nir_find_variable_with_driver_location(nir_shader *shader,
                                       nir_variable_mode mode,
                                       unsigned location)
{
   assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp);
   nir_foreach_variable_with_modes(var, shader, mode) {
      if (var->data.driver_location == location)
         return var;
   }
   return NULL;
}

/* Annoyingly, qsort_r is not in the C standard library and, in particular, we
 * can't count on it on MSVC and Android.  So we stuff the cmp function into
 * each array element.  It's a bit messy and burns more memory but the list of
 * variables should never be all that long.
 */
struct var_cmp {
   nir_variable *var;
   int (*cmp)(const nir_variable *, const nir_variable *);
};

static int
var_sort_cmp(const void *_a, const void *_b, void *_cmp)
{
   const struct var_cmp *a = _a;
   const struct var_cmp *b = _b;
   assert(a->cmp == b->cmp);
   return a->cmp(a->var, b->var);
}

void
nir_sort_variables_with_modes(nir_shader *shader,
                              int (*cmp)(const nir_variable *,
                                         const nir_variable *),
                              nir_variable_mode modes)
{
   unsigned num_vars = 0;
   nir_foreach_variable_with_modes(var, shader, modes) {
      ++num_vars;
   }
   struct var_cmp *vars = ralloc_array(shader, struct var_cmp, num_vars);
   unsigned i = 0;
   nir_foreach_variable_with_modes_safe(var, shader, modes) {
      exec_node_remove(&var->node);
      vars[i++] = (struct var_cmp){
         .var = var,
         .cmp = cmp,
      };
   }
   assert(i == num_vars);

   util_qsort_r(vars, num_vars, sizeof(*vars), var_sort_cmp, cmp);

   for (i = 0; i < num_vars; i++)
      exec_list_push_tail(&shader->variables, &vars[i].var->node);

   ralloc_free(vars);
}
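
/* Usage sketch (assumed comparator, not from this file): sort all shader
 * inputs by location.
 *
 *    static int
 *    cmp_by_location(const nir_variable *a, const nir_variable *b)
 *    {
 *       return (int)a->data.location - (int)b->data.location;
 *    }
 *
 *    nir_sort_variables_with_modes(shader, cmp_by_location,
 *                                  nir_var_shader_in);
 */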

nir_function *
nir_function_create(nir_shader *shader, const char *name)
{
   nir_function *func = ralloc(shader, nir_function);

   exec_list_push_tail(&shader->functions, &func->node);

   func->name = ralloc_strdup(func, name);
   func->shader = shader;
   func->num_params = 0;
   func->params = NULL;
   func->impl = NULL;
   func->is_entrypoint = false;

   return func;
}

static bool src_has_indirect(nir_src *src)
{
   return !src->is_ssa && src->reg.indirect;
}

static void src_free_indirects(nir_src *src)
{
   if (src_has_indirect(src)) {
      assert(src->reg.indirect->is_ssa || !src->reg.indirect->reg.indirect);
      free(src->reg.indirect);
      src->reg.indirect = NULL;
   }
}

static void dest_free_indirects(nir_dest *dest)
{
   if (!dest->is_ssa && dest->reg.indirect) {
      assert(dest->reg.indirect->is_ssa || !dest->reg.indirect->reg.indirect);
      free(dest->reg.indirect);
      dest->reg.indirect = NULL;
   }
}

/* NOTE: if the instruction you are copying a src to is already added
 * to the IR, use nir_instr_rewrite_src() instead.
 */
void nir_src_copy(nir_src *dest, const nir_src *src)
{
   src_free_indirects(dest);

   dest->is_ssa = src->is_ssa;
   if (src->is_ssa) {
      dest->ssa = src->ssa;
   } else {
      dest->reg.base_offset = src->reg.base_offset;
      dest->reg.reg = src->reg.reg;
      if (src->reg.indirect) {
         dest->reg.indirect = calloc(1, sizeof(nir_src));
         nir_src_copy(dest->reg.indirect, src->reg.indirect);
      } else {
         dest->reg.indirect = NULL;
      }
   }
}

void nir_dest_copy(nir_dest *dest, const nir_dest *src)
{
   /* Copying an SSA definition makes no sense whatsoever. */
   assert(!src->is_ssa);

   dest_free_indirects(dest);

   dest->is_ssa = false;

   dest->reg.base_offset = src->reg.base_offset;
   dest->reg.reg = src->reg.reg;
   if (src->reg.indirect) {
      dest->reg.indirect = calloc(1, sizeof(nir_src));
      nir_src_copy(dest->reg.indirect, src->reg.indirect);
   } else {
      dest->reg.indirect = NULL;
   }
}

void
nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src)
{
   nir_src_copy(&dest->src, &src->src);
   dest->abs = src->abs;
   dest->negate = src->negate;
   for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
      dest->swizzle[i] = src->swizzle[i];
}

void
nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src)
{
   nir_dest_copy(&dest->dest, &src->dest);
   dest->write_mask = src->write_mask;
   dest->saturate = src->saturate;
}

bool
nir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, unsigned srcn)
{
   static uint8_t trivial_swizzle[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
   STATIC_ASSERT(ARRAY_SIZE(trivial_swizzle) == NIR_MAX_VEC_COMPONENTS);

   const nir_alu_src *src = &alu->src[srcn];
   unsigned num_components = nir_ssa_alu_instr_src_components(alu, srcn);

   return src->src.is_ssa && (src->src.ssa->num_components == num_components) &&
          !src->abs && !src->negate &&
          (memcmp(src->swizzle, trivial_swizzle, num_components) == 0);
}


static void
cf_init(nir_cf_node *node, nir_cf_node_type type)
{
   exec_node_init(&node->node);
   node->parent = NULL;
   node->type = type;
}

nir_function_impl *
nir_function_impl_create_bare(nir_shader *shader)
{
   nir_function_impl *impl = ralloc(shader, nir_function_impl);

   impl->function = NULL;

   cf_init(&impl->cf_node, nir_cf_node_function);

   exec_list_make_empty(&impl->body);
   exec_list_make_empty(&impl->registers);
   exec_list_make_empty(&impl->locals);
   impl->reg_alloc = 0;
   impl->ssa_alloc = 0;
   impl->num_blocks = 0;
   impl->valid_metadata = nir_metadata_none;
   impl->structured = true;

   /* create start & end blocks */
   nir_block *start_block = nir_block_create(shader);
   nir_block *end_block = nir_block_create(shader);
   start_block->cf_node.parent = &impl->cf_node;
   end_block->cf_node.parent = &impl->cf_node;
   impl->end_block = end_block;

   exec_list_push_tail(&impl->body, &start_block->cf_node.node);

   start_block->successors[0] = end_block;
   _mesa_set_add(end_block->predecessors, start_block);
   return impl;
}

nir_function_impl *
nir_function_impl_create(nir_function *function)
{
   assert(function->impl == NULL);

   nir_function_impl *impl = nir_function_impl_create_bare(function->shader);

   function->impl = impl;
   impl->function = function;

   return impl;
}

nir_block *
nir_block_create(nir_shader *shader)
{
   nir_block *block = rzalloc(shader, nir_block);

   cf_init(&block->cf_node, nir_cf_node_block);

   block->successors[0] = block->successors[1] = NULL;
   block->predecessors = _mesa_pointer_set_create(block);
   block->imm_dom = NULL;
   /* XXX maybe it would be worth it to defer allocation?  This
    * way it doesn't get allocated for shader refs that never run
    * nir_calc_dominance?  For example, state-tracker creates an
    * initial IR, clones that, runs appropriate lowering pass, passes
    * to driver which does common lowering/opt, and then stores ref
    * which is later used to do state specific lowering and further
    * opt.  Do any of the references not need dominance metadata?
    */
   block->dom_frontier = _mesa_pointer_set_create(block);

   exec_list_make_empty(&block->instr_list);

   return block;
}

static inline void
src_init(nir_src *src)
{
   src->is_ssa = false;
   src->reg.reg = NULL;
   src->reg.indirect = NULL;
   src->reg.base_offset = 0;
}

nir_if *
nir_if_create(nir_shader *shader)
{
   nir_if *if_stmt = ralloc(shader, nir_if);

   if_stmt->control = nir_selection_control_none;

   cf_init(&if_stmt->cf_node, nir_cf_node_if);
   src_init(&if_stmt->condition);

   nir_block *then = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->then_list);
   exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
   then->cf_node.parent = &if_stmt->cf_node;

   nir_block *else_stmt = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->else_list);
   exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
   else_stmt->cf_node.parent = &if_stmt->cf_node;

   return if_stmt;
}
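
/* Structure sketch (descriptive, not from this file): a freshly created
 * nir_if has one empty block on each branch, so the CF tree looks like
 *
 *    if (condition)   <- if_stmt->condition, still uninitialized here
 *       block         <- sole entry of if_stmt->then_list
 *    else
 *       block         <- sole entry of if_stmt->else_list
 *
 * Callers typically fill the condition with nir_if_rewrite_condition()
 * or build the whole construct through nir_builder's nir_push_if().
 */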

nir_loop *
nir_loop_create(nir_shader *shader)
{
   nir_loop *loop = rzalloc(shader, nir_loop);

   cf_init(&loop->cf_node, nir_cf_node_loop);
   /* Assume that loops are divergent until proven otherwise */
   loop->divergent = true;

   nir_block *body = nir_block_create(shader);
   exec_list_make_empty(&loop->body);
   exec_list_push_tail(&loop->body, &body->cf_node.node);
   body->cf_node.parent = &loop->cf_node;

   body->successors[0] = body;
   _mesa_set_add(body->predecessors, body);

   return loop;
}

static void
instr_init(nir_instr *instr, nir_instr_type type)
{
   instr->type = type;
   instr->block = NULL;
   exec_node_init(&instr->node);
}

static void
dest_init(nir_dest *dest)
{
   dest->is_ssa = false;
   dest->reg.reg = NULL;
   dest->reg.indirect = NULL;
   dest->reg.base_offset = 0;
}

static void
alu_dest_init(nir_alu_dest *dest)
{
   dest_init(&dest->dest);
   dest->saturate = false;
   dest->write_mask = 0xf;
}

static void
alu_src_init(nir_alu_src *src)
{
   src_init(&src->src);
   src->abs = src->negate = false;
   for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
      src->swizzle[i] = i;
}

nir_alu_instr *
nir_alu_instr_create(nir_shader *shader, nir_op op)
{
   unsigned num_srcs = nir_op_infos[op].num_inputs;
   /* TODO: don't use calloc */
   nir_alu_instr *instr = calloc(1, sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));

   instr_init(&instr->instr, nir_instr_type_alu);
   instr->op = op;
   alu_dest_init(&instr->dest);
   for (unsigned i = 0; i < num_srcs; i++)
      alu_src_init(&instr->src[i]);

   list_add(&instr->instr.gc_node, &shader->gc_list);

   return instr;
}

nir_deref_instr *
nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
{
   nir_deref_instr *instr = calloc(1, sizeof(*instr));

   instr_init(&instr->instr, nir_instr_type_deref);

   instr->deref_type = deref_type;
   if (deref_type != nir_deref_type_var)
      src_init(&instr->parent);

   if (deref_type == nir_deref_type_array ||
       deref_type == nir_deref_type_ptr_as_array)
      src_init(&instr->arr.index);

   dest_init(&instr->dest);

   list_add(&instr->instr.gc_node, &shader->gc_list);

   return instr;
}

nir_jump_instr *
nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
{
   nir_jump_instr *instr = malloc(sizeof(*instr));
   instr_init(&instr->instr, nir_instr_type_jump);
   src_init(&instr->condition);
   instr->type = type;
   instr->target = NULL;
   instr->else_target = NULL;

   list_add(&instr->instr.gc_node, &shader->gc_list);

   return instr;
}

nir_load_const_instr *
nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
                            unsigned bit_size)
{
   nir_load_const_instr *instr =
      calloc(1, sizeof(*instr) + num_components * sizeof(*instr->value));
   instr_init(&instr->instr, nir_instr_type_load_const);

   nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size);

   list_add(&instr->instr.gc_node, &shader->gc_list);

   return instr;
}

nir_intrinsic_instr *
nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
{
   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   /* TODO: don't use calloc */
   nir_intrinsic_instr *instr =
      calloc(1, sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));

   instr_init(&instr->instr, nir_instr_type_intrinsic);
   instr->intrinsic = op;

   if (nir_intrinsic_infos[op].has_dest)
      dest_init(&instr->dest);

   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i]);

   list_add(&instr->instr.gc_node, &shader->gc_list);

   return instr;
}

nir_call_instr *
nir_call_instr_create(nir_shader *shader, nir_function *callee)
{
   const unsigned num_params = callee->num_params;
   nir_call_instr *instr =
      calloc(1, sizeof(*instr) + num_params * sizeof(instr->params[0]));

   instr_init(&instr->instr, nir_instr_type_call);
   instr->callee = callee;
   instr->num_params = num_params;
   for (unsigned i = 0; i < num_params; i++)
      src_init(&instr->params[i]);

   list_add(&instr->instr.gc_node, &shader->gc_list);

   return instr;
}

static int8_t default_tg4_offsets[4][2] =
{
   { 0, 1 },
   { 1, 1 },
   { 1, 0 },
   { 0, 0 },
};

nir_tex_instr *
nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
{
   nir_tex_instr *instr = calloc(1, sizeof(*instr));
   instr_init(&instr->instr, nir_instr_type_tex);

   dest_init(&instr->dest);

   instr->num_srcs = num_srcs;
   instr->src = malloc(sizeof(nir_tex_src) * num_srcs);
   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i].src);

   instr->texture_index = 0;
   instr->sampler_index = 0;
   memcpy(instr->tg4_offsets, default_tg4_offsets, sizeof(instr->tg4_offsets));

   list_add(&instr->instr.gc_node, &shader->gc_list);

   return instr;
}

void
nir_tex_instr_add_src(nir_tex_instr *tex,
                      nir_tex_src_type src_type,
                      nir_src src)
{
   nir_tex_src *new_srcs = calloc(sizeof(*new_srcs),
                                  tex->num_srcs + 1);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      new_srcs[i].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &new_srcs[i].src,
                         &tex->src[i].src);
   }

   free(tex->src);
   tex->src = new_srcs;

   tex->src[tex->num_srcs].src_type = src_type;
   nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs].src, src);
   tex->num_srcs++;
}

void
nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx)
{
   assert(src_idx < tex->num_srcs);

   /* First rewrite the source to NIR_SRC_INIT */
   nir_instr_rewrite_src(&tex->instr, &tex->src[src_idx].src, NIR_SRC_INIT);

   /* Now, move all of the other sources down */
   for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) {
      tex->src[i-1].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
   }
   tex->num_srcs--;
}
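
/* Usage sketch (assumed tex instruction and SSA value, not from this
 * file): append a comparator source for a shadow sampler, then drop it
 * again by index.
 *
 *    nir_tex_instr_add_src(tex, nir_tex_src_comparator,
 *                          nir_src_for_ssa(cmp_value));
 *    ...
 *    nir_tex_instr_remove_src(tex, tex->num_srcs - 1);
 */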

bool
nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex)
{
   if (tex->op != nir_texop_tg4)
      return false;
   return memcmp(tex->tg4_offsets, default_tg4_offsets,
                 sizeof(tex->tg4_offsets)) != 0;
}

nir_phi_instr *
nir_phi_instr_create(nir_shader *shader)
{
   nir_phi_instr *instr = malloc(sizeof(*instr));
   instr_init(&instr->instr, nir_instr_type_phi);

   dest_init(&instr->dest);
   exec_list_make_empty(&instr->srcs);

   list_add(&instr->instr.gc_node, &shader->gc_list);

   return instr;
}

/**
 * Adds a new source to a NIR phi instruction.
 *
 * Note that this does not update the def/use relationship for src, assuming
 * that the instr is not in the shader.  If it is, you have to do:
 *
 * list_addtail(&phi_src->src.use_link, &src.ssa->uses);
 */
nir_phi_src *
nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_src src)
{
   nir_phi_src *phi_src;

   phi_src = calloc(1, sizeof(nir_phi_src));
   phi_src->pred = pred;
   phi_src->src = src;
   phi_src->src.parent_instr = &instr->instr;
   exec_list_push_tail(&instr->srcs, &phi_src->node);

   return phi_src;
}

nir_parallel_copy_instr *
nir_parallel_copy_instr_create(nir_shader *shader)
{
   nir_parallel_copy_instr *instr = malloc(sizeof(*instr));
   instr_init(&instr->instr, nir_instr_type_parallel_copy);

   exec_list_make_empty(&instr->entries);

   list_add(&instr->instr.gc_node, &shader->gc_list);

   return instr;
}

nir_ssa_undef_instr *
nir_ssa_undef_instr_create(nir_shader *shader,
                           unsigned num_components,
                           unsigned bit_size)
{
   nir_ssa_undef_instr *instr = malloc(sizeof(*instr));
   instr_init(&instr->instr, nir_instr_type_ssa_undef);

   nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size);

   list_add(&instr->instr.gc_node, &shader->gc_list);

   return instr;
}

static nir_const_value
const_value_float(double d, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));
   switch (bit_size) {
   case 16: v.u16 = _mesa_float_to_half(d);  break;
   case 32: v.f32 = d;                       break;
   case 64: v.f64 = d;                       break;
   default:
      unreachable("Invalid bit size");
   }
   return v;
}

static nir_const_value
const_value_int(int64_t i, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));
   switch (bit_size) {
   case 1:  v.b   = i & 1;  break;
   case 8:  v.i8  = i;  break;
   case 16: v.i16 = i;  break;
   case 32: v.i32 = i;  break;
   case 64: v.i64 = i;  break;
   default:
      unreachable("Invalid bit size");
   }
   return v;
}

nir_const_value
nir_alu_binop_identity(nir_op binop, unsigned bit_size)
{
   const int64_t max_int = (1ull << (bit_size - 1)) - 1;
   const int64_t min_int = -max_int - 1;
   switch (binop) {
   case nir_op_iadd:
      return const_value_int(0, bit_size);
   case nir_op_fadd:
      return const_value_float(0, bit_size);
   case nir_op_imul:
      return const_value_int(1, bit_size);
   case nir_op_fmul:
      return const_value_float(1, bit_size);
   case nir_op_imin:
      return const_value_int(max_int, bit_size);
   case nir_op_umin:
      return const_value_int(~0ull, bit_size);
   case nir_op_fmin:
      return const_value_float(INFINITY, bit_size);
   case nir_op_imax:
      return const_value_int(min_int, bit_size);
   case nir_op_umax:
      return const_value_int(0, bit_size);
   case nir_op_fmax:
      return const_value_float(-INFINITY, bit_size);
   case nir_op_iand:
      return const_value_int(~0ull, bit_size);
   case nir_op_ior:
      return const_value_int(0, bit_size);
   case nir_op_ixor:
      return const_value_int(0, bit_size);
   default:
      unreachable("Invalid reduction operation");
   }
}
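
/* Worked example (illustrative, not from the upstream file): the identity
 * is the value that leaves the reduction unchanged; for a 32-bit signed
 * minimum it is INT32_MAX.
 *
 *    nir_const_value id = nir_alu_binop_identity(nir_op_imin, 32);
 *    // id.i32 == INT32_MAX, since imin(INT32_MAX, x) == x for all x
 */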

nir_function_impl *
nir_cf_node_get_function(nir_cf_node *node)
{
   while (node->type != nir_cf_node_function) {
      node = node->parent;
   }

   return nir_cf_node_as_function(node);
}

/* Reduces a cursor by trying to convert everything to after and trying to
 * go up to block granularity when possible.
 */
static nir_cursor
reduce_cursor(nir_cursor cursor)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      if (exec_list_is_empty(&cursor.block->instr_list)) {
         /* Empty block.  After is as good as before. */
         cursor.option = nir_cursor_after_block;
      }
      return cursor;

   case nir_cursor_after_block:
      return cursor;

   case nir_cursor_before_instr: {
      nir_instr *prev_instr = nir_instr_prev(cursor.instr);
      if (prev_instr) {
         /* Before this instruction is after the previous */
         cursor.instr = prev_instr;
         cursor.option = nir_cursor_after_instr;
      } else {
         /* No previous instruction.  Switch to before block */
         cursor.block = cursor.instr->block;
         cursor.option = nir_cursor_before_block;
      }
      return reduce_cursor(cursor);
   }

   case nir_cursor_after_instr:
      if (nir_instr_next(cursor.instr) == NULL) {
         /* This is the last instruction, switch to after block */
         cursor.option = nir_cursor_after_block;
         cursor.block = cursor.instr->block;
      }
      return cursor;

   default:
      unreachable("Invalid cursor option");
   }
}

bool
nir_cursors_equal(nir_cursor a, nir_cursor b)
{
   /* Reduced cursors should be unique */
   a = reduce_cursor(a);
   b = reduce_cursor(b);

   return a.block == b.block && a.option == b.option;
}
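
/* Example of cursor reduction (descriptive sketch, not from this file):
 * for an empty block b, "before b" reduces to "after b", so
 *
 *    nir_cursors_equal(nir_before_block(b), nir_after_block(b))
 *
 * returns true; likewise "before the second instruction" reduces to
 * "after the first" and compares equal to nir_after_instr(first).
 */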

static bool
add_use_cb(nir_src *src, void *state)
{
   nir_instr *instr = state;

   src->parent_instr = instr;
   list_addtail(&src->use_link,
                src->is_ssa ? &src->ssa->uses : &src->reg.reg->uses);

   return true;
}

static bool
add_ssa_def_cb(nir_ssa_def *def, void *state)
{
   nir_instr *instr = state;

   if (instr->block && def->index == UINT_MAX) {
      nir_function_impl *impl =
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;

      impl->valid_metadata &= ~nir_metadata_live_ssa_defs;
   }

   return true;
}

static bool
add_reg_def_cb(nir_dest *dest, void *state)
{
   nir_instr *instr = state;

   if (!dest->is_ssa) {
      dest->reg.parent_instr = instr;
      list_addtail(&dest->reg.def_link, &dest->reg.reg->defs);
   }

   return true;
}

static void
add_defs_uses(nir_instr *instr)
{
   nir_foreach_src(instr, add_use_cb, instr);
   nir_foreach_dest(instr, add_reg_def_cb, instr);
   nir_foreach_ssa_def(instr, add_ssa_def_cb, instr);
}

void
nir_instr_insert(nir_cursor cursor, nir_instr *instr)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      /* Only allow inserting jumps into empty blocks. */
      if (instr->type == nir_instr_type_jump)
         assert(exec_list_is_empty(&cursor.block->instr_list));

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_head(&cursor.block->instr_list, &instr->node);
      break;
   case nir_cursor_after_block: {
      /* Inserting instructions after a jump is illegal. */
      nir_instr *last = nir_block_last_instr(cursor.block);
      assert(last == NULL || last->type != nir_instr_type_jump);
      (void) last;

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_tail(&cursor.block->instr_list, &instr->node);
      break;
   }
   case nir_cursor_before_instr:
      assert(instr->type != nir_instr_type_jump);
      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_node_before(&cursor.instr->node, &instr->node);
      break;
   case nir_cursor_after_instr:
      /* Inserting instructions after a jump is illegal. */
      assert(cursor.instr->type != nir_instr_type_jump);

      /* Only allow inserting jumps at the end of the block. */
      if (instr->type == nir_instr_type_jump)
         assert(cursor.instr == nir_block_last_instr(cursor.instr->block));

      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_after(&cursor.instr->node, &instr->node);
      break;
   }

   if (instr->type == nir_instr_type_jump)
      nir_handle_add_jump(instr->block);

   nir_function_impl *impl = nir_cf_node_get_function(&instr->block->cf_node);
   impl->valid_metadata &= ~nir_metadata_instr_index;
}
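
/* Usage sketch (assumed shader/block, not from this file): build a
 * load_const and insert it at the top of a block; nir_instr_insert wires
 * up the def/use lists and invalidates instr-index metadata.
 *
 *    nir_load_const_instr *c = nir_load_const_instr_create(shader, 1, 32);
 *    c->value[0].u32 = 42;
 *    nir_instr_insert(nir_before_block(block), &c->instr);
 */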

bool
nir_instr_move(nir_cursor cursor, nir_instr *instr)
{
   /* If the cursor happens to refer to this instruction (either before or
    * after), don't do anything.
    */
   if ((cursor.option == nir_cursor_before_instr ||
        cursor.option == nir_cursor_after_instr) &&
       cursor.instr == instr)
      return false;

   nir_instr_remove(instr);
   nir_instr_insert(cursor, instr);
   return true;
}

static bool
src_is_valid(const nir_src *src)
{
   return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL);
}

static bool
remove_use_cb(nir_src *src, void *state)
{
   (void) state;

   if (src_is_valid(src))
      list_del(&src->use_link);

   return true;
}

static bool
remove_def_cb(nir_dest *dest, void *state)
{
   (void) state;

   if (!dest->is_ssa)
      list_del(&dest->reg.def_link);

   return true;
}

static void
remove_defs_uses(nir_instr *instr)
{
   nir_foreach_dest(instr, remove_def_cb, instr);
   nir_foreach_src(instr, remove_use_cb, instr);
}

void nir_instr_remove_v(nir_instr *instr)
{
   remove_defs_uses(instr);
   exec_node_remove(&instr->node);

   if (instr->type == nir_instr_type_jump) {
      nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
      nir_handle_remove_jump(instr->block, jump_instr->type);
   }
}

static bool free_src_indirects_cb(nir_src *src, void *state)
{
   src_free_indirects(src);
   return true;
}

static bool free_dest_indirects_cb(nir_dest *dest, void *state)
{
   dest_free_indirects(dest);
   return true;
}

void nir_instr_free(nir_instr *instr)
{
   nir_foreach_src(instr, free_src_indirects_cb, NULL);
   nir_foreach_dest(instr, free_dest_indirects_cb, NULL);

   switch (instr->type) {
   case nir_instr_type_tex:
      free(nir_instr_as_tex(instr)->src);
      break;

   case nir_instr_type_phi: {
      nir_phi_instr *phi = nir_instr_as_phi(instr);
      nir_foreach_phi_src_safe(phi_src, phi) {
         free(phi_src);
      }
      break;
   }

   default:
      break;
   }

   list_del(&instr->gc_node);
   free(instr);
}

void
nir_instr_free_list(struct exec_list *list)
{
   struct exec_node *node;
   while ((node = exec_list_pop_head(list))) {
      nir_instr *removed_instr = exec_node_data(nir_instr, node, node);
      nir_instr_free(removed_instr);
   }
}

static bool nir_instr_free_and_dce_live_cb(nir_ssa_def *def, void *state)
{
   bool *live = state;

   if (!nir_ssa_def_is_unused(def)) {
      *live = true;
      return false;
   } else {
      return true;
   }
}

static bool nir_instr_free_and_dce_is_live(nir_instr *instr)
{
   /* Note: don't have to worry about jumps because they don't have dests to
    * become unused.
    */
   if (instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
      if (!(info->flags & NIR_INTRINSIC_CAN_ELIMINATE))
         return true;
   }

   bool live = false;
   nir_foreach_ssa_def(instr, nir_instr_free_and_dce_live_cb, &live);
   return live;
}

static bool
nir_instr_dce_add_dead_srcs_cb(nir_src *src, void *state)
{
   nir_instr_worklist *wl = state;

   if (src->is_ssa) {
      list_del(&src->use_link);
      if (!nir_instr_free_and_dce_is_live(src->ssa->parent_instr))
         nir_instr_worklist_push_tail(wl, src->ssa->parent_instr);

      /* Stop nir_instr_remove from trying to delete the link again. */
      src->ssa = NULL;
   }

   return true;
}

static void
nir_instr_dce_add_dead_ssa_srcs(nir_instr_worklist *wl, nir_instr *instr)
{
   nir_foreach_src(instr, nir_instr_dce_add_dead_srcs_cb, wl);
}

/**
 * Frees an instruction and any SSA defs that it used that are now dead,
 * returning a nir_cursor where the instruction previously was.
 */
nir_cursor
nir_instr_free_and_dce(nir_instr *instr)
{
   nir_instr_worklist *worklist = nir_instr_worklist_create();

   nir_instr_dce_add_dead_ssa_srcs(worklist, instr);
   nir_cursor c = nir_instr_remove(instr);

   struct exec_list to_free;
   exec_list_make_empty(&to_free);

   nir_instr *dce_instr;
   while ((dce_instr = nir_instr_worklist_pop_head(worklist))) {
      nir_instr_dce_add_dead_ssa_srcs(worklist, dce_instr);

      /* If we're removing the instr where our cursor is, then we have to
       * point the cursor elsewhere.
       */
      if ((c.option == nir_cursor_before_instr ||
           c.option == nir_cursor_after_instr) &&
          c.instr == dce_instr)
         c = nir_instr_remove(dce_instr);
      else
         nir_instr_remove(dce_instr);
      exec_list_push_tail(&to_free, &dce_instr->node);
   }

   nir_instr_free_list(&to_free);

   nir_instr_worklist_destroy(worklist);

   return c;
}
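
/* Usage sketch (assumed pass code, not from this file): replace an ALU
 * instruction's result and reclaim any now-dead operands in one step; the
 * returned cursor is a safe point to continue iterating from.
 *
 *    nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, replacement);
 *    nir_cursor c = nir_instr_free_and_dce(&alu->instr);
 */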

/*@}*/

void
nir_index_local_regs(nir_function_impl *impl)
{
   unsigned index = 0;
   foreach_list_typed(nir_register, reg, node, &impl->registers) {
      reg->index = index++;
   }
   impl->reg_alloc = index;
}

struct foreach_ssa_def_state {
   nir_foreach_ssa_def_cb cb;
   void *client_state;
};

static inline bool
nir_ssa_def_visitor(nir_dest *dest, void *void_state)
{
   struct foreach_ssa_def_state *state = void_state;

   if (dest->is_ssa)
      return state->cb(&dest->ssa, state->client_state);
   else
      return true;
}

bool
nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
   case nir_instr_type_deref:
   case nir_instr_type_tex:
   case nir_instr_type_intrinsic:
   case nir_instr_type_phi:
   case nir_instr_type_parallel_copy: {
      struct foreach_ssa_def_state foreach_state = {cb, state};
      return nir_foreach_dest(instr, nir_ssa_def_visitor, &foreach_state);
   }

   case nir_instr_type_load_const:
      return cb(&nir_instr_as_load_const(instr)->def, state);
   case nir_instr_type_ssa_undef:
      return cb(&nir_instr_as_ssa_undef(instr)->def, state);
   case nir_instr_type_call:
   case nir_instr_type_jump:
      return true;
   default:
      unreachable("Invalid instruction type");
   }
}

nir_ssa_def *
nir_instr_ssa_def(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      assert(nir_instr_as_alu(instr)->dest.dest.is_ssa);
      return &nir_instr_as_alu(instr)->dest.dest.ssa;

   case nir_instr_type_deref:
      assert(nir_instr_as_deref(instr)->dest.is_ssa);
      return &nir_instr_as_deref(instr)->dest.ssa;

   case nir_instr_type_tex:
      assert(nir_instr_as_tex(instr)->dest.is_ssa);
      return &nir_instr_as_tex(instr)->dest.ssa;

   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
         assert(intrin->dest.is_ssa);
         return &intrin->dest.ssa;
      } else {
         return NULL;
      }
   }

   case nir_instr_type_phi:
      assert(nir_instr_as_phi(instr)->dest.is_ssa);
      return &nir_instr_as_phi(instr)->dest.ssa;

   case nir_instr_type_parallel_copy:
      unreachable("Parallel copies are unsupported by this function");

   case nir_instr_type_load_const:
      return &nir_instr_as_load_const(instr)->def;

   case nir_instr_type_ssa_undef:
      return &nir_instr_as_ssa_undef(instr)->def;

   case nir_instr_type_call:
   case nir_instr_type_jump:
      return NULL;
   }

   unreachable("Invalid instruction type");
}

bool
nir_foreach_phi_src_leaving_block(nir_block *block,
                                  nir_foreach_src_cb cb,
                                  void *state)
{
   for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) {
      if (block->successors[i] == NULL)
         continue;

      nir_foreach_instr(instr, block->successors[i]) {
         if (instr->type != nir_instr_type_phi)
            break;

         nir_phi_instr *phi = nir_instr_as_phi(instr);
         nir_foreach_phi_src(phi_src, phi) {
            if (phi_src->pred == block) {
               if (!cb(&phi_src->src, state))
                  return false;
            }
         }
      }
   }

   return true;
}

nir_const_value
nir_const_value_for_float(double f, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   switch (bit_size) {
   case 16:
      v.u16 = _mesa_float_to_half(f);
      break;
   case 32:
      v.f32 = f;
      break;
   case 64:
      v.f64 = f;
      break;
   default:
      unreachable("Invalid bit size");
   }

   return v;
}

double
nir_const_value_as_float(nir_const_value value, unsigned bit_size)
{
   switch (bit_size) {
   case 16: return _mesa_half_to_float(value.u16);
   case 32: return value.f32;
   case 64: return value.f64;
   default:
      unreachable("Invalid bit size");
   }
}

nir_const_value *
nir_src_as_const_value(nir_src src)
{
   if (!src.is_ssa)
      return NULL;

   if (src.ssa->parent_instr->type != nir_instr_type_load_const)
      return NULL;

   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   return load->value;
}
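
/* Usage sketch (assumed alu instruction, not from this file): check for a
 * constant operand; NULL means the source is not a load_const result.
 *
 *    nir_const_value *cv = nir_src_as_const_value(alu->src[0].src);
 *    if (cv && cv[0].u32 == 0) {
 *       // operand 0 is the constant 0 -- simplification possible
 *    }
 */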

/**
 * Returns true if the source is known to be dynamically uniform. Otherwise it
 * returns false, which means it may or may not be dynamically uniform but it
 * can't be determined.
 */
bool
nir_src_is_dynamically_uniform(nir_src src)
{
   if (!src.is_ssa)
      return false;

   /* Constants are trivially dynamically uniform */
   if (src.ssa->parent_instr->type == nir_instr_type_load_const)
      return true;

   if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);
      /* As are uniform variables */
      if (intr->intrinsic == nir_intrinsic_load_uniform &&
          nir_src_is_dynamically_uniform(intr->src[0]))
         return true;
      /* Push constant loads always use uniform offsets. */
      if (intr->intrinsic == nir_intrinsic_load_push_constant)
         return true;
      if (intr->intrinsic == nir_intrinsic_load_deref &&
          nir_deref_mode_is(nir_src_as_deref(intr->src[0]), nir_var_mem_push_const))
         return true;
   }

   /* Operating together dynamically uniform expressions produces a
    * dynamically uniform result
    */
   if (src.ssa->parent_instr->type == nir_instr_type_alu) {
      nir_alu_instr *alu = nir_instr_as_alu(src.ssa->parent_instr);
      for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
         if (!nir_src_is_dynamically_uniform(alu->src[i].src))
            return false;
      }

      return true;
   }

   /* XXX: this could have many more tests, such as when a sampler function is
    * called with dynamically uniform arguments.
    */
   return false;
}

static void
src_remove_all_uses(nir_src *src)
{
   for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
      if (!src_is_valid(src))
         continue;

      list_del(&src->use_link);
   }
}

static void
src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
{
   for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
      if (!src_is_valid(src))
         continue;

      if (parent_instr) {
         src->parent_instr = parent_instr;
         if (src->is_ssa)
            list_addtail(&src->use_link, &src->ssa->uses);
         else
            list_addtail(&src->use_link, &src->reg.reg->uses);
      } else {
         assert(parent_if);
         src->parent_if = parent_if;
         if (src->is_ssa)
            list_addtail(&src->use_link, &src->ssa->if_uses);
         else
            list_addtail(&src->use_link, &src->reg.reg->if_uses);
      }
   }
}

void
nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src)
{
   assert(!src_is_valid(src) || src->parent_instr == instr);

   src_remove_all_uses(src);
   nir_src_copy(src, &new_src);
   src_add_all_uses(src, instr, NULL);
}

void
nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
{
   assert(!src_is_valid(dest) || dest->parent_instr == dest_instr);

   src_remove_all_uses(dest);
   src_free_indirects(dest);
   src_remove_all_uses(src);
   *dest = *src;
   *src = NIR_SRC_INIT;
   src_add_all_uses(dest, dest_instr, NULL);
}

void
nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src)
{
   nir_src *src = &if_stmt->condition;
   assert(!src_is_valid(src) || src->parent_if == if_stmt);

   src_remove_all_uses(src);
   nir_src_copy(src, &new_src);
   src_add_all_uses(src, NULL, if_stmt);
}

void
nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest)
{
   if (dest->is_ssa) {
      /* We can only overwrite an SSA destination if it has no uses. */
      assert(nir_ssa_def_is_unused(&dest->ssa));
   } else {
      list_del(&dest->reg.def_link);
      if (dest->reg.indirect)
         src_remove_all_uses(dest->reg.indirect);
   }

   /* We can't re-write with an SSA def */
   assert(!new_dest.is_ssa);

   nir_dest_copy(dest, &new_dest);

   dest->reg.parent_instr = instr;
   list_addtail(&dest->reg.def_link, &new_dest.reg.reg->defs);

   if (dest->reg.indirect)
      src_add_all_uses(dest->reg.indirect, instr, NULL);
}

/* note: does *not* take ownership of 'name' */
void
nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
                 unsigned num_components,
                 unsigned bit_size)
{
   def->parent_instr = instr;
   list_inithead(&def->uses);
   list_inithead(&def->if_uses);
   def->num_components = num_components;
   def->bit_size = bit_size;
   def->divergent = true; /* This is the safer default */

   if (instr->block) {
      nir_function_impl *impl =
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;

      impl->valid_metadata &= ~nir_metadata_live_ssa_defs;
   } else {
      def->index = UINT_MAX;
   }
}

/* note: does *not* take ownership of 'name' */
void
nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
                  unsigned num_components, unsigned bit_size,
                  const char *name)
{
   dest->is_ssa = true;
   nir_ssa_def_init(instr, &dest->ssa, num_components, bit_size);
}

void
nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_ssa_def *new_ssa)
{
   assert(def != new_ssa);
   nir_foreach_use_safe(use_src, def)
      nir_instr_rewrite_src_ssa(use_src->parent_instr, use_src, new_ssa);

   nir_foreach_if_use_safe(use_src, def)
      nir_if_rewrite_condition_ssa(use_src->parent_if, use_src, new_ssa);
}
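
/* Usage sketch (assumed defs, not from this file): the standard way to
 * replace one SSA value with another across the whole shader; every use
 * and every if-condition pointing at the old def is redirected.
 *
 *    nir_ssa_def_rewrite_uses(old_def, new_def);
 *    nir_instr_remove(old_def->parent_instr);
 */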
1651 
1652 void
nir_ssa_def_rewrite_uses_src(nir_ssa_def * def,nir_src new_src)1653 nir_ssa_def_rewrite_uses_src(nir_ssa_def *def, nir_src new_src)
1654 {
1655    if (new_src.is_ssa) {
1656       nir_ssa_def_rewrite_uses(def, new_src.ssa);
1657    } else {
1658       nir_foreach_use_safe(use_src, def)
1659          nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
1660 
1661       nir_foreach_if_use_safe(use_src, def)
1662          nir_if_rewrite_condition(use_src->parent_if, new_src);
1663    }
1664 }
1665 
1666 static bool
is_instr_between(nir_instr * start,nir_instr * end,nir_instr * between)1667 is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
1668 {
1669    assert(start->block == end->block);
1670 
1671    if (between->block != start->block)
1672       return false;
1673 
1674    /* Search backwards looking for "between" */
1675    while (start != end) {
1676       if (between == end)
1677          return true;
1678 
1679       end = nir_instr_prev(end);
1680       assert(end);
1681    }
1682 
1683    return false;
1684 }
1685 
1686 /* Replaces all uses of the given SSA def with the given source but only if
1687  * the use comes after the after_me instruction.  This can be useful if you
1688  * are emitting code to fix up the result of some instruction: you can freely
1689  * use the result in that code and then call rewrite_uses_after and pass the
1690  * last fixup instruction as after_me and it will replace all of the uses you
1691  * want without touching the fixup code.
1692  *
1693  * This function assumes that after_me is in the same block as
1694  * def->parent_instr and that after_me comes after def->parent_instr.
1695  */
1696 void
nir_ssa_def_rewrite_uses_after(nir_ssa_def * def,nir_ssa_def * new_ssa,nir_instr * after_me)1697 nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_ssa_def *new_ssa,
1698                                nir_instr *after_me)
1699 {
1700    if (def == new_ssa)
1701       return;
1702 
1703    nir_foreach_use_safe(use_src, def) {
1704       assert(use_src->parent_instr != def->parent_instr);
1705       /* Since def already dominates all of its uses, the only way a use can
1706        * not be dominated by after_me is if it is between def and after_me in
1707        * the instruction list.
1708        */
1709       if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr))
1710          nir_instr_rewrite_src_ssa(use_src->parent_instr, use_src, new_ssa);
1711    }
1712 
1713    nir_foreach_if_use_safe(use_src, def) {
1714       nir_if_rewrite_condition_ssa(use_src->parent_if,
1715                                    &use_src->parent_if->condition,
1716                                    new_ssa);
1717    }
1718 }
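
/* Illustrative sketch (example_clamp_def is hypothetical, not part of the
 * NIR API): the fixup pattern described above.  The fixup code itself reads
 * 'def', so the last fixup instruction is passed as after_me to keep the
 * fixup from being redirected to its own result.
 */
static inline nir_ssa_def *
example_clamp_def(nir_builder *b, nir_ssa_def *def)
{
   b->cursor = nir_after_instr(def->parent_instr);

   /* Fixup code: clamp the result to [0, 1].  This fsat is a use of 'def'
    * and must not be rewritten.
    */
   nir_ssa_def *clamped = nir_fsat(b, def);

   /* Redirect every use that comes after the last fixup instruction. */
   nir_ssa_def_rewrite_uses_after(def, clamped, clamped->parent_instr);
   return clamped;
}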

static nir_ssa_def *
get_store_value(nir_intrinsic_instr *intrin)
{
   assert(nir_intrinsic_has_write_mask(intrin));
   /* deref stores have the deref in src[0] and the store value in src[1] */
   if (intrin->intrinsic == nir_intrinsic_store_deref ||
       intrin->intrinsic == nir_intrinsic_store_deref_block_intel)
      return intrin->src[1].ssa;

   /* all other stores have the store value in src[0] */
   return intrin->src[0].ssa;
}

nir_component_mask_t
nir_src_components_read(const nir_src *src)
{
   assert(src->is_ssa && src->parent_instr);

   if (src->parent_instr->type == nir_instr_type_alu) {
      nir_alu_instr *alu = nir_instr_as_alu(src->parent_instr);
      nir_alu_src *alu_src = exec_node_data(nir_alu_src, src, src);
      int src_idx = alu_src - &alu->src[0];
      assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs);
      return nir_alu_instr_src_read_mask(alu, src_idx);
   } else if (src->parent_instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(src->parent_instr);
      if (nir_intrinsic_has_write_mask(intrin) && src->ssa == get_store_value(intrin))
         return nir_intrinsic_write_mask(intrin);
      else
         return (1 << src->ssa->num_components) - 1;
   } else {
      return (1 << src->ssa->num_components) - 1;
   }
}

nir_component_mask_t
nir_ssa_def_components_read(const nir_ssa_def *def)
{
   nir_component_mask_t read_mask = 0;

   if (!list_is_empty(&def->if_uses))
      read_mask |= 1;

   nir_foreach_use(use, def) {
      read_mask |= nir_src_components_read(use);
      if (read_mask == (1 << def->num_components) - 1)
         return read_mask;
   }

   return read_mask;
}
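
/* Example (illustrative): a component-shrinking pass could use this mask to
 * find dead trailing components:
 *
 *    nir_component_mask_t mask = nir_ssa_def_components_read(def);
 *    unsigned needed = util_last_bit(mask);
 *    if (needed < def->num_components) {
 *       ... components 'needed' and up are never read, shrink away ...
 *    }
 */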

nir_block *
nir_block_unstructured_next(nir_block *block)
{
   if (block == NULL) {
      /* nir_foreach_block_unstructured_safe() will call this function on a
       * NULL block after the last iteration, but it won't use the result so
       * just return NULL here.
       */
      return NULL;
   }

   nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
   if (cf_next == NULL && block->cf_node.parent->type == nir_cf_node_function)
      return NULL;

   if (cf_next && cf_next->type == nir_cf_node_block)
      return nir_cf_node_as_block(cf_next);

   return nir_block_cf_tree_next(block);
}

nir_block *
nir_unstructured_start_block(nir_function_impl *impl)
{
   return nir_start_block(impl);
}

nir_block *
nir_block_cf_tree_next(nir_block *block)
{
   if (block == NULL) {
      /* nir_foreach_block_safe() will call this function on a NULL block
       * after the last iteration, but it won't use the result so just return
       * NULL here.
       */
      return NULL;
   }

   assert(nir_cf_node_get_function(&block->cf_node)->structured);

   nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
   if (cf_next)
      return nir_cf_node_cf_tree_first(cf_next);

   nir_cf_node *parent = block->cf_node.parent;

   switch (parent->type) {
   case nir_cf_node_if: {
      /* Are we at the end of the if? Go to the beginning of the else */
      nir_if *if_stmt = nir_cf_node_as_if(parent);
      if (block == nir_if_last_then_block(if_stmt))
         return nir_if_first_else_block(if_stmt);

      assert(block == nir_if_last_else_block(if_stmt));
   }
   FALLTHROUGH;

   case nir_cf_node_loop:
      return nir_cf_node_as_block(nir_cf_node_next(parent));

   case nir_cf_node_function:
      return NULL;

   default:
      unreachable("unknown cf node type");
   }
}

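/* Note: passes rarely call nir_block_cf_tree_next() directly; the usual way
 * to walk a structured impl is the nir_foreach_block() iterator built on top
 * of it:
 *
 *    nir_foreach_block(block, impl) {
 *       nir_foreach_instr(instr, block) {
 *          ...
 *       }
 *    }
 */
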
nir_block *
nir_block_cf_tree_prev(nir_block *block)
{
   if (block == NULL) {
      /* do this for consistency with nir_block_cf_tree_next() */
      return NULL;
   }

   assert(nir_cf_node_get_function(&block->cf_node)->structured);

   nir_cf_node *cf_prev = nir_cf_node_prev(&block->cf_node);
   if (cf_prev)
      return nir_cf_node_cf_tree_last(cf_prev);

   nir_cf_node *parent = block->cf_node.parent;

   switch (parent->type) {
   case nir_cf_node_if: {
      /* Are we at the beginning of the else? Go to the end of the if */
      nir_if *if_stmt = nir_cf_node_as_if(parent);
      if (block == nir_if_first_else_block(if_stmt))
         return nir_if_last_then_block(if_stmt);

      assert(block == nir_if_first_then_block(if_stmt));
   }
   FALLTHROUGH;

   case nir_cf_node_loop:
      return nir_cf_node_as_block(nir_cf_node_prev(parent));

   case nir_cf_node_function:
      return NULL;

   default:
      unreachable("unknown cf node type");
   }
}

nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node)
{
   switch (node->type) {
   case nir_cf_node_function: {
      nir_function_impl *impl = nir_cf_node_as_function(node);
      return nir_start_block(impl);
   }

   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(node);
      return nir_if_first_then_block(if_stmt);
   }

   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(node);
      return nir_loop_first_block(loop);
   }

   case nir_cf_node_block: {
      return nir_cf_node_as_block(node);
   }

   default:
      unreachable("unknown node type");
   }
}

nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node)
{
   switch (node->type) {
   case nir_cf_node_function: {
      nir_function_impl *impl = nir_cf_node_as_function(node);
      return nir_impl_last_block(impl);
   }

   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(node);
      return nir_if_last_else_block(if_stmt);
   }

   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(node);
      return nir_loop_last_block(loop);
   }

   case nir_cf_node_block: {
      return nir_cf_node_as_block(node);
   }

   default:
      unreachable("unknown node type");
   }
}

nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node)
{
   if (node->type == nir_cf_node_block)
      return nir_block_cf_tree_next(nir_cf_node_as_block(node));
   else if (node->type == nir_cf_node_function)
      return NULL;
   else
      return nir_cf_node_as_block(nir_cf_node_next(node));
}

nir_if *
nir_block_get_following_if(nir_block *block)
{
   if (exec_node_is_tail_sentinel(&block->cf_node.node))
      return NULL;

   if (nir_cf_node_is_last(&block->cf_node))
      return NULL;

   nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);

   if (next_node->type != nir_cf_node_if)
      return NULL;

   return nir_cf_node_as_if(next_node);
}

nir_loop *
nir_block_get_following_loop(nir_block *block)
{
   if (exec_node_is_tail_sentinel(&block->cf_node.node))
      return NULL;

   if (nir_cf_node_is_last(&block->cf_node))
      return NULL;

   nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);

   if (next_node->type != nir_cf_node_loop)
      return NULL;

   return nir_cf_node_as_loop(next_node);
}

static int
compare_block_index(const void *p1, const void *p2)
{
   const nir_block *block1 = *((const nir_block **) p1);
   const nir_block *block2 = *((const nir_block **) p2);

   return (int) block1->index - (int) block2->index;
}

nir_block **
nir_block_get_predecessors_sorted(const nir_block *block, void *mem_ctx)
{
   nir_block **preds =
      ralloc_array(mem_ctx, nir_block *, block->predecessors->entries);

   unsigned i = 0;
   set_foreach(block->predecessors, entry)
      preds[i++] = (nir_block *) entry->key;
   assert(i == block->predecessors->entries);

   qsort(preds, block->predecessors->entries, sizeof(nir_block *),
         compare_block_index);

   return preds;
}

void
nir_index_blocks(nir_function_impl *impl)
{
   unsigned index = 0;

   if (impl->valid_metadata & nir_metadata_block_index)
      return;

   nir_foreach_block_unstructured(block, impl) {
      block->index = index++;
   }

   /* The end_block isn't really part of the program, which is why its index
    * is >= num_blocks.
    */
   impl->num_blocks = impl->end_block->index = index;
}

static bool
index_ssa_def_cb(nir_ssa_def *def, void *state)
{
   unsigned *index = (unsigned *) state;
   def->index = (*index)++;

   return true;
}

/**
 * The indices are applied top-to-bottom which has the very nice property
 * that, if A dominates B, then A->index <= B->index.
 */
void
nir_index_ssa_defs(nir_function_impl *impl)
{
   unsigned index = 0;

   impl->valid_metadata &= ~nir_metadata_live_ssa_defs;

   nir_foreach_block_unstructured(block, impl) {
      nir_foreach_instr(instr, block)
         nir_foreach_ssa_def(instr, index_ssa_def_cb, &index);
   }

   impl->ssa_alloc = index;
}
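
/* Sketch (example_def_precedes is a hypothetical helper): the property above
 * makes a cheap necessary condition for dominance available right after
 * nir_index_ssa_defs() has run.
 */
static inline bool
example_def_precedes(const nir_ssa_def *a, const nir_ssa_def *b)
{
   /* If a->index > b->index then 'a' cannot dominate 'b'.  The converse is
    * not true: a smaller index is necessary for dominance, not sufficient.
    */
   return a->index <= b->index;
}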

/**
 * The indices are applied top-to-bottom which has the very nice property
 * that, if A dominates B, then A->index <= B->index.
 */
unsigned
nir_index_instrs(nir_function_impl *impl)
{
   unsigned index = 0;

   nir_foreach_block(block, impl) {
      block->start_ip = index++;

      nir_foreach_instr(instr, block)
         instr->index = index++;

      block->end_ip = index++;
   }

   return index;
}
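
/* Example (illustrative): with this numbering, block ranges and instruction
 * indices live in one ordered space, so "is 'instr' inside 'block'" becomes
 * two integer compares, valid right after nir_index_instrs() and only for a
 * block and instruction of the same impl:
 *
 *    bool inside = instr->index > block->start_ip &&
 *                  instr->index < block->end_ip;
 */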

unsigned
nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes)
{
   unsigned count = 0;
   nir_foreach_variable_with_modes(var, shader, modes)
      var->index = count++;
   return count;
}

unsigned
nir_function_impl_index_vars(nir_function_impl *impl)
{
   unsigned count = 0;
   nir_foreach_function_temp_variable(var, impl)
      var->index = count++;
   return count;
}

static nir_instr *
cursor_next_instr(nir_cursor cursor)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      for (nir_block *block = cursor.block; block;
           block = nir_block_cf_tree_next(block)) {
         nir_instr *instr = nir_block_first_instr(block);
         if (instr)
            return instr;
      }
      return NULL;

   case nir_cursor_after_block:
      cursor.block = nir_block_cf_tree_next(cursor.block);
      if (cursor.block == NULL)
         return NULL;

      cursor.option = nir_cursor_before_block;
      return cursor_next_instr(cursor);

   case nir_cursor_before_instr:
      return cursor.instr;

   case nir_cursor_after_instr:
      if (nir_instr_next(cursor.instr))
         return nir_instr_next(cursor.instr);

      cursor.option = nir_cursor_after_block;
      cursor.block = cursor.instr->block;
      return cursor_next_instr(cursor);
   }

   unreachable("Invalid cursor option");
}

ASSERTED static bool
dest_is_ssa(nir_dest *dest, void *_state)
{
   (void) _state;
   return dest->is_ssa;
}

bool
nir_function_impl_lower_instructions(nir_function_impl *impl,
                                     nir_instr_filter_cb filter,
                                     nir_lower_instr_cb lower,
                                     void *cb_data)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_metadata preserved = nir_metadata_block_index |
                            nir_metadata_dominance;

   bool progress = false;
   nir_cursor iter = nir_before_cf_list(&impl->body);
   nir_instr *instr;
   while ((instr = cursor_next_instr(iter)) != NULL) {
      if (filter && !filter(instr, cb_data)) {
         iter = nir_after_instr(instr);
         continue;
      }

      assert(nir_foreach_dest(instr, dest_is_ssa, NULL));
      nir_ssa_def *old_def = nir_instr_ssa_def(instr);
      struct list_head old_uses, old_if_uses;
      if (old_def != NULL) {
         /* We're about to ask the callback to generate a replacement for instr.
          * Save off the uses from instr's SSA def so we know what uses to
          * rewrite later.  If we use nir_ssa_def_rewrite_uses, it fails in the
          * case where the generated replacement code uses the result of instr
          * itself.  If we use nir_ssa_def_rewrite_uses_after (which is the
          * normal solution to this problem), it doesn't work well if control-
          * flow is inserted as part of the replacement, doesn't handle cases
          * where the replacement is something consumed by instr, and suffers
          * from performance issues.  This is the only way to 100% guarantee
          * that we rewrite the correct set efficiently.
          */

         list_replace(&old_def->uses, &old_uses);
         list_inithead(&old_def->uses);
         list_replace(&old_def->if_uses, &old_if_uses);
         list_inithead(&old_def->if_uses);
      }

      b.cursor = nir_after_instr(instr);
      nir_ssa_def *new_def = lower(&b, instr, cb_data);
      if (new_def && new_def != NIR_LOWER_INSTR_PROGRESS &&
          new_def != NIR_LOWER_INSTR_PROGRESS_REPLACE) {
         assert(old_def != NULL);
         if (new_def->parent_instr->block != instr->block)
            preserved = nir_metadata_none;

         nir_src new_src = nir_src_for_ssa(new_def);
         list_for_each_entry_safe(nir_src, use_src, &old_uses, use_link)
            nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);

         list_for_each_entry_safe(nir_src, use_src, &old_if_uses, use_link)
            nir_if_rewrite_condition(use_src->parent_if, new_src);

         if (nir_ssa_def_is_unused(old_def)) {
            iter = nir_instr_free_and_dce(instr);
         } else {
            iter = nir_after_instr(instr);
         }
         progress = true;
      } else {
         /* We didn't end up lowering after all.  Put the uses back */
         if (old_def) {
            list_replace(&old_uses, &old_def->uses);
            list_replace(&old_if_uses, &old_def->if_uses);
         }
         if (new_def == NIR_LOWER_INSTR_PROGRESS_REPLACE) {
            /* Only instructions without a return value can be removed like this */
            assert(!old_def);
            iter = nir_instr_free_and_dce(instr);
            progress = true;
         } else
            iter = nir_after_instr(instr);

         if (new_def == NIR_LOWER_INSTR_PROGRESS)
            progress = true;
      }
   }

   if (progress) {
      nir_metadata_preserve(impl, preserved);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}

bool
nir_shader_lower_instructions(nir_shader *shader,
                              nir_instr_filter_cb filter,
                              nir_lower_instr_cb lower,
                              void *cb_data)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl &&
          nir_function_impl_lower_instructions(function->impl,
                                               filter, lower, cb_data))
         progress = true;
   }

   return progress;
}
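
/* Sketch of a hypothetical lower callback (example_lower_fneg is not a real
 * Mesa pass): rewrites 32-bit fneg ALU instructions as a multiply by -1.0.
 * Returning NULL means "no change"; the framework above then takes the
 * not-lowered path and restores the saved use lists.
 */
static inline nir_ssa_def *
example_lower_fneg(nir_builder *b, nir_instr *instr, void *_data)
{
   if (instr->type != nir_instr_type_alu)
      return NULL;

   nir_alu_instr *alu = nir_instr_as_alu(instr);
   /* Keep the sketch simple: only handle 32-bit fneg. */
   if (alu->op != nir_op_fneg || alu->dest.dest.ssa.bit_size != 32)
      return NULL;

   /* b->cursor was already placed after 'instr' by the caller above. */
   nir_ssa_def *x = nir_ssa_for_alu_src(b, alu, 0);
   return nir_fmul(b, x, nir_imm_float(b, -1.0f));
}

/* A caller would run it with a NULL filter, since the callback filters on
 * its own:
 *
 *    nir_shader_lower_instructions(shader, NULL, example_lower_fneg, NULL);
 */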

/**
 * Returns true if the shader supports quad-based implicit derivatives on
 * texture sampling.
 */
bool nir_shader_supports_implicit_lod(nir_shader *shader)
{
   return (shader->info.stage == MESA_SHADER_FRAGMENT ||
           (shader->info.stage == MESA_SHADER_COMPUTE &&
            shader->info.cs.derivative_group != DERIVATIVE_GROUP_NONE));
}

nir_intrinsic_op
nir_intrinsic_from_system_value(gl_system_value val)
{
   switch (val) {
   case SYSTEM_VALUE_VERTEX_ID:
      return nir_intrinsic_load_vertex_id;
   case SYSTEM_VALUE_INSTANCE_ID:
      return nir_intrinsic_load_instance_id;
   case SYSTEM_VALUE_DRAW_ID:
      return nir_intrinsic_load_draw_id;
   case SYSTEM_VALUE_BASE_INSTANCE:
      return nir_intrinsic_load_base_instance;
   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
      return nir_intrinsic_load_vertex_id_zero_base;
   case SYSTEM_VALUE_IS_INDEXED_DRAW:
      return nir_intrinsic_load_is_indexed_draw;
   case SYSTEM_VALUE_FIRST_VERTEX:
      return nir_intrinsic_load_first_vertex;
   case SYSTEM_VALUE_BASE_VERTEX:
      return nir_intrinsic_load_base_vertex;
   case SYSTEM_VALUE_INVOCATION_ID:
      return nir_intrinsic_load_invocation_id;
   case SYSTEM_VALUE_FRAG_COORD:
      return nir_intrinsic_load_frag_coord;
   case SYSTEM_VALUE_POINT_COORD:
      return nir_intrinsic_load_point_coord;
   case SYSTEM_VALUE_LINE_COORD:
      return nir_intrinsic_load_line_coord;
   case SYSTEM_VALUE_FRONT_FACE:
      return nir_intrinsic_load_front_face;
   case SYSTEM_VALUE_SAMPLE_ID:
      return nir_intrinsic_load_sample_id;
   case SYSTEM_VALUE_SAMPLE_POS:
      return nir_intrinsic_load_sample_pos;
   case SYSTEM_VALUE_SAMPLE_MASK_IN:
      return nir_intrinsic_load_sample_mask_in;
   case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
      return nir_intrinsic_load_local_invocation_id;
   case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
      return nir_intrinsic_load_local_invocation_index;
   case SYSTEM_VALUE_WORKGROUP_ID:
      return nir_intrinsic_load_workgroup_id;
   case SYSTEM_VALUE_NUM_WORKGROUPS:
      return nir_intrinsic_load_num_workgroups;
   case SYSTEM_VALUE_PRIMITIVE_ID:
      return nir_intrinsic_load_primitive_id;
   case SYSTEM_VALUE_TESS_COORD:
      return nir_intrinsic_load_tess_coord;
   case SYSTEM_VALUE_TESS_LEVEL_OUTER:
      return nir_intrinsic_load_tess_level_outer;
   case SYSTEM_VALUE_TESS_LEVEL_INNER:
      return nir_intrinsic_load_tess_level_inner;
   case SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT:
      return nir_intrinsic_load_tess_level_outer_default;
   case SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT:
      return nir_intrinsic_load_tess_level_inner_default;
   case SYSTEM_VALUE_VERTICES_IN:
      return nir_intrinsic_load_patch_vertices_in;
   case SYSTEM_VALUE_HELPER_INVOCATION:
      return nir_intrinsic_load_helper_invocation;
   case SYSTEM_VALUE_COLOR0:
      return nir_intrinsic_load_color0;
   case SYSTEM_VALUE_COLOR1:
      return nir_intrinsic_load_color1;
   case SYSTEM_VALUE_VIEW_INDEX:
      return nir_intrinsic_load_view_index;
   case SYSTEM_VALUE_SUBGROUP_SIZE:
      return nir_intrinsic_load_subgroup_size;
   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
      return nir_intrinsic_load_subgroup_invocation;
   case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
      return nir_intrinsic_load_subgroup_eq_mask;
   case SYSTEM_VALUE_SUBGROUP_GE_MASK:
      return nir_intrinsic_load_subgroup_ge_mask;
   case SYSTEM_VALUE_SUBGROUP_GT_MASK:
      return nir_intrinsic_load_subgroup_gt_mask;
   case SYSTEM_VALUE_SUBGROUP_LE_MASK:
      return nir_intrinsic_load_subgroup_le_mask;
   case SYSTEM_VALUE_SUBGROUP_LT_MASK:
      return nir_intrinsic_load_subgroup_lt_mask;
   case SYSTEM_VALUE_NUM_SUBGROUPS:
      return nir_intrinsic_load_num_subgroups;
   case SYSTEM_VALUE_SUBGROUP_ID:
      return nir_intrinsic_load_subgroup_id;
   case SYSTEM_VALUE_WORKGROUP_SIZE:
      return nir_intrinsic_load_workgroup_size;
   case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
      return nir_intrinsic_load_global_invocation_id;
   case SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID:
      return nir_intrinsic_load_base_global_invocation_id;
   case SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX:
      return nir_intrinsic_load_global_invocation_index;
   case SYSTEM_VALUE_WORK_DIM:
      return nir_intrinsic_load_work_dim;
   case SYSTEM_VALUE_USER_DATA_AMD:
      return nir_intrinsic_load_user_data_amd;
   case SYSTEM_VALUE_RAY_LAUNCH_ID:
      return nir_intrinsic_load_ray_launch_id;
   case SYSTEM_VALUE_RAY_LAUNCH_SIZE:
      return nir_intrinsic_load_ray_launch_size;
   case SYSTEM_VALUE_RAY_WORLD_ORIGIN:
      return nir_intrinsic_load_ray_world_origin;
   case SYSTEM_VALUE_RAY_WORLD_DIRECTION:
      return nir_intrinsic_load_ray_world_direction;
   case SYSTEM_VALUE_RAY_OBJECT_ORIGIN:
      return nir_intrinsic_load_ray_object_origin;
   case SYSTEM_VALUE_RAY_OBJECT_DIRECTION:
      return nir_intrinsic_load_ray_object_direction;
   case SYSTEM_VALUE_RAY_T_MIN:
      return nir_intrinsic_load_ray_t_min;
   case SYSTEM_VALUE_RAY_T_MAX:
      return nir_intrinsic_load_ray_t_max;
   case SYSTEM_VALUE_RAY_OBJECT_TO_WORLD:
      return nir_intrinsic_load_ray_object_to_world;
   case SYSTEM_VALUE_RAY_WORLD_TO_OBJECT:
      return nir_intrinsic_load_ray_world_to_object;
   case SYSTEM_VALUE_RAY_HIT_KIND:
      return nir_intrinsic_load_ray_hit_kind;
   case SYSTEM_VALUE_RAY_FLAGS:
      return nir_intrinsic_load_ray_flags;
   case SYSTEM_VALUE_RAY_GEOMETRY_INDEX:
      return nir_intrinsic_load_ray_geometry_index;
   case SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX:
      return nir_intrinsic_load_ray_instance_custom_index;
   case SYSTEM_VALUE_FRAG_SHADING_RATE:
      return nir_intrinsic_load_frag_shading_rate;
   default:
      unreachable("system value does not directly correspond to intrinsic");
   }
}

gl_system_value
nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
{
   switch (intrin) {
   case nir_intrinsic_load_vertex_id:
      return SYSTEM_VALUE_VERTEX_ID;
   case nir_intrinsic_load_instance_id:
      return SYSTEM_VALUE_INSTANCE_ID;
   case nir_intrinsic_load_draw_id:
      return SYSTEM_VALUE_DRAW_ID;
   case nir_intrinsic_load_base_instance:
      return SYSTEM_VALUE_BASE_INSTANCE;
   case nir_intrinsic_load_vertex_id_zero_base:
      return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
   case nir_intrinsic_load_first_vertex:
      return SYSTEM_VALUE_FIRST_VERTEX;
   case nir_intrinsic_load_is_indexed_draw:
      return SYSTEM_VALUE_IS_INDEXED_DRAW;
   case nir_intrinsic_load_base_vertex:
      return SYSTEM_VALUE_BASE_VERTEX;
   case nir_intrinsic_load_invocation_id:
      return SYSTEM_VALUE_INVOCATION_ID;
   case nir_intrinsic_load_frag_coord:
      return SYSTEM_VALUE_FRAG_COORD;
   case nir_intrinsic_load_point_coord:
      return SYSTEM_VALUE_POINT_COORD;
   case nir_intrinsic_load_line_coord:
      return SYSTEM_VALUE_LINE_COORD;
   case nir_intrinsic_load_front_face:
      return SYSTEM_VALUE_FRONT_FACE;
   case nir_intrinsic_load_sample_id:
      return SYSTEM_VALUE_SAMPLE_ID;
   case nir_intrinsic_load_sample_pos:
      return SYSTEM_VALUE_SAMPLE_POS;
   case nir_intrinsic_load_sample_mask_in:
      return SYSTEM_VALUE_SAMPLE_MASK_IN;
   case nir_intrinsic_load_local_invocation_id:
      return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
   case nir_intrinsic_load_local_invocation_index:
      return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
   case nir_intrinsic_load_num_workgroups:
      return SYSTEM_VALUE_NUM_WORKGROUPS;
   case nir_intrinsic_load_workgroup_id:
      return SYSTEM_VALUE_WORKGROUP_ID;
   case nir_intrinsic_load_primitive_id:
      return SYSTEM_VALUE_PRIMITIVE_ID;
   case nir_intrinsic_load_tess_coord:
      return SYSTEM_VALUE_TESS_COORD;
   case nir_intrinsic_load_tess_level_outer:
      return SYSTEM_VALUE_TESS_LEVEL_OUTER;
   case nir_intrinsic_load_tess_level_inner:
      return SYSTEM_VALUE_TESS_LEVEL_INNER;
   case nir_intrinsic_load_tess_level_outer_default:
      return SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT;
   case nir_intrinsic_load_tess_level_inner_default:
      return SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT;
   case nir_intrinsic_load_patch_vertices_in:
      return SYSTEM_VALUE_VERTICES_IN;
   case nir_intrinsic_load_helper_invocation:
      return SYSTEM_VALUE_HELPER_INVOCATION;
   case nir_intrinsic_load_color0:
      return SYSTEM_VALUE_COLOR0;
   case nir_intrinsic_load_color1:
      return SYSTEM_VALUE_COLOR1;
   case nir_intrinsic_load_view_index:
      return SYSTEM_VALUE_VIEW_INDEX;
   case nir_intrinsic_load_subgroup_size:
      return SYSTEM_VALUE_SUBGROUP_SIZE;
   case nir_intrinsic_load_subgroup_invocation:
      return SYSTEM_VALUE_SUBGROUP_INVOCATION;
   case nir_intrinsic_load_subgroup_eq_mask:
      return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
   case nir_intrinsic_load_subgroup_ge_mask:
      return SYSTEM_VALUE_SUBGROUP_GE_MASK;
   case nir_intrinsic_load_subgroup_gt_mask:
      return SYSTEM_VALUE_SUBGROUP_GT_MASK;
   case nir_intrinsic_load_subgroup_le_mask:
      return SYSTEM_VALUE_SUBGROUP_LE_MASK;
   case nir_intrinsic_load_subgroup_lt_mask:
      return SYSTEM_VALUE_SUBGROUP_LT_MASK;
   case nir_intrinsic_load_num_subgroups:
      return SYSTEM_VALUE_NUM_SUBGROUPS;
   case nir_intrinsic_load_subgroup_id:
      return SYSTEM_VALUE_SUBGROUP_ID;
   case nir_intrinsic_load_workgroup_size:
      return SYSTEM_VALUE_WORKGROUP_SIZE;
   case nir_intrinsic_load_global_invocation_id:
      return SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
   case nir_intrinsic_load_base_global_invocation_id:
      return SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID;
   case nir_intrinsic_load_global_invocation_index:
      return SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX;
   case nir_intrinsic_load_work_dim:
      return SYSTEM_VALUE_WORK_DIM;
   case nir_intrinsic_load_user_data_amd:
      return SYSTEM_VALUE_USER_DATA_AMD;
   case nir_intrinsic_load_barycentric_model:
      return SYSTEM_VALUE_BARYCENTRIC_PULL_MODEL;
   case nir_intrinsic_load_gs_header_ir3:
      return SYSTEM_VALUE_GS_HEADER_IR3;
   case nir_intrinsic_load_tcs_header_ir3:
      return SYSTEM_VALUE_TCS_HEADER_IR3;
   case nir_intrinsic_load_ray_launch_id:
      return SYSTEM_VALUE_RAY_LAUNCH_ID;
   case nir_intrinsic_load_ray_launch_size:
      return SYSTEM_VALUE_RAY_LAUNCH_SIZE;
   case nir_intrinsic_load_ray_world_origin:
      return SYSTEM_VALUE_RAY_WORLD_ORIGIN;
   case nir_intrinsic_load_ray_world_direction:
      return SYSTEM_VALUE_RAY_WORLD_DIRECTION;
   case nir_intrinsic_load_ray_object_origin:
      return SYSTEM_VALUE_RAY_OBJECT_ORIGIN;
   case nir_intrinsic_load_ray_object_direction:
      return SYSTEM_VALUE_RAY_OBJECT_DIRECTION;
   case nir_intrinsic_load_ray_t_min:
      return SYSTEM_VALUE_RAY_T_MIN;
   case nir_intrinsic_load_ray_t_max:
      return SYSTEM_VALUE_RAY_T_MAX;
   case nir_intrinsic_load_ray_object_to_world:
      return SYSTEM_VALUE_RAY_OBJECT_TO_WORLD;
   case nir_intrinsic_load_ray_world_to_object:
      return SYSTEM_VALUE_RAY_WORLD_TO_OBJECT;
   case nir_intrinsic_load_ray_hit_kind:
      return SYSTEM_VALUE_RAY_HIT_KIND;
   case nir_intrinsic_load_ray_flags:
      return SYSTEM_VALUE_RAY_FLAGS;
   case nir_intrinsic_load_ray_geometry_index:
      return SYSTEM_VALUE_RAY_GEOMETRY_INDEX;
   case nir_intrinsic_load_ray_instance_custom_index:
      return SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX;
   case nir_intrinsic_load_frag_shading_rate:
      return SYSTEM_VALUE_FRAG_SHADING_RATE;
   default:
      unreachable("intrinsic doesn't produce a system value");
   }
}

/* OpenGL utility method that remaps attribute locations if they are
 * doubles.  Not needed for Vulkan because of the differences in input
 * location counts for doubles between Vulkan and OpenGL.
 *
 * The bitfield returned in dual_slot is one bit for each double input slot in
 * the original OpenGL single-slot input numbering.  The mapping from old
 * locations to new locations is as follows:
 *
 *    new_loc = loc + util_bitcount(dual_slot & BITFIELD64_MASK(loc))
 */
void
nir_remap_dual_slot_attributes(nir_shader *shader, uint64_t *dual_slot)
{
   assert(shader->info.stage == MESA_SHADER_VERTEX);

   *dual_slot = 0;
   nir_foreach_shader_in_variable(var, shader) {
      if (glsl_type_is_dual_slot(glsl_without_array(var->type))) {
         unsigned slots = glsl_count_attribute_slots(var->type, true);
         *dual_slot |= BITFIELD64_MASK(slots) << var->data.location;
      }
   }

   nir_foreach_shader_in_variable(var, shader) {
      var->data.location +=
         util_bitcount64(*dual_slot & BITFIELD64_MASK(var->data.location));
   }
}
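
/* Sketch (example_remap_location is a hypothetical helper): the mapping from
 * the comment above, applied to a single location.
 */
static inline unsigned
example_remap_location(unsigned loc, uint64_t dual_slot)
{
   /* Every dual-slot attribute below 'loc' pushes it up by one slot. */
   return loc + util_bitcount64(dual_slot & BITFIELD64_MASK(loc));
}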

/* Returns an attribute mask that has been re-compacted using the given
 * dual_slot mask.
 */
uint64_t
nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot)
{
   while (dual_slot) {
      unsigned loc = u_bit_scan64(&dual_slot);
      /* mask of all bits up to and including loc */
      uint64_t mask = BITFIELD64_MASK(loc + 1);
      attribs = (attribs & mask) | ((attribs & ~mask) >> 1);
   }
   return attribs;
}
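
/* Worked example: with a double at location 0 (dual_slot = 0x1), an attribs
 * mask of 0b101 (locations 0 and 2 in dual-slot numbering, where the double
 * occupies slots 0 and 1) compacts to 0b011 (locations 0 and 1):
 *
 *    mask    = BITFIELD64_MASK(0 + 1) = 0b001
 *    attribs = (0b101 & 0b001) | ((0b101 & ~0b001) >> 1) = 0b011
 */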

void
nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_ssa_def *src,
                            bool bindless)
{
   enum gl_access_qualifier access = nir_intrinsic_access(intrin);

   /* Image intrinsics only have one of these */
   assert(!nir_intrinsic_has_src_type(intrin) ||
          !nir_intrinsic_has_dest_type(intrin));

   nir_alu_type data_type = nir_type_invalid;
   if (nir_intrinsic_has_src_type(intrin))
      data_type = nir_intrinsic_src_type(intrin);
   if (nir_intrinsic_has_dest_type(intrin))
      data_type = nir_intrinsic_dest_type(intrin);

   switch (intrin->intrinsic) {
#define CASE(op) \
   case nir_intrinsic_image_deref_##op: \
      intrin->intrinsic = bindless ? nir_intrinsic_bindless_image_##op \
                                   : nir_intrinsic_image_##op; \
      break;
   CASE(load)
   CASE(sparse_load)
   CASE(store)
   CASE(atomic_add)
   CASE(atomic_imin)
   CASE(atomic_umin)
   CASE(atomic_imax)
   CASE(atomic_umax)
   CASE(atomic_and)
   CASE(atomic_or)
   CASE(atomic_xor)
   CASE(atomic_exchange)
   CASE(atomic_comp_swap)
   CASE(atomic_fadd)
   CASE(atomic_fmin)
   CASE(atomic_fmax)
   CASE(atomic_inc_wrap)
   CASE(atomic_dec_wrap)
   CASE(size)
   CASE(samples)
   CASE(load_raw_intel)
   CASE(store_raw_intel)
#undef CASE
   default:
      unreachable("Unhandled image intrinsic");
   }

   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   /* Only update the format if the intrinsic doesn't have one set */
   if (nir_intrinsic_format(intrin) == PIPE_FORMAT_NONE)
      nir_intrinsic_set_format(intrin, var->data.image.format);

   nir_intrinsic_set_access(intrin, access | var->data.access);
   if (nir_intrinsic_has_src_type(intrin))
      nir_intrinsic_set_src_type(intrin, data_type);
   if (nir_intrinsic_has_dest_type(intrin))
      nir_intrinsic_set_dest_type(intrin, data_type);

   nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
                         nir_src_for_ssa(src));
}

unsigned
nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr)
{
   enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
   int coords = glsl_get_sampler_dim_coordinate_components(dim);
   if (dim == GLSL_SAMPLER_DIM_CUBE)
      return coords;
   else
      return coords + nir_intrinsic_image_array(instr);
}
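
/* Example: a 2D array image has 2 + 1 = 3 coordinate components; a cube
 * image has 3 whether or not it is arrayed, since the code above never adds
 * the array component for GLSL_SAMPLER_DIM_CUBE.
 */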

nir_src *
nir_get_shader_call_payload_src(nir_intrinsic_instr *call)
{
   switch (call->intrinsic) {
   case nir_intrinsic_trace_ray:
   case nir_intrinsic_rt_trace_ray:
      return &call->src[10];
   case nir_intrinsic_execute_callable:
   case nir_intrinsic_rt_execute_callable:
      return &call->src[1];
   default:
      unreachable("Not a call intrinsic");
      return NULL;
   }
}

nir_binding nir_chase_binding(nir_src rsrc)
{
   nir_binding res = {0};
   if (rsrc.ssa->parent_instr->type == nir_instr_type_deref) {
      const struct glsl_type *type = glsl_without_array(nir_src_as_deref(rsrc)->type);
      bool is_image = glsl_type_is_image(type) || glsl_type_is_sampler(type);
      while (rsrc.ssa->parent_instr->type == nir_instr_type_deref) {
         nir_deref_instr *deref = nir_src_as_deref(rsrc);

         if (deref->deref_type == nir_deref_type_var) {
            res.success = true;
            res.var = deref->var;
            res.desc_set = deref->var->data.descriptor_set;
            res.binding = deref->var->data.binding;
            return res;
         } else if (deref->deref_type == nir_deref_type_array && is_image) {
            if (res.num_indices == ARRAY_SIZE(res.indices))
               return (nir_binding){0};
            res.indices[res.num_indices++] = deref->arr.index;
         }

         rsrc = deref->parent;
      }
   }

   /* Skip copies and trimming. Trimming can appear as nir_op_mov instructions
    * when removing the offset from addresses. We also consider nir_op_is_vec()
    * instructions to skip trimming of vec2_index_32bit_offset addresses after
    * lowering ALU to scalar.
    */
   while (true) {
      nir_alu_instr *alu = nir_src_as_alu_instr(rsrc);
      nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc);
      if (alu && alu->op == nir_op_mov) {
         for (unsigned i = 0; i < alu->dest.dest.ssa.num_components; i++) {
            if (alu->src[0].swizzle[i] != i)
               return (nir_binding){0};
         }
         rsrc = alu->src[0].src;
      } else if (alu && nir_op_is_vec(alu->op)) {
         for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
            if (alu->src[i].swizzle[0] != i || alu->src[i].src.ssa != alu->src[0].src.ssa)
               return (nir_binding){0};
         }
         rsrc = alu->src[0].src;
      } else if (intrin && intrin->intrinsic == nir_intrinsic_read_first_invocation) {
         /* The caller might want to know whether only the first invocation's
          * value of the indices is used.
          */
         res.read_first_invocation = true;
         rsrc = intrin->src[0];
      } else {
         break;
      }
   }

   if (nir_src_is_const(rsrc)) {
      /* GL binding model after deref lowering */
      res.success = true;
      res.binding = nir_src_as_uint(rsrc);
      return res;
   }

   /* otherwise, must be Vulkan binding model after deref lowering or GL bindless */

   nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc);
   if (!intrin)
      return (nir_binding){0};

   /* skip load_vulkan_descriptor */
   if (intrin->intrinsic == nir_intrinsic_load_vulkan_descriptor) {
      intrin = nir_src_as_intrinsic(intrin->src[0]);
      if (!intrin)
         return (nir_binding){0};
   }

   if (intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
      return (nir_binding){0};

   assert(res.num_indices == 0);
   res.success = true;
   res.desc_set = nir_intrinsic_desc_set(intrin);
   res.binding = nir_intrinsic_binding(intrin);
   res.num_indices = 1;
   res.indices[0] = intrin->src[0];
   return res;
}
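
/* Example (illustrative): a backend wanting to know which resource backs a
 * UBO/SSBO access might do:
 *
 *    nir_binding binding = nir_chase_binding(intrin->src[0]);
 *    if (binding.success) {
 *       ... inspect binding.desc_set, binding.binding, binding.indices ...
 *    }
 */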

nir_variable *nir_get_binding_variable(nir_shader *shader, nir_binding binding)
{
   nir_variable *binding_var = NULL;
   unsigned count = 0;

   if (!binding.success)
      return NULL;

   if (binding.var)
      return binding.var;

   nir_foreach_variable_with_modes(var, shader, nir_var_mem_ubo | nir_var_mem_ssbo) {
      if (var->data.descriptor_set == binding.desc_set && var->data.binding == binding.binding) {
         binding_var = var;
         count++;
      }
   }

   /* Be conservative if another variable is using the same binding/desc_set
    * because the access mask might be different and we can't get it reliably.
    */
   if (count > 1)
      return NULL;

   return binding_var;
}

bool
nir_alu_instr_is_copy(nir_alu_instr *instr)
{
   assert(instr->src[0].src.is_ssa);

   if (instr->op == nir_op_mov) {
      return !instr->dest.saturate &&
             !instr->src[0].abs &&
             !instr->src[0].negate;
   } else if (nir_op_is_vec(instr->op)) {
      for (unsigned i = 0; i < instr->dest.dest.ssa.num_components; i++) {
         if (instr->src[i].abs || instr->src[i].negate)
            return false;
      }
      return !instr->dest.saturate;
   } else {
      return false;
   }
}

nir_ssa_scalar
nir_ssa_scalar_chase_movs(nir_ssa_scalar s)
{
   while (nir_ssa_scalar_is_alu(s)) {
      nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
      if (!nir_alu_instr_is_copy(alu))
         break;

      if (alu->op == nir_op_mov) {
         s.def = alu->src[0].src.ssa;
         s.comp = alu->src[0].swizzle[s.comp];
      } else {
         assert(nir_op_is_vec(alu->op));
         s.def = alu->src[s.comp].src.ssa;
         s.comp = alu->src[s.comp].swizzle[0];
      }
   }

   return s;
}
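
/* Example (illustrative): a caller usually builds the scalar by hand and
 * chases copies before inspecting it:
 *
 *    nir_ssa_scalar s = { .def = def, .comp = 0 };
 *    s = nir_ssa_scalar_chase_movs(s);
 *    ... 's' now points past any mov/vec copy chain ...
 */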