1 /*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Connor Abbott (cwabbott0@gmail.com)
25 *
26 */
27
28 #include "nir.h"
29 #include "nir_builder.h"
30 #include "nir_control_flow_private.h"
31 #include "nir_worklist.h"
32 #include "util/half_float.h"
33 #include <limits.h>
34 #include <assert.h>
35 #include <math.h>
36 #include "util/u_math.h"
37 #include "util/u_qsort.h"
38
39 #include "main/menums.h" /* BITFIELD64_MASK */
40
41
42 /** Return true if the component mask "mask" with bit size "old_bit_size" can
43 * be re-interpreted to be used with "new_bit_size".
44 */
45 bool
46 nir_component_mask_can_reinterpret(nir_component_mask_t mask,
47 unsigned old_bit_size,
48 unsigned new_bit_size)
49 {
50 assert(util_is_power_of_two_nonzero(old_bit_size));
51 assert(util_is_power_of_two_nonzero(new_bit_size));
52
53 if (old_bit_size == new_bit_size)
54 return true;
55
56 if (old_bit_size == 1 || new_bit_size == 1)
57 return false;
58
59 if (old_bit_size > new_bit_size) {
60 unsigned ratio = old_bit_size / new_bit_size;
61 return util_last_bit(mask) * ratio <= NIR_MAX_VEC_COMPONENTS;
62 }
63
64 unsigned iter = mask;
65 while (iter) {
66 int start, count;
67 u_bit_scan_consecutive_range(&iter, &start, &count);
68 start *= old_bit_size;
69 count *= old_bit_size;
70 if (start % new_bit_size != 0)
71 return false;
72 if (count % new_bit_size != 0)
73 return false;
74 }
75 return true;
76 }
77
78 /** Re-interprets a component mask "mask" with bit size "old_bit_size" so that
79 * it can be used with "new_bit_size".
80 */
81 nir_component_mask_t
82 nir_component_mask_reinterpret(nir_component_mask_t mask,
83 unsigned old_bit_size,
84 unsigned new_bit_size)
85 {
86 assert(nir_component_mask_can_reinterpret(mask, old_bit_size, new_bit_size));
87
88 if (old_bit_size == new_bit_size)
89 return mask;
90
91 nir_component_mask_t new_mask = 0;
92 unsigned iter = mask;
93 while (iter) {
94 int start, count;
95 u_bit_scan_consecutive_range(&iter, &start, &count);
96 start = start * old_bit_size / new_bit_size;
97 count = count * old_bit_size / new_bit_size;
98 new_mask |= BITFIELD_RANGE(start, count);
99 }
100 return new_mask;
101 }
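/* Example (illustrative sketch, not part of the upstream API): a pass that
 * rewrites a 64-bit value as a pair of 32-bit components could translate its
 * write mask with the two helpers above.  The function name is hypothetical.
 */
static inline nir_component_mask_t
example_narrow_write_mask_64_to_32(nir_component_mask_t mask64)
{
   /* 0x1 over 64-bit components covers the same bytes as 0x3 over 32-bit
    * components; the can_reinterpret check guards against exceeding
    * NIR_MAX_VEC_COMPONENTS and against 1-bit booleans.
    */
   assert(nir_component_mask_can_reinterpret(mask64, 64, 32));
   return nir_component_mask_reinterpret(mask64, 64, 32);
}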
102
103 static void
104 nir_shader_destructor(void *ptr)
105 {
106 nir_shader *shader = ptr;
107
108 /* Free all instrs from the shader, since they're not ralloced. */
109 list_for_each_entry_safe(nir_instr, instr, &shader->gc_list, gc_node) {
110 nir_instr_free(instr);
111 }
112 }
113
114 nir_shader *
115 nir_shader_create(void *mem_ctx,
116 gl_shader_stage stage,
117 const nir_shader_compiler_options *options,
118 shader_info *si)
119 {
120 nir_shader *shader = rzalloc(mem_ctx, nir_shader);
121 ralloc_set_destructor(shader, nir_shader_destructor);
122
123 exec_list_make_empty(&shader->variables);
124
125 shader->options = options;
126
127 if (si) {
128 assert(si->stage == stage);
129 shader->info = *si;
130 } else {
131 shader->info.stage = stage;
132 }
133
134 exec_list_make_empty(&shader->functions);
135
136 list_inithead(&shader->gc_list);
137
138 shader->num_inputs = 0;
139 shader->num_outputs = 0;
140 shader->num_uniforms = 0;
141
142 return shader;
143 }
144
145 static nir_register *
146 reg_create(void *mem_ctx, struct exec_list *list)
147 {
148 nir_register *reg = ralloc(mem_ctx, nir_register);
149
150 list_inithead(&reg->uses);
151 list_inithead(&reg->defs);
152 list_inithead(&reg->if_uses);
153
154 reg->num_components = 0;
155 reg->bit_size = 32;
156 reg->num_array_elems = 0;
157 reg->divergent = false;
158
159 exec_list_push_tail(list, &reg->node);
160
161 return reg;
162 }
163
164 nir_register *
165 nir_local_reg_create(nir_function_impl *impl)
166 {
167 nir_register *reg = reg_create(ralloc_parent(impl), &impl->registers);
168 reg->index = impl->reg_alloc++;
169
170 return reg;
171 }
172
173 void
174 nir_reg_remove(nir_register *reg)
175 {
176 exec_node_remove(&reg->node);
177 }
178
179 void
180 nir_shader_add_variable(nir_shader *shader, nir_variable *var)
181 {
182 switch (var->data.mode) {
183 case nir_var_function_temp:
184 assert(!"nir_shader_add_variable cannot be used for local variables");
185 return;
186
187 case nir_var_shader_temp:
188 case nir_var_shader_in:
189 case nir_var_shader_out:
190 case nir_var_uniform:
191 case nir_var_mem_ubo:
192 case nir_var_mem_ssbo:
193 case nir_var_mem_shared:
194 case nir_var_system_value:
195 case nir_var_mem_push_const:
196 case nir_var_mem_constant:
197 case nir_var_shader_call_data:
198 case nir_var_ray_hit_attrib:
199 break;
200
201 case nir_var_mem_global:
202 assert(!"nir_shader_add_variable cannot be used for global memory");
203 return;
204
205 default:
206 assert(!"invalid mode");
207 return;
208 }
209
210 exec_list_push_tail(&shader->variables, &var->node);
211 }
212
213 nir_variable *
214 nir_variable_create(nir_shader *shader, nir_variable_mode mode,
215 const struct glsl_type *type, const char *name)
216 {
217 nir_variable *var = rzalloc(shader, nir_variable);
218 var->name = ralloc_strdup(var, name);
219 var->type = type;
220 var->data.mode = mode;
221 var->data.how_declared = nir_var_declared_normally;
222
223 if ((mode == nir_var_shader_in &&
224 shader->info.stage != MESA_SHADER_VERTEX &&
225 shader->info.stage != MESA_SHADER_KERNEL) ||
226 (mode == nir_var_shader_out &&
227 shader->info.stage != MESA_SHADER_FRAGMENT))
228 var->data.interpolation = INTERP_MODE_SMOOTH;
229
230 if (mode == nir_var_shader_in || mode == nir_var_uniform)
231 var->data.read_only = true;
232
233 nir_shader_add_variable(shader, var);
234
235 return var;
236 }
237
238 nir_variable *
239 nir_local_variable_create(nir_function_impl *impl,
240 const struct glsl_type *type, const char *name)
241 {
242 nir_variable *var = rzalloc(impl->function->shader, nir_variable);
243 var->name = ralloc_strdup(var, name);
244 var->type = type;
245 var->data.mode = nir_var_function_temp;
246
247 nir_function_impl_add_variable(impl, var);
248
249 return var;
250 }
251
252 nir_variable *
253 nir_find_variable_with_location(nir_shader *shader,
254 nir_variable_mode mode,
255 unsigned location)
256 {
257 assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp);
258 nir_foreach_variable_with_modes(var, shader, mode) {
259 if (var->data.location == location)
260 return var;
261 }
262 return NULL;
263 }
264
265 nir_variable *
266 nir_find_variable_with_driver_location(nir_shader *shader,
267 nir_variable_mode mode,
268 unsigned location)
269 {
270 assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp);
271 nir_foreach_variable_with_modes(var, shader, mode) {
272 if (var->data.driver_location == location)
273 return var;
274 }
275 return NULL;
276 }
277
278 /* Annoyingly, qsort_r is not in the C standard library and, in particular, we
279 * can't count on it on MSVC and Android. So we stuff the CMP function into
280 * each array element. It's a bit messy and burns more memory, but the list of
281 * variables should never be all that long.
282 */
283 struct var_cmp {
284 nir_variable *var;
285 int (*cmp)(const nir_variable *, const nir_variable *);
286 };
287
288 static int
289 var_sort_cmp(const void *_a, const void *_b, void *_cmp)
290 {
291 const struct var_cmp *a = _a;
292 const struct var_cmp *b = _b;
293 assert(a->cmp == b->cmp);
294 return a->cmp(a->var, b->var);
295 }
296
297 void
298 nir_sort_variables_with_modes(nir_shader *shader,
299 int (*cmp)(const nir_variable *,
300 const nir_variable *),
301 nir_variable_mode modes)
302 {
303 unsigned num_vars = 0;
304 nir_foreach_variable_with_modes(var, shader, modes) {
305 ++num_vars;
306 }
307 struct var_cmp *vars = ralloc_array(shader, struct var_cmp, num_vars);
308 unsigned i = 0;
309 nir_foreach_variable_with_modes_safe(var, shader, modes) {
310 exec_node_remove(&var->node);
311 vars[i++] = (struct var_cmp){
312 .var = var,
313 .cmp = cmp,
314 };
315 }
316 assert(i == num_vars);
317
318 util_qsort_r(vars, num_vars, sizeof(*vars), var_sort_cmp, cmp);
319
320 for (i = 0; i < num_vars; i++)
321 exec_list_push_tail(&shader->variables, &vars[i].var->node);
322
323 ralloc_free(vars);
324 }
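/* Example (hypothetical comparator, not upstream code): sorting shader
 * inputs by location with the helper above.
 */
static int
example_cmp_vars_by_location(const nir_variable *a, const nir_variable *b)
{
   return a->data.location - b->data.location;
}

static inline void
example_sort_inputs_by_location(nir_shader *shader)
{
   nir_sort_variables_with_modes(shader, example_cmp_vars_by_location,
                                 nir_var_shader_in);
}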
325
326 nir_function *
327 nir_function_create(nir_shader *shader, const char *name)
328 {
329 nir_function *func = ralloc(shader, nir_function);
330
331 exec_list_push_tail(&shader->functions, &func->node);
332
333 func->name = ralloc_strdup(func, name);
334 func->shader = shader;
335 func->num_params = 0;
336 func->params = NULL;
337 func->impl = NULL;
338 func->is_entrypoint = false;
339
340 return func;
341 }
342
343 static bool src_has_indirect(nir_src *src)
344 {
345 return !src->is_ssa && src->reg.indirect;
346 }
347
348 static void src_free_indirects(nir_src *src)
349 {
350 if (src_has_indirect(src)) {
351 assert(src->reg.indirect->is_ssa || !src->reg.indirect->reg.indirect);
352 free(src->reg.indirect);
353 src->reg.indirect = NULL;
354 }
355 }
356
357 static void dest_free_indirects(nir_dest *dest)
358 {
359 if (!dest->is_ssa && dest->reg.indirect) {
360 assert(dest->reg.indirect->is_ssa || !dest->reg.indirect->reg.indirect);
361 free(dest->reg.indirect);
362 dest->reg.indirect = NULL;
363 }
364 }
365
366 /* NOTE: if the instruction you are copying a src to is already added
367 * to the IR, use nir_instr_rewrite_src() instead.
368 */
369 void nir_src_copy(nir_src *dest, const nir_src *src)
370 {
371 src_free_indirects(dest);
372
373 dest->is_ssa = src->is_ssa;
374 if (src->is_ssa) {
375 dest->ssa = src->ssa;
376 } else {
377 dest->reg.base_offset = src->reg.base_offset;
378 dest->reg.reg = src->reg.reg;
379 if (src->reg.indirect) {
380 dest->reg.indirect = calloc(1, sizeof(nir_src));
381 nir_src_copy(dest->reg.indirect, src->reg.indirect);
382 } else {
383 dest->reg.indirect = NULL;
384 }
385 }
386 }
387
388 void nir_dest_copy(nir_dest *dest, const nir_dest *src)
389 {
390 /* Copying an SSA definition makes no sense whatsoever. */
391 assert(!src->is_ssa);
392
393 dest_free_indirects(dest);
394
395 dest->is_ssa = false;
396
397 dest->reg.base_offset = src->reg.base_offset;
398 dest->reg.reg = src->reg.reg;
399 if (src->reg.indirect) {
400 dest->reg.indirect = calloc(1, sizeof(nir_src));
401 nir_src_copy(dest->reg.indirect, src->reg.indirect);
402 } else {
403 dest->reg.indirect = NULL;
404 }
405 }
406
407 void
408 nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src)
409 {
410 nir_src_copy(&dest->src, &src->src);
411 dest->abs = src->abs;
412 dest->negate = src->negate;
413 for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
414 dest->swizzle[i] = src->swizzle[i];
415 }
416
417 void
418 nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src)
419 {
420 nir_dest_copy(&dest->dest, &src->dest);
421 dest->write_mask = src->write_mask;
422 dest->saturate = src->saturate;
423 }
424
425 bool
426 nir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, unsigned srcn)
427 {
428 static uint8_t trivial_swizzle[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
429 STATIC_ASSERT(ARRAY_SIZE(trivial_swizzle) == NIR_MAX_VEC_COMPONENTS);
430
431 const nir_alu_src *src = &alu->src[srcn];
432 unsigned num_components = nir_ssa_alu_instr_src_components(alu, srcn);
433
434 return src->src.is_ssa && (src->src.ssa->num_components == num_components) &&
435 !src->abs && !src->negate &&
436 (memcmp(src->swizzle, trivial_swizzle, num_components) == 0);
437 }
438
439
440 static void
441 cf_init(nir_cf_node *node, nir_cf_node_type type)
442 {
443 exec_node_init(&node->node);
444 node->parent = NULL;
445 node->type = type;
446 }
447
448 nir_function_impl *
449 nir_function_impl_create_bare(nir_shader *shader)
450 {
451 nir_function_impl *impl = ralloc(shader, nir_function_impl);
452
453 impl->function = NULL;
454
455 cf_init(&impl->cf_node, nir_cf_node_function);
456
457 exec_list_make_empty(&impl->body);
458 exec_list_make_empty(&impl->registers);
459 exec_list_make_empty(&impl->locals);
460 impl->reg_alloc = 0;
461 impl->ssa_alloc = 0;
462 impl->num_blocks = 0;
463 impl->valid_metadata = nir_metadata_none;
464 impl->structured = true;
465
466 /* create start & end blocks */
467 nir_block *start_block = nir_block_create(shader);
468 nir_block *end_block = nir_block_create(shader);
469 start_block->cf_node.parent = &impl->cf_node;
470 end_block->cf_node.parent = &impl->cf_node;
471 impl->end_block = end_block;
472
473 exec_list_push_tail(&impl->body, &start_block->cf_node.node);
474
475 start_block->successors[0] = end_block;
476 _mesa_set_add(end_block->predecessors, start_block);
477 return impl;
478 }
479
480 nir_function_impl *
481 nir_function_impl_create(nir_function *function)
482 {
483 assert(function->impl == NULL);
484
485 nir_function_impl *impl = nir_function_impl_create_bare(function->shader);
486
487 function->impl = impl;
488 impl->function = function;
489
490 return impl;
491 }
492
493 nir_block *
494 nir_block_create(nir_shader *shader)
495 {
496 nir_block *block = rzalloc(shader, nir_block);
497
498 cf_init(&block->cf_node, nir_cf_node_block);
499
500 block->successors[0] = block->successors[1] = NULL;
501 block->predecessors = _mesa_pointer_set_create(block);
502 block->imm_dom = NULL;
503 /* XXX maybe it would be worth it to defer allocation? This
504 * way it doesn't get allocated for shader refs that never run
505 * nir_calc_dominance? For example, state-tracker creates an
506 * initial IR, clones that, runs appropriate lowering pass, passes
507 * to driver which does common lowering/opt, and then stores ref
508 * which is later used to do state-specific lowering and further
509 * opt. Do any of the references not need dominance metadata?
510 */
511 block->dom_frontier = _mesa_pointer_set_create(block);
512
513 exec_list_make_empty(&block->instr_list);
514
515 return block;
516 }
517
518 static inline void
519 src_init(nir_src *src)
520 {
521 src->is_ssa = false;
522 src->reg.reg = NULL;
523 src->reg.indirect = NULL;
524 src->reg.base_offset = 0;
525 }
526
527 nir_if *
528 nir_if_create(nir_shader *shader)
529 {
530 nir_if *if_stmt = ralloc(shader, nir_if);
531
532 if_stmt->control = nir_selection_control_none;
533
534 cf_init(&if_stmt->cf_node, nir_cf_node_if);
535 src_init(&if_stmt->condition);
536
537 nir_block *then = nir_block_create(shader);
538 exec_list_make_empty(&if_stmt->then_list);
539 exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
540 then->cf_node.parent = &if_stmt->cf_node;
541
542 nir_block *else_stmt = nir_block_create(shader);
543 exec_list_make_empty(&if_stmt->else_list);
544 exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
545 else_stmt->cf_node.parent = &if_stmt->cf_node;
546
547 return if_stmt;
548 }
549
550 nir_loop *
551 nir_loop_create(nir_shader *shader)
552 {
553 nir_loop *loop = rzalloc(shader, nir_loop);
554
555 cf_init(&loop->cf_node, nir_cf_node_loop);
556 /* Assume that loops are divergent until proven otherwise */
557 loop->divergent = true;
558
559 nir_block *body = nir_block_create(shader);
560 exec_list_make_empty(&loop->body);
561 exec_list_push_tail(&loop->body, &body->cf_node.node);
562 body->cf_node.parent = &loop->cf_node;
563
564 body->successors[0] = body;
565 _mesa_set_add(body->predecessors, body);
566
567 return loop;
568 }
569
570 static void
571 instr_init(nir_instr *instr, nir_instr_type type)
572 {
573 instr->type = type;
574 instr->block = NULL;
575 exec_node_init(&instr->node);
576 }
577
578 static void
579 dest_init(nir_dest *dest)
580 {
581 dest->is_ssa = false;
582 dest->reg.reg = NULL;
583 dest->reg.indirect = NULL;
584 dest->reg.base_offset = 0;
585 }
586
587 static void
588 alu_dest_init(nir_alu_dest *dest)
589 {
590 dest_init(&dest->dest);
591 dest->saturate = false;
592 dest->write_mask = 0xf;
593 }
594
595 static void
596 alu_src_init(nir_alu_src *src)
597 {
598 src_init(&src->src);
599 src->abs = src->negate = false;
600 for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
601 src->swizzle[i] = i;
602 }
603
604 nir_alu_instr *
605 nir_alu_instr_create(nir_shader *shader, nir_op op)
606 {
607 unsigned num_srcs = nir_op_infos[op].num_inputs;
608 /* TODO: don't use calloc */
609 nir_alu_instr *instr = calloc(1, sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));
610
611 instr_init(&instr->instr, nir_instr_type_alu);
612 instr->op = op;
613 alu_dest_init(&instr->dest);
614 for (unsigned i = 0; i < num_srcs; i++)
615 alu_src_init(&instr->src[i]);
616
617 list_add(&instr->instr.gc_node, &shader->gc_list);
618
619 return instr;
620 }
621
622 nir_deref_instr *
623 nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
624 {
625 nir_deref_instr *instr = calloc(1, sizeof(*instr));
626
627 instr_init(&instr->instr, nir_instr_type_deref);
628
629 instr->deref_type = deref_type;
630 if (deref_type != nir_deref_type_var)
631 src_init(&instr->parent);
632
633 if (deref_type == nir_deref_type_array ||
634 deref_type == nir_deref_type_ptr_as_array)
635 src_init(&instr->arr.index);
636
637 dest_init(&instr->dest);
638
639 list_add(&instr->instr.gc_node, &shader->gc_list);
640
641 return instr;
642 }
643
644 nir_jump_instr *
645 nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
646 {
647 nir_jump_instr *instr = malloc(sizeof(*instr));
648 instr_init(&instr->instr, nir_instr_type_jump);
649 src_init(&instr->condition);
650 instr->type = type;
651 instr->target = NULL;
652 instr->else_target = NULL;
653
654 list_add(&instr->instr.gc_node, &shader->gc_list);
655
656 return instr;
657 }
658
659 nir_load_const_instr *
660 nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
661 unsigned bit_size)
662 {
663 nir_load_const_instr *instr =
664 calloc(1, sizeof(*instr) + num_components * sizeof(*instr->value));
665 instr_init(&instr->instr, nir_instr_type_load_const);
666
667 nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size);
668
669 list_add(&instr->instr.gc_node, &shader->gc_list);
670
671 return instr;
672 }
673
674 nir_intrinsic_instr *
675 nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
676 {
677 unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
678 /* TODO: don't use calloc */
679 nir_intrinsic_instr *instr =
680 calloc(1, sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));
681
682 instr_init(&instr->instr, nir_instr_type_intrinsic);
683 instr->intrinsic = op;
684
685 if (nir_intrinsic_infos[op].has_dest)
686 dest_init(&instr->dest);
687
688 for (unsigned i = 0; i < num_srcs; i++)
689 src_init(&instr->src[i]);
690
691 list_add(&instr->instr.gc_node, &shader->gc_list);
692
693 return instr;
694 }
695
696 nir_call_instr *
697 nir_call_instr_create(nir_shader *shader, nir_function *callee)
698 {
699 const unsigned num_params = callee->num_params;
700 nir_call_instr *instr =
701 calloc(1, sizeof(*instr) + num_params * sizeof(instr->params[0]));
702
703 instr_init(&instr->instr, nir_instr_type_call);
704 instr->callee = callee;
705 instr->num_params = num_params;
706 for (unsigned i = 0; i < num_params; i++)
707 src_init(&instr->params[i]);
708
709 list_add(&instr->instr.gc_node, &shader->gc_list);
710
711 return instr;
712 }
713
714 static int8_t default_tg4_offsets[4][2] =
715 {
716 { 0, 1 },
717 { 1, 1 },
718 { 1, 0 },
719 { 0, 0 },
720 };
721
722 nir_tex_instr *
723 nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
724 {
725 nir_tex_instr *instr = calloc(1, sizeof(*instr));
726 instr_init(&instr->instr, nir_instr_type_tex);
727
728 dest_init(&instr->dest);
729
730 instr->num_srcs = num_srcs;
731 instr->src = malloc(sizeof(nir_tex_src) * num_srcs);
732 for (unsigned i = 0; i < num_srcs; i++)
733 src_init(&instr->src[i].src);
734
735 instr->texture_index = 0;
736 instr->sampler_index = 0;
737 memcpy(instr->tg4_offsets, default_tg4_offsets, sizeof(instr->tg4_offsets));
738
739 list_add(&instr->instr.gc_node, &shader->gc_list);
740
741 return instr;
742 }
743
744 void
745 nir_tex_instr_add_src(nir_tex_instr *tex,
746 nir_tex_src_type src_type,
747 nir_src src)
748 {
749 nir_tex_src *new_srcs = calloc(sizeof(*new_srcs),
750 tex->num_srcs + 1);
751
752 for (unsigned i = 0; i < tex->num_srcs; i++) {
753 new_srcs[i].src_type = tex->src[i].src_type;
754 nir_instr_move_src(&tex->instr, &new_srcs[i].src,
755 &tex->src[i].src);
756 }
757
758 free(tex->src);
759 tex->src = new_srcs;
760
761 tex->src[tex->num_srcs].src_type = src_type;
762 nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs].src, src);
763 tex->num_srcs++;
764 }
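/* Example (illustrative sketch, not upstream code): a lowering pass adding an
 * explicit zero LOD to a texture instruction.  It assumes a nir_builder "b"
 * whose cursor is placed before "tex".
 */
static inline void
example_tex_add_zero_lod(nir_builder *b, nir_tex_instr *tex)
{
   nir_tex_instr_add_src(tex, nir_tex_src_lod,
                         nir_src_for_ssa(nir_imm_int(b, 0)));
}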
765
766 void
767 nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx)
768 {
769 assert(src_idx < tex->num_srcs);
770
771 /* First rewrite the source to NIR_SRC_INIT */
772 nir_instr_rewrite_src(&tex->instr, &tex->src[src_idx].src, NIR_SRC_INIT);
773
774 /* Now, move all of the other sources down */
775 for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) {
776 tex->src[i-1].src_type = tex->src[i].src_type;
777 nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
778 }
779 tex->num_srcs--;
780 }
781
782 bool
783 nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex)
784 {
785 if (tex->op != nir_texop_tg4)
786 return false;
787 return memcmp(tex->tg4_offsets, default_tg4_offsets,
788 sizeof(tex->tg4_offsets)) != 0;
789 }
790
791 nir_phi_instr *
792 nir_phi_instr_create(nir_shader *shader)
793 {
794 nir_phi_instr *instr = malloc(sizeof(*instr));
795 instr_init(&instr->instr, nir_instr_type_phi);
796
797 dest_init(&instr->dest);
798 exec_list_make_empty(&instr->srcs);
799
800 list_add(&instr->instr.gc_node, &shader->gc_list);
801
802 return instr;
803 }
804
805 /**
806 * Adds a new source to a NIR instruction.
807 *
808 * Note that this does not update the def/use relationship for src, assuming
809 * that the instr is not in the shader. If it is, you have to do:
810 *
811 * list_addtail(&phi_src->src.use_link, &src.ssa->uses);
812 */
813 nir_phi_src *
814 nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_src src)
815 {
816 nir_phi_src *phi_src;
817
818 phi_src = calloc(1, sizeof(nir_phi_src));
819 phi_src->pred = pred;
820 phi_src->src = src;
821 phi_src->src.parent_instr = &instr->instr;
822 exec_list_push_tail(&instr->srcs, &phi_src->node);
823
824 return phi_src;
825 }
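/* Example (illustrative sketch, not upstream code): adding a source to a phi
 * that is already inserted in the IR.  Per the note above, the use list must
 * then be hooked up by hand.  The function name is hypothetical.
 */
static inline void
example_add_src_to_inserted_phi(nir_phi_instr *phi, nir_block *pred,
                                nir_ssa_def *def)
{
   nir_phi_src *phi_src =
      nir_phi_instr_add_src(phi, pred, nir_src_for_ssa(def));
   list_addtail(&phi_src->src.use_link, &def->uses);
}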
826
827 nir_parallel_copy_instr *
828 nir_parallel_copy_instr_create(nir_shader *shader)
829 {
830 nir_parallel_copy_instr *instr = malloc(sizeof(*instr));
831 instr_init(&instr->instr, nir_instr_type_parallel_copy);
832
833 exec_list_make_empty(&instr->entries);
834
835 list_add(&instr->instr.gc_node, &shader->gc_list);
836
837 return instr;
838 }
839
840 nir_ssa_undef_instr *
841 nir_ssa_undef_instr_create(nir_shader *shader,
842 unsigned num_components,
843 unsigned bit_size)
844 {
845 nir_ssa_undef_instr *instr = malloc(sizeof(*instr));
846 instr_init(&instr->instr, nir_instr_type_ssa_undef);
847
848 nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size);
849
850 list_add(&instr->instr.gc_node, &shader->gc_list);
851
852 return instr;
853 }
854
855 static nir_const_value
856 const_value_float(double d, unsigned bit_size)
857 {
858 nir_const_value v;
859 memset(&v, 0, sizeof(v));
860 switch (bit_size) {
861 case 16: v.u16 = _mesa_float_to_half(d); break;
862 case 32: v.f32 = d; break;
863 case 64: v.f64 = d; break;
864 default:
865 unreachable("Invalid bit size");
866 }
867 return v;
868 }
869
870 static nir_const_value
871 const_value_int(int64_t i, unsigned bit_size)
872 {
873 nir_const_value v;
874 memset(&v, 0, sizeof(v));
875 switch (bit_size) {
876 case 1: v.b = i & 1; break;
877 case 8: v.i8 = i; break;
878 case 16: v.i16 = i; break;
879 case 32: v.i32 = i; break;
880 case 64: v.i64 = i; break;
881 default:
882 unreachable("Invalid bit size");
883 }
884 return v;
885 }
886
887 nir_const_value
888 nir_alu_binop_identity(nir_op binop, unsigned bit_size)
889 {
890 const int64_t max_int = (1ull << (bit_size - 1)) - 1;
891 const int64_t min_int = -max_int - 1;
892 switch (binop) {
893 case nir_op_iadd:
894 return const_value_int(0, bit_size);
895 case nir_op_fadd:
896 return const_value_float(0, bit_size);
897 case nir_op_imul:
898 return const_value_int(1, bit_size);
899 case nir_op_fmul:
900 return const_value_float(1, bit_size);
901 case nir_op_imin:
902 return const_value_int(max_int, bit_size);
903 case nir_op_umin:
904 return const_value_int(~0ull, bit_size);
905 case nir_op_fmin:
906 return const_value_float(INFINITY, bit_size);
907 case nir_op_imax:
908 return const_value_int(min_int, bit_size);
909 case nir_op_umax:
910 return const_value_int(0, bit_size);
911 case nir_op_fmax:
912 return const_value_float(-INFINITY, bit_size);
913 case nir_op_iand:
914 return const_value_int(~0ull, bit_size);
915 case nir_op_ior:
916 return const_value_int(0, bit_size);
917 case nir_op_ixor:
918 return const_value_int(0, bit_size);
919 default:
920 unreachable("Invalid reduction operation");
921 }
922 }
923
924 nir_function_impl *
925 nir_cf_node_get_function(nir_cf_node *node)
926 {
927 while (node->type != nir_cf_node_function) {
928 node = node->parent;
929 }
930
931 return nir_cf_node_as_function(node);
932 }
933
934 /* Reduces a cursor by trying to convert everything to an "after" cursor and
935 * by going up to block granularity when possible.
936 */
937 static nir_cursor
938 reduce_cursor(nir_cursor cursor)
939 {
940 switch (cursor.option) {
941 case nir_cursor_before_block:
942 if (exec_list_is_empty(&cursor.block->instr_list)) {
943 /* Empty block. After is as good as before. */
944 cursor.option = nir_cursor_after_block;
945 }
946 return cursor;
947
948 case nir_cursor_after_block:
949 return cursor;
950
951 case nir_cursor_before_instr: {
952 nir_instr *prev_instr = nir_instr_prev(cursor.instr);
953 if (prev_instr) {
954 /* Before this instruction is after the previous */
955 cursor.instr = prev_instr;
956 cursor.option = nir_cursor_after_instr;
957 } else {
958 /* No previous instruction. Switch to before block */
959 cursor.block = cursor.instr->block;
960 cursor.option = nir_cursor_before_block;
961 }
962 return reduce_cursor(cursor);
963 }
964
965 case nir_cursor_after_instr:
966 if (nir_instr_next(cursor.instr) == NULL) {
967 /* This is the last instruction, switch to after block */
968 cursor.option = nir_cursor_after_block;
969 cursor.block = cursor.instr->block;
970 }
971 return cursor;
972
973 default:
974 unreachable("Inavlid cursor option");
975 }
976 }
977
978 bool
979 nir_cursors_equal(nir_cursor a, nir_cursor b)
980 {
981 /* Reduced cursors should be unique */
982 a = reduce_cursor(a);
983 b = reduce_cursor(b);
984
985 return a.block == b.block && a.option == b.option;
986 }
987
988 static bool
989 add_use_cb(nir_src *src, void *state)
990 {
991 nir_instr *instr = state;
992
993 src->parent_instr = instr;
994 list_addtail(&src->use_link,
995 src->is_ssa ? &src->ssa->uses : &src->reg.reg->uses);
996
997 return true;
998 }
999
1000 static bool
1001 add_ssa_def_cb(nir_ssa_def *def, void *state)
1002 {
1003 nir_instr *instr = state;
1004
1005 if (instr->block && def->index == UINT_MAX) {
1006 nir_function_impl *impl =
1007 nir_cf_node_get_function(&instr->block->cf_node);
1008
1009 def->index = impl->ssa_alloc++;
1010
1011 impl->valid_metadata &= ~nir_metadata_live_ssa_defs;
1012 }
1013
1014 return true;
1015 }
1016
1017 static bool
1018 add_reg_def_cb(nir_dest *dest, void *state)
1019 {
1020 nir_instr *instr = state;
1021
1022 if (!dest->is_ssa) {
1023 dest->reg.parent_instr = instr;
1024 list_addtail(&dest->reg.def_link, &dest->reg.reg->defs);
1025 }
1026
1027 return true;
1028 }
1029
1030 static void
1031 add_defs_uses(nir_instr *instr)
1032 {
1033 nir_foreach_src(instr, add_use_cb, instr);
1034 nir_foreach_dest(instr, add_reg_def_cb, instr);
1035 nir_foreach_ssa_def(instr, add_ssa_def_cb, instr);
1036 }
1037
1038 void
1039 nir_instr_insert(nir_cursor cursor, nir_instr *instr)
1040 {
1041 switch (cursor.option) {
1042 case nir_cursor_before_block:
1043 /* Only allow inserting jumps into empty blocks. */
1044 if (instr->type == nir_instr_type_jump)
1045 assert(exec_list_is_empty(&cursor.block->instr_list));
1046
1047 instr->block = cursor.block;
1048 add_defs_uses(instr);
1049 exec_list_push_head(&cursor.block->instr_list, &instr->node);
1050 break;
1051 case nir_cursor_after_block: {
1052 /* Inserting instructions after a jump is illegal. */
1053 nir_instr *last = nir_block_last_instr(cursor.block);
1054 assert(last == NULL || last->type != nir_instr_type_jump);
1055 (void) last;
1056
1057 instr->block = cursor.block;
1058 add_defs_uses(instr);
1059 exec_list_push_tail(&cursor.block->instr_list, &instr->node);
1060 break;
1061 }
1062 case nir_cursor_before_instr:
1063 assert(instr->type != nir_instr_type_jump);
1064 instr->block = cursor.instr->block;
1065 add_defs_uses(instr);
1066 exec_node_insert_node_before(&cursor.instr->node, &instr->node);
1067 break;
1068 case nir_cursor_after_instr:
1069 /* Inserting instructions after a jump is illegal. */
1070 assert(cursor.instr->type != nir_instr_type_jump);
1071
1072 /* Only allow inserting jumps at the end of the block. */
1073 if (instr->type == nir_instr_type_jump)
1074 assert(cursor.instr == nir_block_last_instr(cursor.instr->block));
1075
1076 instr->block = cursor.instr->block;
1077 add_defs_uses(instr);
1078 exec_node_insert_after(&cursor.instr->node, &instr->node);
1079 break;
1080 }
1081
1082 if (instr->type == nir_instr_type_jump)
1083 nir_handle_add_jump(instr->block);
1084
1085 nir_function_impl *impl = nir_cf_node_get_function(&instr->block->cf_node);
1086 impl->valid_metadata &= ~nir_metadata_instr_index;
1087 }
1088
1089 bool
1090 nir_instr_move(nir_cursor cursor, nir_instr *instr)
1091 {
1092 /* If the cursor happens to refer to this instruction (either before or
1093 * after), don't do anything.
1094 */
1095 if ((cursor.option == nir_cursor_before_instr ||
1096 cursor.option == nir_cursor_after_instr) &&
1097 cursor.instr == instr)
1098 return false;
1099
1100 nir_instr_remove(instr);
1101 nir_instr_insert(cursor, instr);
1102 return true;
1103 }
1104
1105 static bool
1106 src_is_valid(const nir_src *src)
1107 {
1108 return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL);
1109 }
1110
1111 static bool
1112 remove_use_cb(nir_src *src, void *state)
1113 {
1114 (void) state;
1115
1116 if (src_is_valid(src))
1117 list_del(&src->use_link);
1118
1119 return true;
1120 }
1121
1122 static bool
1123 remove_def_cb(nir_dest *dest, void *state)
1124 {
1125 (void) state;
1126
1127 if (!dest->is_ssa)
1128 list_del(&dest->reg.def_link);
1129
1130 return true;
1131 }
1132
1133 static void
1134 remove_defs_uses(nir_instr *instr)
1135 {
1136 nir_foreach_dest(instr, remove_def_cb, instr);
1137 nir_foreach_src(instr, remove_use_cb, instr);
1138 }
1139
1140 void nir_instr_remove_v(nir_instr *instr)
1141 {
1142 remove_defs_uses(instr);
1143 exec_node_remove(&instr->node);
1144
1145 if (instr->type == nir_instr_type_jump) {
1146 nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
1147 nir_handle_remove_jump(instr->block, jump_instr->type);
1148 }
1149 }
1150
1151 static bool free_src_indirects_cb(nir_src *src, void *state)
1152 {
1153 src_free_indirects(src);
1154 return true;
1155 }
1156
1157 static bool free_dest_indirects_cb(nir_dest *dest, void *state)
1158 {
1159 dest_free_indirects(dest);
1160 return true;
1161 }
1162
1163 void nir_instr_free(nir_instr *instr)
1164 {
1165 nir_foreach_src(instr, free_src_indirects_cb, NULL);
1166 nir_foreach_dest(instr, free_dest_indirects_cb, NULL);
1167
1168 switch (instr->type) {
1169 case nir_instr_type_tex:
1170 free(nir_instr_as_tex(instr)->src);
1171 break;
1172
1173 case nir_instr_type_phi: {
1174 nir_phi_instr *phi = nir_instr_as_phi(instr);
1175 nir_foreach_phi_src_safe(phi_src, phi) {
1176 free(phi_src);
1177 }
1178 break;
1179 }
1180
1181 default:
1182 break;
1183 }
1184
1185 list_del(&instr->gc_node);
1186 free(instr);
1187 }
1188
1189 void
1190 nir_instr_free_list(struct exec_list *list)
1191 {
1192 struct exec_node *node;
1193 while ((node = exec_list_pop_head(list))) {
1194 nir_instr *removed_instr = exec_node_data(nir_instr, node, node);
1195 nir_instr_free(removed_instr);
1196 }
1197 }
1198
1199 static bool nir_instr_free_and_dce_live_cb(nir_ssa_def *def, void *state)
1200 {
1201 bool *live = state;
1202
1203 if (!nir_ssa_def_is_unused(def)) {
1204 *live = true;
1205 return false;
1206 } else {
1207 return true;
1208 }
1209 }
1210
1211 static bool nir_instr_free_and_dce_is_live(nir_instr *instr)
1212 {
1213 /* Note: don't have to worry about jumps because they don't have dests to
1214 * become unused.
1215 */
1216 if (instr->type == nir_instr_type_intrinsic) {
1217 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1218 const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
1219 if (!(info->flags & NIR_INTRINSIC_CAN_ELIMINATE))
1220 return true;
1221 }
1222
1223 bool live = false;
1224 nir_foreach_ssa_def(instr, nir_instr_free_and_dce_live_cb, &live);
1225 return live;
1226 }
1227
1228 static bool
1229 nir_instr_dce_add_dead_srcs_cb(nir_src *src, void *state)
1230 {
1231 nir_instr_worklist *wl = state;
1232
1233 if (src->is_ssa) {
1234 list_del(&src->use_link);
1235 if (!nir_instr_free_and_dce_is_live(src->ssa->parent_instr))
1236 nir_instr_worklist_push_tail(wl, src->ssa->parent_instr);
1237
1238 /* Stop nir_instr_remove from trying to delete the link again. */
1239 src->ssa = NULL;
1240 }
1241
1242 return true;
1243 }
1244
1245 static void
1246 nir_instr_dce_add_dead_ssa_srcs(nir_instr_worklist *wl, nir_instr *instr)
1247 {
1248 nir_foreach_src(instr, nir_instr_dce_add_dead_srcs_cb, wl);
1249 }
1250
1251 /**
1252 * Frees an instruction and any SSA defs that it used that are now dead,
1253 * returning a nir_cursor where the instruction previously was.
1254 */
1255 nir_cursor
1256 nir_instr_free_and_dce(nir_instr *instr)
1257 {
1258 nir_instr_worklist *worklist = nir_instr_worklist_create();
1259
1260 nir_instr_dce_add_dead_ssa_srcs(worklist, instr);
1261 nir_cursor c = nir_instr_remove(instr);
1262
1263 struct exec_list to_free;
1264 exec_list_make_empty(&to_free);
1265
1266 nir_instr *dce_instr;
1267 while ((dce_instr = nir_instr_worklist_pop_head(worklist))) {
1268 nir_instr_dce_add_dead_ssa_srcs(worklist, dce_instr);
1269
1270 /* If we're removing the instr where our cursor is, then we have to
1271 * point the cursor elsewhere.
1272 */
1273 if ((c.option == nir_cursor_before_instr ||
1274 c.option == nir_cursor_after_instr) &&
1275 c.instr == dce_instr)
1276 c = nir_instr_remove(dce_instr);
1277 else
1278 nir_instr_remove(dce_instr);
1279 exec_list_push_tail(&to_free, &dce_instr->node);
1280 }
1281
1282 nir_instr_free_list(&to_free);
1283
1284 nir_instr_worklist_destroy(worklist);
1285
1286 return c;
1287 }
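/* Example (hypothetical helper, not upstream code): replacing an
 * instruction's SSA result with another value and cleaning up anything that
 * becomes dead, while keeping a cursor that stays valid for a nir_builder.
 */
static inline nir_cursor
example_replace_def_and_cleanup(nir_instr *instr, nir_ssa_def *replacement)
{
   nir_ssa_def *def = nir_instr_ssa_def(instr);
   nir_ssa_def_rewrite_uses(def, replacement);
   /* "instr" is now unused; free it and any sources that died with it. */
   return nir_instr_free_and_dce(instr);
}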
1288
1289 /*@}*/
1290
1291 void
1292 nir_index_local_regs(nir_function_impl *impl)
1293 {
1294 unsigned index = 0;
1295 foreach_list_typed(nir_register, reg, node, &impl->registers) {
1296 reg->index = index++;
1297 }
1298 impl->reg_alloc = index;
1299 }
1300
1301 struct foreach_ssa_def_state {
1302 nir_foreach_ssa_def_cb cb;
1303 void *client_state;
1304 };
1305
1306 static inline bool
1307 nir_ssa_def_visitor(nir_dest *dest, void *void_state)
1308 {
1309 struct foreach_ssa_def_state *state = void_state;
1310
1311 if (dest->is_ssa)
1312 return state->cb(&dest->ssa, state->client_state);
1313 else
1314 return true;
1315 }
1316
1317 bool
1318 nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state)
1319 {
1320 switch (instr->type) {
1321 case nir_instr_type_alu:
1322 case nir_instr_type_deref:
1323 case nir_instr_type_tex:
1324 case nir_instr_type_intrinsic:
1325 case nir_instr_type_phi:
1326 case nir_instr_type_parallel_copy: {
1327 struct foreach_ssa_def_state foreach_state = {cb, state};
1328 return nir_foreach_dest(instr, nir_ssa_def_visitor, &foreach_state);
1329 }
1330
1331 case nir_instr_type_load_const:
1332 return cb(&nir_instr_as_load_const(instr)->def, state);
1333 case nir_instr_type_ssa_undef:
1334 return cb(&nir_instr_as_ssa_undef(instr)->def, state);
1335 case nir_instr_type_call:
1336 case nir_instr_type_jump:
1337 return true;
1338 default:
1339 unreachable("Invalid instruction type");
1340 }
1341 }
1342
1343 nir_ssa_def *
1344 nir_instr_ssa_def(nir_instr *instr)
1345 {
1346 switch (instr->type) {
1347 case nir_instr_type_alu:
1348 assert(nir_instr_as_alu(instr)->dest.dest.is_ssa);
1349 return &nir_instr_as_alu(instr)->dest.dest.ssa;
1350
1351 case nir_instr_type_deref:
1352 assert(nir_instr_as_deref(instr)->dest.is_ssa);
1353 return &nir_instr_as_deref(instr)->dest.ssa;
1354
1355 case nir_instr_type_tex:
1356 assert(nir_instr_as_tex(instr)->dest.is_ssa);
1357 return &nir_instr_as_tex(instr)->dest.ssa;
1358
1359 case nir_instr_type_intrinsic: {
1360 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1361 if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
1362 assert(intrin->dest.is_ssa);
1363 return &intrin->dest.ssa;
1364 } else {
1365 return NULL;
1366 }
1367 }
1368
1369 case nir_instr_type_phi:
1370 assert(nir_instr_as_phi(instr)->dest.is_ssa);
1371 return &nir_instr_as_phi(instr)->dest.ssa;
1372
1373 case nir_instr_type_parallel_copy:
1374 unreachable("Parallel copies are unsupported by this function");
1375
1376 case nir_instr_type_load_const:
1377 return &nir_instr_as_load_const(instr)->def;
1378
1379 case nir_instr_type_ssa_undef:
1380 return &nir_instr_as_ssa_undef(instr)->def;
1381
1382 case nir_instr_type_call:
1383 case nir_instr_type_jump:
1384 return NULL;
1385 }
1386
1387 unreachable("Invalid instruction type");
1388 }
1389
1390 bool
1391 nir_foreach_phi_src_leaving_block(nir_block *block,
1392 nir_foreach_src_cb cb,
1393 void *state)
1394 {
1395 for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) {
1396 if (block->successors[i] == NULL)
1397 continue;
1398
1399 nir_foreach_instr(instr, block->successors[i]) {
1400 if (instr->type != nir_instr_type_phi)
1401 break;
1402
1403 nir_phi_instr *phi = nir_instr_as_phi(instr);
1404 nir_foreach_phi_src(phi_src, phi) {
1405 if (phi_src->pred == block) {
1406 if (!cb(&phi_src->src, state))
1407 return false;
1408 }
1409 }
1410 }
1411 }
1412
1413 return true;
1414 }
1415
1416 nir_const_value
1417 nir_const_value_for_float(double f, unsigned bit_size)
1418 {
1419 nir_const_value v;
1420 memset(&v, 0, sizeof(v));
1421
1422 switch (bit_size) {
1423 case 16:
1424 v.u16 = _mesa_float_to_half(f);
1425 break;
1426 case 32:
1427 v.f32 = f;
1428 break;
1429 case 64:
1430 v.f64 = f;
1431 break;
1432 default:
1433 unreachable("Invalid bit size");
1434 }
1435
1436 return v;
1437 }
1438
1439 double
1440 nir_const_value_as_float(nir_const_value value, unsigned bit_size)
1441 {
1442 switch (bit_size) {
1443 case 16: return _mesa_half_to_float(value.u16);
1444 case 32: return value.f32;
1445 case 64: return value.f64;
1446 default:
1447 unreachable("Invalid bit size");
1448 }
1449 }
1450
1451 nir_const_value *
1452 nir_src_as_const_value(nir_src src)
1453 {
1454 if (!src.is_ssa)
1455 return NULL;
1456
1457 if (src.ssa->parent_instr->type != nir_instr_type_load_const)
1458 return NULL;
1459
1460 nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
1461
1462 return load->value;
1463 }
1464
1465 /**
1466 * Returns true if the source is known to be dynamically uniform. Otherwise
1467 * it returns false, which means the source may or may not be dynamically
1468 * uniform but this cannot be determined.
1469 */
1470 bool
1471 nir_src_is_dynamically_uniform(nir_src src)
1472 {
1473 if (!src.is_ssa)
1474 return false;
1475
1476 /* Constants are trivially dynamically uniform */
1477 if (src.ssa->parent_instr->type == nir_instr_type_load_const)
1478 return true;
1479
1480 if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
1481 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);
1482 /* As are uniform variables */
1483 if (intr->intrinsic == nir_intrinsic_load_uniform &&
1484 nir_src_is_dynamically_uniform(intr->src[0]))
1485 return true;
1486 /* Push constant loads always use uniform offsets. */
1487 if (intr->intrinsic == nir_intrinsic_load_push_constant)
1488 return true;
1489 if (intr->intrinsic == nir_intrinsic_load_deref &&
1490 nir_deref_mode_is(nir_src_as_deref(intr->src[0]), nir_var_mem_push_const))
1491 return true;
1492 }
1493
1494 /* Operating together dynamically uniform expressions produces a
1495 * dynamically uniform result
1496 */
1497 if (src.ssa->parent_instr->type == nir_instr_type_alu) {
1498 nir_alu_instr *alu = nir_instr_as_alu(src.ssa->parent_instr);
1499 for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
1500 if (!nir_src_is_dynamically_uniform(alu->src[i].src))
1501 return false;
1502 }
1503
1504 return true;
1505 }
1506
1507 /* XXX: this could have many more tests, such as when a sampler function is
1508 * called with dynamically uniform arguments.
1509 */
1510 return false;
1511 }
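/* Example (illustrative sketch, not upstream code): a backend could use the
 * helper above to decide whether a resource index needs a non-uniform
 * (e.g. waterfall-loop) access path.  Treating src[0] as the index is an
 * assumption made only for this sketch.
 */
static inline bool
example_index_needs_nonuniform_path(nir_intrinsic_instr *intrin)
{
   return !nir_src_is_dynamically_uniform(intrin->src[0]);
}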
1512
1513 static void
1514 src_remove_all_uses(nir_src *src)
1515 {
1516 for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
1517 if (!src_is_valid(src))
1518 continue;
1519
1520 list_del(&src->use_link);
1521 }
1522 }
1523
1524 static void
1525 src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
1526 {
1527 for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
1528 if (!src_is_valid(src))
1529 continue;
1530
1531 if (parent_instr) {
1532 src->parent_instr = parent_instr;
1533 if (src->is_ssa)
1534 list_addtail(&src->use_link, &src->ssa->uses);
1535 else
1536 list_addtail(&src->use_link, &src->reg.reg->uses);
1537 } else {
1538 assert(parent_if);
1539 src->parent_if = parent_if;
1540 if (src->is_ssa)
1541 list_addtail(&src->use_link, &src->ssa->if_uses);
1542 else
1543 list_addtail(&src->use_link, &src->reg.reg->if_uses);
1544 }
1545 }
1546 }
1547
1548 void
1549 nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src)
1550 {
1551 assert(!src_is_valid(src) || src->parent_instr == instr);
1552
1553 src_remove_all_uses(src);
1554 nir_src_copy(src, &new_src);
1555 src_add_all_uses(src, instr, NULL);
1556 }
1557
1558 void
1559 nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
1560 {
1561 assert(!src_is_valid(dest) || dest->parent_instr == dest_instr);
1562
1563 src_remove_all_uses(dest);
1564 src_free_indirects(dest);
1565 src_remove_all_uses(src);
1566 *dest = *src;
1567 *src = NIR_SRC_INIT;
1568 src_add_all_uses(dest, dest_instr, NULL);
1569 }
1570
1571 void
1572 nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src)
1573 {
1574 nir_src *src = &if_stmt->condition;
1575 assert(!src_is_valid(src) || src->parent_if == if_stmt);
1576
1577 src_remove_all_uses(src);
1578 nir_src_copy(src, &new_src);
1579 src_add_all_uses(src, NULL, if_stmt);
1580 }
1581
1582 void
1583 nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest)
1584 {
1585 if (dest->is_ssa) {
1586 /* We can only overwrite an SSA destination if it has no uses. */
1587 assert(nir_ssa_def_is_unused(&dest->ssa));
1588 } else {
1589 list_del(&dest->reg.def_link);
1590 if (dest->reg.indirect)
1591 src_remove_all_uses(dest->reg.indirect);
1592 }
1593
1594 /* We can't re-write with an SSA def */
1595 assert(!new_dest.is_ssa);
1596
1597 nir_dest_copy(dest, &new_dest);
1598
1599 dest->reg.parent_instr = instr;
1600 list_addtail(&dest->reg.def_link, &new_dest.reg.reg->defs);
1601
1602 if (dest->reg.indirect)
1603 src_add_all_uses(dest->reg.indirect, instr, NULL);
1604 }
1605
1606 /* note: does *not* take ownership of 'name' */
1607 void
1608 nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
1609 unsigned num_components,
1610 unsigned bit_size)
1611 {
1612 def->parent_instr = instr;
1613 list_inithead(&def->uses);
1614 list_inithead(&def->if_uses);
1615 def->num_components = num_components;
1616 def->bit_size = bit_size;
1617 def->divergent = true; /* This is the safer default */
1618
1619 if (instr->block) {
1620 nir_function_impl *impl =
1621 nir_cf_node_get_function(&instr->block->cf_node);
1622
1623 def->index = impl->ssa_alloc++;
1624
1625 impl->valid_metadata &= ~nir_metadata_live_ssa_defs;
1626 } else {
1627 def->index = UINT_MAX;
1628 }
1629 }
1630
1631 /* note: does *not* take ownership of 'name' */
1632 void
1633 nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
1634 unsigned num_components, unsigned bit_size,
1635 const char *name)
1636 {
1637 dest->is_ssa = true;
1638 nir_ssa_def_init(instr, &dest->ssa, num_components, bit_size);
1639 }
1640
1641 void
1642 nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_ssa_def *new_ssa)
1643 {
1644 assert(def != new_ssa);
1645 nir_foreach_use_safe(use_src, def)
1646 nir_instr_rewrite_src_ssa(use_src->parent_instr, use_src, new_ssa);
1647
1648 nir_foreach_if_use_safe(use_src, def)
1649 nir_if_rewrite_condition_ssa(use_src->parent_if, use_src, new_ssa);
1650 }
1651
1652 void
1653 nir_ssa_def_rewrite_uses_src(nir_ssa_def *def, nir_src new_src)
1654 {
1655 if (new_src.is_ssa) {
1656 nir_ssa_def_rewrite_uses(def, new_src.ssa);
1657 } else {
1658 nir_foreach_use_safe(use_src, def)
1659 nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
1660
1661 nir_foreach_if_use_safe(use_src, def)
1662 nir_if_rewrite_condition(use_src->parent_if, new_src);
1663 }
1664 }
1665
1666 static bool
1667 is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
1668 {
1669 assert(start->block == end->block);
1670
1671 if (between->block != start->block)
1672 return false;
1673
1674 /* Search backwards looking for "between" */
1675 while (start != end) {
1676 if (between == end)
1677 return true;
1678
1679 end = nir_instr_prev(end);
1680 assert(end);
1681 }
1682
1683 return false;
1684 }
1685
1686 /* Replaces all uses of the given SSA def with the given source but only if
1687 * the use comes after the after_me instruction. This can be useful if you
1688 * are emitting code to fix up the result of some instruction: you can freely
1689 * use the result in that code and then call rewrite_uses_after and pass the
1690 * last fixup instruction as after_me and it will replace all of the uses you
1691 * want without touching the fixup code.
1692 *
1693 * This function assumes that after_me is in the same block as
1694 * def->parent_instr and that after_me comes after def->parent_instr.
1695 */
1696 void
1697 nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_ssa_def *new_ssa,
1698 nir_instr *after_me)
1699 {
1700 if (def == new_ssa)
1701 return;
1702
1703 nir_foreach_use_safe(use_src, def) {
1704 assert(use_src->parent_instr != def->parent_instr);
1705 /* Since def already dominates all of its uses, the only way a use can
1706 * not be dominated by after_me is if it is between def and after_me in
1707 * the instruction list.
1708 */
1709 if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr))
1710 nir_instr_rewrite_src_ssa(use_src->parent_instr, use_src, new_ssa);
1711 }
1712
1713 nir_foreach_if_use_safe(use_src, def) {
1714 nir_if_rewrite_condition_ssa(use_src->parent_if,
1715 &use_src->parent_if->condition,
1716 new_ssa);
1717 }
1718 }
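/* Example (hypothetical fixup pass code, not upstream): saturate an existing
 * result and redirect every later use to the clamped value, without touching
 * the clamp itself.  Assumes a nir_builder "b" positioned right after the
 * instruction that produces "def".
 */
static inline void
example_saturate_existing_def(nir_builder *b, nir_ssa_def *def)
{
   nir_ssa_def *clamped = nir_fsat(b, def);
   nir_ssa_def_rewrite_uses_after(def, clamped, clamped->parent_instr);
}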
1719
1720 static nir_ssa_def *
1721 get_store_value(nir_intrinsic_instr *intrin)
1722 {
1723 assert(nir_intrinsic_has_write_mask(intrin));
1724 /* deref stores have the deref in src[0] and the store value in src[1] */
1725 if (intrin->intrinsic == nir_intrinsic_store_deref ||
1726 intrin->intrinsic == nir_intrinsic_store_deref_block_intel)
1727 return intrin->src[1].ssa;
1728
1729 /* all other stores have the store value in src[0] */
1730 return intrin->src[0].ssa;
1731 }
1732
1733 nir_component_mask_t
1734 nir_src_components_read(const nir_src *src)
1735 {
1736 assert(src->is_ssa && src->parent_instr);
1737
1738 if (src->parent_instr->type == nir_instr_type_alu) {
1739 nir_alu_instr *alu = nir_instr_as_alu(src->parent_instr);
1740 nir_alu_src *alu_src = exec_node_data(nir_alu_src, src, src);
1741 int src_idx = alu_src - &alu->src[0];
1742 assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs);
1743 return nir_alu_instr_src_read_mask(alu, src_idx);
1744 } else if (src->parent_instr->type == nir_instr_type_intrinsic) {
1745 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(src->parent_instr);
1746 if (nir_intrinsic_has_write_mask(intrin) && src->ssa == get_store_value(intrin))
1747 return nir_intrinsic_write_mask(intrin);
1748 else
1749 return (1 << src->ssa->num_components) - 1;
1750 } else {
1751 return (1 << src->ssa->num_components) - 1;
1752 }
1753 }
1754
1755 nir_component_mask_t
1756 nir_ssa_def_components_read(const nir_ssa_def *def)
1757 {
1758 nir_component_mask_t read_mask = 0;
1759
1760 if (!list_is_empty(&def->if_uses))
1761 read_mask |= 1;
1762
1763 nir_foreach_use(use, def) {
1764 read_mask |= nir_src_components_read(use);
1765 if (read_mask == (1 << def->num_components) - 1)
1766 return read_mask;
1767 }
1768
1769 return read_mask;
1770 }
1771
1772 nir_block *
1773 nir_block_unstructured_next(nir_block *block)
1774 {
1775 if (block == NULL) {
1776 /* nir_foreach_block_unstructured_safe() will call this function on a
1777 * NULL block after the last iteration, but it won't use the result so
1778 * just return NULL here.
1779 */
1780 return NULL;
1781 }
1782
1783 nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
1784 if (cf_next == NULL && block->cf_node.parent->type == nir_cf_node_function)
1785 return NULL;
1786
1787 if (cf_next && cf_next->type == nir_cf_node_block)
1788 return nir_cf_node_as_block(cf_next);
1789
1790 return nir_block_cf_tree_next(block);
1791 }
1792
1793 nir_block *
1794 nir_unstructured_start_block(nir_function_impl *impl)
1795 {
1796 return nir_start_block(impl);
1797 }
1798
1799 nir_block *
1800 nir_block_cf_tree_next(nir_block *block)
1801 {
1802 if (block == NULL) {
1803 /* nir_foreach_block_safe() will call this function on a NULL block
1804 * after the last iteration, but it won't use the result so just return
1805 * NULL here.
1806 */
1807 return NULL;
1808 }
1809
1810 assert(nir_cf_node_get_function(&block->cf_node)->structured);
1811
1812 nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
1813 if (cf_next)
1814 return nir_cf_node_cf_tree_first(cf_next);
1815
1816 nir_cf_node *parent = block->cf_node.parent;
1817
1818 switch (parent->type) {
1819 case nir_cf_node_if: {
1820 /* Are we at the end of the if? Go to the beginning of the else */
1821 nir_if *if_stmt = nir_cf_node_as_if(parent);
1822 if (block == nir_if_last_then_block(if_stmt))
1823 return nir_if_first_else_block(if_stmt);
1824
1825 assert(block == nir_if_last_else_block(if_stmt));
1826 }
1827 FALLTHROUGH;
1828
1829 case nir_cf_node_loop:
1830 return nir_cf_node_as_block(nir_cf_node_next(parent));
1831
1832 case nir_cf_node_function:
1833 return NULL;
1834
1835 default:
1836 unreachable("unknown cf node type");
1837 }
1838 }
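/* For illustration: nir_foreach_block() is roughly the following loop over
 * nir_block_cf_tree_next(); this is a sketch, not the macro's exact text:
 *
 *    for (nir_block *block = nir_start_block(impl); block != NULL;
 *         block = nir_block_cf_tree_next(block)) {
 *       ...
 *    }
 */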
1839
1840 nir_block *
1841 nir_block_cf_tree_prev(nir_block *block)
1842 {
1843 if (block == NULL) {
1844 /* do this for consistency with nir_block_cf_tree_next() */
1845 return NULL;
1846 }
1847
1848 assert(nir_cf_node_get_function(&block->cf_node)->structured);
1849
1850 nir_cf_node *cf_prev = nir_cf_node_prev(&block->cf_node);
1851 if (cf_prev)
1852 return nir_cf_node_cf_tree_last(cf_prev);
1853
1854 nir_cf_node *parent = block->cf_node.parent;
1855
1856 switch (parent->type) {
1857 case nir_cf_node_if: {
1858 /* Are we at the beginning of the else? Go to the end of the if */
1859 nir_if *if_stmt = nir_cf_node_as_if(parent);
1860 if (block == nir_if_first_else_block(if_stmt))
1861 return nir_if_last_then_block(if_stmt);
1862
1863 assert(block == nir_if_first_then_block(if_stmt));
1864 }
1865 FALLTHROUGH;
1866
1867 case nir_cf_node_loop:
1868 return nir_cf_node_as_block(nir_cf_node_prev(parent));
1869
1870 case nir_cf_node_function:
1871 return NULL;
1872
1873 default:
1874 unreachable("unknown cf node type");
1875 }
1876 }
1877
1878 nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node)
1879 {
1880 switch (node->type) {
1881 case nir_cf_node_function: {
1882 nir_function_impl *impl = nir_cf_node_as_function(node);
1883 return nir_start_block(impl);
1884 }
1885
1886 case nir_cf_node_if: {
1887 nir_if *if_stmt = nir_cf_node_as_if(node);
1888 return nir_if_first_then_block(if_stmt);
1889 }
1890
1891 case nir_cf_node_loop: {
1892 nir_loop *loop = nir_cf_node_as_loop(node);
1893 return nir_loop_first_block(loop);
1894 }
1895
1896 case nir_cf_node_block: {
1897 return nir_cf_node_as_block(node);
1898 }
1899
1900 default:
1901 unreachable("unknown node type");
1902 }
1903 }
1904
1905 nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node)
1906 {
1907 switch (node->type) {
1908 case nir_cf_node_function: {
1909 nir_function_impl *impl = nir_cf_node_as_function(node);
1910 return nir_impl_last_block(impl);
1911 }
1912
1913 case nir_cf_node_if: {
1914 nir_if *if_stmt = nir_cf_node_as_if(node);
1915 return nir_if_last_else_block(if_stmt);
1916 }
1917
1918 case nir_cf_node_loop: {
1919 nir_loop *loop = nir_cf_node_as_loop(node);
1920 return nir_loop_last_block(loop);
1921 }
1922
1923 case nir_cf_node_block: {
1924 return nir_cf_node_as_block(node);
1925 }
1926
1927 default:
1928 unreachable("unknown node type");
1929 }
1930 }
1931
1932 nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node)
1933 {
1934 if (node->type == nir_cf_node_block)
1935 return nir_block_cf_tree_next(nir_cf_node_as_block(node));
1936 else if (node->type == nir_cf_node_function)
1937 return NULL;
1938 else
1939 return nir_cf_node_as_block(nir_cf_node_next(node));
1940 }
1941
1942 nir_if *
1943 nir_block_get_following_if(nir_block *block)
1944 {
1945 if (exec_node_is_tail_sentinel(&block->cf_node.node))
1946 return NULL;
1947
1948 if (nir_cf_node_is_last(&block->cf_node))
1949 return NULL;
1950
1951 nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
1952
1953 if (next_node->type != nir_cf_node_if)
1954 return NULL;
1955
1956 return nir_cf_node_as_if(next_node);
1957 }
1958
1959 nir_loop *
1960 nir_block_get_following_loop(nir_block *block)
1961 {
1962 if (exec_node_is_tail_sentinel(&block->cf_node.node))
1963 return NULL;
1964
1965 if (nir_cf_node_is_last(&block->cf_node))
1966 return NULL;
1967
1968 nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
1969
1970 if (next_node->type != nir_cf_node_loop)
1971 return NULL;
1972
1973 return nir_cf_node_as_loop(next_node);
1974 }
1975
1976 static int
1977 compare_block_index(const void *p1, const void *p2)
1978 {
1979 const nir_block *block1 = *((const nir_block **) p1);
1980 const nir_block *block2 = *((const nir_block **) p2);
1981
1982 return (int) block1->index - (int) block2->index;
1983 }
1984
1985 nir_block **
1986 nir_block_get_predecessors_sorted(const nir_block *block, void *mem_ctx)
1987 {
1988 nir_block **preds =
1989 ralloc_array(mem_ctx, nir_block *, block->predecessors->entries);
1990
1991 unsigned i = 0;
1992 set_foreach(block->predecessors, entry)
1993 preds[i++] = (nir_block *) entry->key;
1994 assert(i == block->predecessors->entries);
1995
1996 qsort(preds, block->predecessors->entries, sizeof(nir_block *),
1997 compare_block_index);
1998
1999 return preds;
2000 }
2001
2002 void
2003 nir_index_blocks(nir_function_impl *impl)
2004 {
2005 unsigned index = 0;
2006
2007 if (impl->valid_metadata & nir_metadata_block_index)
2008 return;
2009
2010 nir_foreach_block_unstructured(block, impl) {
2011 block->index = index++;
2012 }
2013
2014 /* The end_block isn't really part of the program, which is why its index
2015 * is >= num_blocks.
2016 */
2017 impl->num_blocks = impl->end_block->index = index;
2018 }
2019
2020 static bool
index_ssa_def_cb(nir_ssa_def * def,void * state)2021 index_ssa_def_cb(nir_ssa_def *def, void *state)
2022 {
2023 unsigned *index = (unsigned *) state;
2024 def->index = (*index)++;
2025
2026 return true;
2027 }
2028
2029 /**
2030 * The indices are applied top-to-bottom which has the very nice property
2031 * that, if A dominates B, then A->index <= B->index.
2032 */
2033 void
2034 nir_index_ssa_defs(nir_function_impl *impl)
2035 {
2036 unsigned index = 0;
2037
2038 impl->valid_metadata &= ~nir_metadata_live_ssa_defs;
2039
2040 nir_foreach_block_unstructured(block, impl) {
2041 nir_foreach_instr(instr, block)
2042 nir_foreach_ssa_def(instr, index_ssa_def_cb, &index);
2043 }
2044
2045 impl->ssa_alloc = index;
2046 }
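/* Usage sketch (hypothetical): because dominance implies a smaller index,
 * a pass that has called nir_index_ssa_defs(impl) can rule out dominance
 * cheaply:
 *
 *    if (a->index > b->index)
 *       return false;   // a cannot dominate b
 */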
2047
2048 /**
2049 * The indices are applied top-to-bottom which has the very nice property
2050 * that, if A dominates B, then A->index <= B->index.
2051 */
2052 unsigned
2053 nir_index_instrs(nir_function_impl *impl)
2054 {
2055 unsigned index = 0;
2056
2057 nir_foreach_block(block, impl) {
2058 block->start_ip = index++;
2059
2060 nir_foreach_instr(instr, block)
2061 instr->index = index++;
2062
2063 block->end_ip = index++;
2064 }
2065
2066 return index;
2067 }
2068
2069 unsigned
2070 nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes)
2071 {
2072 unsigned count = 0;
2073 nir_foreach_variable_with_modes(var, shader, modes)
2074 var->index = count++;
2075 return count;
2076 }
2077
2078 unsigned
2079 nir_function_impl_index_vars(nir_function_impl *impl)
2080 {
2081 unsigned count = 0;
2082 nir_foreach_function_temp_variable(var, impl)
2083 var->index = count++;
2084 return count;
2085 }
2086
2087 static nir_instr *
2088 cursor_next_instr(nir_cursor cursor)
2089 {
2090 switch (cursor.option) {
2091 case nir_cursor_before_block:
2092 for (nir_block *block = cursor.block; block;
2093 block = nir_block_cf_tree_next(block)) {
2094 nir_instr *instr = nir_block_first_instr(block);
2095 if (instr)
2096 return instr;
2097 }
2098 return NULL;
2099
2100 case nir_cursor_after_block:
2101 cursor.block = nir_block_cf_tree_next(cursor.block);
2102 if (cursor.block == NULL)
2103 return NULL;
2104
2105 cursor.option = nir_cursor_before_block;
2106 return cursor_next_instr(cursor);
2107
2108 case nir_cursor_before_instr:
2109 return cursor.instr;
2110
2111 case nir_cursor_after_instr:
2112 if (nir_instr_next(cursor.instr))
2113 return nir_instr_next(cursor.instr);
2114
2115 cursor.option = nir_cursor_after_block;
2116 cursor.block = cursor.instr->block;
2117 return cursor_next_instr(cursor);
2118 }
2119
2120 unreachable("Inavlid cursor option");
2121 }
2122
2123 ASSERTED static bool
2124 dest_is_ssa(nir_dest *dest, void *_state)
2125 {
2126 (void) _state;
2127 return dest->is_ssa;
2128 }
2129
2130 bool
2131 nir_function_impl_lower_instructions(nir_function_impl *impl,
2132 nir_instr_filter_cb filter,
2133 nir_lower_instr_cb lower,
2134 void *cb_data)
2135 {
2136 nir_builder b;
2137 nir_builder_init(&b, impl);
2138
2139 nir_metadata preserved = nir_metadata_block_index |
2140 nir_metadata_dominance;
2141
2142 bool progress = false;
2143 nir_cursor iter = nir_before_cf_list(&impl->body);
2144 nir_instr *instr;
2145 while ((instr = cursor_next_instr(iter)) != NULL) {
2146 if (filter && !filter(instr, cb_data)) {
2147 iter = nir_after_instr(instr);
2148 continue;
2149 }
2150
2151 assert(nir_foreach_dest(instr, dest_is_ssa, NULL));
2152 nir_ssa_def *old_def = nir_instr_ssa_def(instr);
2153 struct list_head old_uses, old_if_uses;
2154 if (old_def != NULL) {
2155 /* We're about to ask the callback to generate a replacement for instr.
2156 * Save off the uses from instr's SSA def so we know what uses to
2157 * rewrite later. If we use nir_ssa_def_rewrite_uses, it fails in the
2158 * case where the generated replacement code uses the result of instr
2159 * itself. If we use nir_ssa_def_rewrite_uses_after (which is the
2160 * normal solution to this problem), it doesn't work well if control-
2161 * flow is inserted as part of the replacement, doesn't handle cases
2162 * where the replacement is something consumed by instr, and suffers
2163 * from performance issues. This is the only way to 100% guarantee
2164 * that we rewrite the correct set efficiently.
2165 */
2166
2167 list_replace(&old_def->uses, &old_uses);
2168 list_inithead(&old_def->uses);
2169 list_replace(&old_def->if_uses, &old_if_uses);
2170 list_inithead(&old_def->if_uses);
2171 }
2172
2173 b.cursor = nir_after_instr(instr);
2174 nir_ssa_def *new_def = lower(&b, instr, cb_data);
2175 if (new_def && new_def != NIR_LOWER_INSTR_PROGRESS &&
2176 new_def != NIR_LOWER_INSTR_PROGRESS_REPLACE) {
2177 assert(old_def != NULL);
2178 if (new_def->parent_instr->block != instr->block)
2179 preserved = nir_metadata_none;
2180
2181 nir_src new_src = nir_src_for_ssa(new_def);
2182 list_for_each_entry_safe(nir_src, use_src, &old_uses, use_link)
2183 nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
2184
2185 list_for_each_entry_safe(nir_src, use_src, &old_if_uses, use_link)
2186 nir_if_rewrite_condition(use_src->parent_if, new_src);
2187
2188 if (nir_ssa_def_is_unused(old_def)) {
2189 iter = nir_instr_free_and_dce(instr);
2190 } else {
2191 iter = nir_after_instr(instr);
2192 }
2193 progress = true;
2194 } else {
2195 /* We didn't end up lowering after all. Put the uses back */
2196 if (old_def) {
2197 list_replace(&old_uses, &old_def->uses);
2198 list_replace(&old_if_uses, &old_def->if_uses);
2199 }
2200 if (new_def == NIR_LOWER_INSTR_PROGRESS_REPLACE) {
2201 /* Only instructions without a return value can be removed like this */
2202 assert(!old_def);
2203 iter = nir_instr_free_and_dce(instr);
2204 progress = true;
2205 } else
2206 iter = nir_after_instr(instr);
2207
2208 if (new_def == NIR_LOWER_INSTR_PROGRESS)
2209 progress = true;
2210 }
2211 }
2212
2213 if (progress) {
2214 nir_metadata_preserve(impl, preserved);
2215 } else {
2216 nir_metadata_preserve(impl, nir_metadata_all);
2217 }
2218
2219 return progress;
2220 }
2221
2222 bool
2223 nir_shader_lower_instructions(nir_shader *shader,
2224 nir_instr_filter_cb filter,
2225 nir_lower_instr_cb lower,
2226 void *cb_data)
2227 {
2228 bool progress = false;
2229
2230 nir_foreach_function(function, shader) {
2231 if (function->impl &&
2232 nir_function_impl_lower_instructions(function->impl,
2233 filter, lower, cb_data))
2234 progress = true;
2235 }
2236
2237 return progress;
2238 }
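/* A sketch of typical usage (hypothetical pass, not part of this file):
 * replace every fneg with a multiply by -1.0 using a filter/lower pair.
 *
 *    static bool
 *    filter_fneg(const nir_instr *instr, const void *_data)
 *    {
 *       return instr->type == nir_instr_type_alu &&
 *              nir_instr_as_alu(instr)->op == nir_op_fneg;
 *    }
 *
 *    static nir_ssa_def *
 *    lower_fneg(nir_builder *b, nir_instr *instr, void *_data)
 *    {
 *       nir_alu_instr *alu = nir_instr_as_alu(instr);
 *       return nir_fmul_imm(b, nir_ssa_for_alu_src(b, alu, 0), -1.0);
 *    }
 *
 *    bool progress =
 *       nir_shader_lower_instructions(shader, filter_fneg, lower_fneg, NULL);
 */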
2239
2240 /**
2241 * Returns true if the shader supports quad-based implicit derivatives on
2242 * texture sampling.
2243 */
2244 bool nir_shader_supports_implicit_lod(nir_shader *shader)
2245 {
2246 return (shader->info.stage == MESA_SHADER_FRAGMENT ||
2247 (shader->info.stage == MESA_SHADER_COMPUTE &&
2248 shader->info.cs.derivative_group != DERIVATIVE_GROUP_NONE));
2249 }
2250
2251 nir_intrinsic_op
2252 nir_intrinsic_from_system_value(gl_system_value val)
2253 {
2254 switch (val) {
2255 case SYSTEM_VALUE_VERTEX_ID:
2256 return nir_intrinsic_load_vertex_id;
2257 case SYSTEM_VALUE_INSTANCE_ID:
2258 return nir_intrinsic_load_instance_id;
2259 case SYSTEM_VALUE_DRAW_ID:
2260 return nir_intrinsic_load_draw_id;
2261 case SYSTEM_VALUE_BASE_INSTANCE:
2262 return nir_intrinsic_load_base_instance;
2263 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
2264 return nir_intrinsic_load_vertex_id_zero_base;
2265 case SYSTEM_VALUE_IS_INDEXED_DRAW:
2266 return nir_intrinsic_load_is_indexed_draw;
2267 case SYSTEM_VALUE_FIRST_VERTEX:
2268 return nir_intrinsic_load_first_vertex;
2269 case SYSTEM_VALUE_BASE_VERTEX:
2270 return nir_intrinsic_load_base_vertex;
2271 case SYSTEM_VALUE_INVOCATION_ID:
2272 return nir_intrinsic_load_invocation_id;
2273 case SYSTEM_VALUE_FRAG_COORD:
2274 return nir_intrinsic_load_frag_coord;
2275 case SYSTEM_VALUE_POINT_COORD:
2276 return nir_intrinsic_load_point_coord;
2277 case SYSTEM_VALUE_LINE_COORD:
2278 return nir_intrinsic_load_line_coord;
2279 case SYSTEM_VALUE_FRONT_FACE:
2280 return nir_intrinsic_load_front_face;
2281 case SYSTEM_VALUE_SAMPLE_ID:
2282 return nir_intrinsic_load_sample_id;
2283 case SYSTEM_VALUE_SAMPLE_POS:
2284 return nir_intrinsic_load_sample_pos;
2285 case SYSTEM_VALUE_SAMPLE_MASK_IN:
2286 return nir_intrinsic_load_sample_mask_in;
2287 case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
2288 return nir_intrinsic_load_local_invocation_id;
2289 case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
2290 return nir_intrinsic_load_local_invocation_index;
2291 case SYSTEM_VALUE_WORKGROUP_ID:
2292 return nir_intrinsic_load_workgroup_id;
2293 case SYSTEM_VALUE_NUM_WORKGROUPS:
2294 return nir_intrinsic_load_num_workgroups;
2295 case SYSTEM_VALUE_PRIMITIVE_ID:
2296 return nir_intrinsic_load_primitive_id;
2297 case SYSTEM_VALUE_TESS_COORD:
2298 return nir_intrinsic_load_tess_coord;
2299 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
2300 return nir_intrinsic_load_tess_level_outer;
2301 case SYSTEM_VALUE_TESS_LEVEL_INNER:
2302 return nir_intrinsic_load_tess_level_inner;
2303 case SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT:
2304 return nir_intrinsic_load_tess_level_outer_default;
2305 case SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT:
2306 return nir_intrinsic_load_tess_level_inner_default;
2307 case SYSTEM_VALUE_VERTICES_IN:
2308 return nir_intrinsic_load_patch_vertices_in;
2309 case SYSTEM_VALUE_HELPER_INVOCATION:
2310 return nir_intrinsic_load_helper_invocation;
2311 case SYSTEM_VALUE_COLOR0:
2312 return nir_intrinsic_load_color0;
2313 case SYSTEM_VALUE_COLOR1:
2314 return nir_intrinsic_load_color1;
2315 case SYSTEM_VALUE_VIEW_INDEX:
2316 return nir_intrinsic_load_view_index;
2317 case SYSTEM_VALUE_SUBGROUP_SIZE:
2318 return nir_intrinsic_load_subgroup_size;
2319 case SYSTEM_VALUE_SUBGROUP_INVOCATION:
2320 return nir_intrinsic_load_subgroup_invocation;
2321 case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
2322 return nir_intrinsic_load_subgroup_eq_mask;
2323 case SYSTEM_VALUE_SUBGROUP_GE_MASK:
2324 return nir_intrinsic_load_subgroup_ge_mask;
2325 case SYSTEM_VALUE_SUBGROUP_GT_MASK:
2326 return nir_intrinsic_load_subgroup_gt_mask;
2327 case SYSTEM_VALUE_SUBGROUP_LE_MASK:
2328 return nir_intrinsic_load_subgroup_le_mask;
2329 case SYSTEM_VALUE_SUBGROUP_LT_MASK:
2330 return nir_intrinsic_load_subgroup_lt_mask;
2331 case SYSTEM_VALUE_NUM_SUBGROUPS:
2332 return nir_intrinsic_load_num_subgroups;
2333 case SYSTEM_VALUE_SUBGROUP_ID:
2334 return nir_intrinsic_load_subgroup_id;
2335 case SYSTEM_VALUE_WORKGROUP_SIZE:
2336 return nir_intrinsic_load_workgroup_size;
2337 case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
2338 return nir_intrinsic_load_global_invocation_id;
2339 case SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID:
2340 return nir_intrinsic_load_base_global_invocation_id;
2341 case SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX:
2342 return nir_intrinsic_load_global_invocation_index;
2343 case SYSTEM_VALUE_WORK_DIM:
2344 return nir_intrinsic_load_work_dim;
2345 case SYSTEM_VALUE_USER_DATA_AMD:
2346 return nir_intrinsic_load_user_data_amd;
2347 case SYSTEM_VALUE_RAY_LAUNCH_ID:
2348 return nir_intrinsic_load_ray_launch_id;
2349 case SYSTEM_VALUE_RAY_LAUNCH_SIZE:
2350 return nir_intrinsic_load_ray_launch_size;
2351 case SYSTEM_VALUE_RAY_WORLD_ORIGIN:
2352 return nir_intrinsic_load_ray_world_origin;
2353 case SYSTEM_VALUE_RAY_WORLD_DIRECTION:
2354 return nir_intrinsic_load_ray_world_direction;
2355 case SYSTEM_VALUE_RAY_OBJECT_ORIGIN:
2356 return nir_intrinsic_load_ray_object_origin;
2357 case SYSTEM_VALUE_RAY_OBJECT_DIRECTION:
2358 return nir_intrinsic_load_ray_object_direction;
2359 case SYSTEM_VALUE_RAY_T_MIN:
2360 return nir_intrinsic_load_ray_t_min;
2361 case SYSTEM_VALUE_RAY_T_MAX:
2362 return nir_intrinsic_load_ray_t_max;
2363 case SYSTEM_VALUE_RAY_OBJECT_TO_WORLD:
2364 return nir_intrinsic_load_ray_object_to_world;
2365 case SYSTEM_VALUE_RAY_WORLD_TO_OBJECT:
2366 return nir_intrinsic_load_ray_world_to_object;
2367 case SYSTEM_VALUE_RAY_HIT_KIND:
2368 return nir_intrinsic_load_ray_hit_kind;
2369 case SYSTEM_VALUE_RAY_FLAGS:
2370 return nir_intrinsic_load_ray_flags;
2371 case SYSTEM_VALUE_RAY_GEOMETRY_INDEX:
2372 return nir_intrinsic_load_ray_geometry_index;
2373 case SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX:
2374 return nir_intrinsic_load_ray_instance_custom_index;
2375 case SYSTEM_VALUE_FRAG_SHADING_RATE:
2376 return nir_intrinsic_load_frag_shading_rate;
2377 default:
2378 unreachable("system value does not directly correspond to intrinsic");
2379 }
2380 }
2381
2382 gl_system_value
2383 nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
2384 {
2385 switch (intrin) {
2386 case nir_intrinsic_load_vertex_id:
2387 return SYSTEM_VALUE_VERTEX_ID;
2388 case nir_intrinsic_load_instance_id:
2389 return SYSTEM_VALUE_INSTANCE_ID;
2390 case nir_intrinsic_load_draw_id:
2391 return SYSTEM_VALUE_DRAW_ID;
2392 case nir_intrinsic_load_base_instance:
2393 return SYSTEM_VALUE_BASE_INSTANCE;
2394 case nir_intrinsic_load_vertex_id_zero_base:
2395 return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
2396 case nir_intrinsic_load_first_vertex:
2397 return SYSTEM_VALUE_FIRST_VERTEX;
2398 case nir_intrinsic_load_is_indexed_draw:
2399 return SYSTEM_VALUE_IS_INDEXED_DRAW;
2400 case nir_intrinsic_load_base_vertex:
2401 return SYSTEM_VALUE_BASE_VERTEX;
2402 case nir_intrinsic_load_invocation_id:
2403 return SYSTEM_VALUE_INVOCATION_ID;
2404 case nir_intrinsic_load_frag_coord:
2405 return SYSTEM_VALUE_FRAG_COORD;
2406 case nir_intrinsic_load_point_coord:
2407 return SYSTEM_VALUE_POINT_COORD;
2408 case nir_intrinsic_load_line_coord:
2409 return SYSTEM_VALUE_LINE_COORD;
2410 case nir_intrinsic_load_front_face:
2411 return SYSTEM_VALUE_FRONT_FACE;
2412 case nir_intrinsic_load_sample_id:
2413 return SYSTEM_VALUE_SAMPLE_ID;
2414 case nir_intrinsic_load_sample_pos:
2415 return SYSTEM_VALUE_SAMPLE_POS;
2416 case nir_intrinsic_load_sample_mask_in:
2417 return SYSTEM_VALUE_SAMPLE_MASK_IN;
2418 case nir_intrinsic_load_local_invocation_id:
2419 return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
2420 case nir_intrinsic_load_local_invocation_index:
2421 return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
2422 case nir_intrinsic_load_num_workgroups:
2423 return SYSTEM_VALUE_NUM_WORKGROUPS;
2424 case nir_intrinsic_load_workgroup_id:
2425 return SYSTEM_VALUE_WORKGROUP_ID;
2426 case nir_intrinsic_load_primitive_id:
2427 return SYSTEM_VALUE_PRIMITIVE_ID;
2428 case nir_intrinsic_load_tess_coord:
2429 return SYSTEM_VALUE_TESS_COORD;
2430 case nir_intrinsic_load_tess_level_outer:
2431 return SYSTEM_VALUE_TESS_LEVEL_OUTER;
2432 case nir_intrinsic_load_tess_level_inner:
2433 return SYSTEM_VALUE_TESS_LEVEL_INNER;
2434 case nir_intrinsic_load_tess_level_outer_default:
2435 return SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT;
2436 case nir_intrinsic_load_tess_level_inner_default:
2437 return SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT;
2438 case nir_intrinsic_load_patch_vertices_in:
2439 return SYSTEM_VALUE_VERTICES_IN;
2440 case nir_intrinsic_load_helper_invocation:
2441 return SYSTEM_VALUE_HELPER_INVOCATION;
2442 case nir_intrinsic_load_color0:
2443 return SYSTEM_VALUE_COLOR0;
2444 case nir_intrinsic_load_color1:
2445 return SYSTEM_VALUE_COLOR1;
2446 case nir_intrinsic_load_view_index:
2447 return SYSTEM_VALUE_VIEW_INDEX;
2448 case nir_intrinsic_load_subgroup_size:
2449 return SYSTEM_VALUE_SUBGROUP_SIZE;
2450 case nir_intrinsic_load_subgroup_invocation:
2451 return SYSTEM_VALUE_SUBGROUP_INVOCATION;
2452 case nir_intrinsic_load_subgroup_eq_mask:
2453 return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
2454 case nir_intrinsic_load_subgroup_ge_mask:
2455 return SYSTEM_VALUE_SUBGROUP_GE_MASK;
2456 case nir_intrinsic_load_subgroup_gt_mask:
2457 return SYSTEM_VALUE_SUBGROUP_GT_MASK;
2458 case nir_intrinsic_load_subgroup_le_mask:
2459 return SYSTEM_VALUE_SUBGROUP_LE_MASK;
2460 case nir_intrinsic_load_subgroup_lt_mask:
2461 return SYSTEM_VALUE_SUBGROUP_LT_MASK;
2462 case nir_intrinsic_load_num_subgroups:
2463 return SYSTEM_VALUE_NUM_SUBGROUPS;
2464 case nir_intrinsic_load_subgroup_id:
2465 return SYSTEM_VALUE_SUBGROUP_ID;
2466 case nir_intrinsic_load_workgroup_size:
2467 return SYSTEM_VALUE_WORKGROUP_SIZE;
2468 case nir_intrinsic_load_global_invocation_id:
2469 return SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
2470 case nir_intrinsic_load_base_global_invocation_id:
2471 return SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID;
2472 case nir_intrinsic_load_global_invocation_index:
2473 return SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX;
2474 case nir_intrinsic_load_work_dim:
2475 return SYSTEM_VALUE_WORK_DIM;
2476 case nir_intrinsic_load_user_data_amd:
2477 return SYSTEM_VALUE_USER_DATA_AMD;
2478 case nir_intrinsic_load_barycentric_model:
2479 return SYSTEM_VALUE_BARYCENTRIC_PULL_MODEL;
2480 case nir_intrinsic_load_gs_header_ir3:
2481 return SYSTEM_VALUE_GS_HEADER_IR3;
2482 case nir_intrinsic_load_tcs_header_ir3:
2483 return SYSTEM_VALUE_TCS_HEADER_IR3;
2484 case nir_intrinsic_load_ray_launch_id:
2485 return SYSTEM_VALUE_RAY_LAUNCH_ID;
2486 case nir_intrinsic_load_ray_launch_size:
2487 return SYSTEM_VALUE_RAY_LAUNCH_SIZE;
2488 case nir_intrinsic_load_ray_world_origin:
2489 return SYSTEM_VALUE_RAY_WORLD_ORIGIN;
2490 case nir_intrinsic_load_ray_world_direction:
2491 return SYSTEM_VALUE_RAY_WORLD_DIRECTION;
2492 case nir_intrinsic_load_ray_object_origin:
2493 return SYSTEM_VALUE_RAY_OBJECT_ORIGIN;
2494 case nir_intrinsic_load_ray_object_direction:
2495 return SYSTEM_VALUE_RAY_OBJECT_DIRECTION;
2496 case nir_intrinsic_load_ray_t_min:
2497 return SYSTEM_VALUE_RAY_T_MIN;
2498 case nir_intrinsic_load_ray_t_max:
2499 return SYSTEM_VALUE_RAY_T_MAX;
2500 case nir_intrinsic_load_ray_object_to_world:
2501 return SYSTEM_VALUE_RAY_OBJECT_TO_WORLD;
2502 case nir_intrinsic_load_ray_world_to_object:
2503 return SYSTEM_VALUE_RAY_WORLD_TO_OBJECT;
2504 case nir_intrinsic_load_ray_hit_kind:
2505 return SYSTEM_VALUE_RAY_HIT_KIND;
2506 case nir_intrinsic_load_ray_flags:
2507 return SYSTEM_VALUE_RAY_FLAGS;
2508 case nir_intrinsic_load_ray_geometry_index:
2509 return SYSTEM_VALUE_RAY_GEOMETRY_INDEX;
2510 case nir_intrinsic_load_ray_instance_custom_index:
2511 return SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX;
2512 case nir_intrinsic_load_frag_shading_rate:
2513 return SYSTEM_VALUE_FRAG_SHADING_RATE;
2514 default:
2515 unreachable("intrinsic doesn't produce a system value");
2516 }
2517 }
2518
2519 /* OpenGL utility method that remaps attribute locations when they are
2520 * doubles. Not needed for Vulkan due to the difference in input location
2521 * counts for doubles between Vulkan and OpenGL.
2522 *
2523 * The bitfield returned in dual_slot is one bit for each double input slot in
2524 * the original OpenGL single-slot input numbering. The mapping from old
2525 * locations to new locations is as follows:
2526 *
2527 * new_loc = loc + util_bitcount(dual_slot & BITFIELD64_MASK(loc))
2528 */
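/* Worked example (hypothetical values): with dual_slot = 0x1 (a dual-slot
 * attribute at old location 0), an input at old location 2 moves to
 * 2 + util_bitcount(0x1 & BITFIELD64_MASK(2)) = 3.
 */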
2529 void
2530 nir_remap_dual_slot_attributes(nir_shader *shader, uint64_t *dual_slot)
2531 {
2532 assert(shader->info.stage == MESA_SHADER_VERTEX);
2533
2534 *dual_slot = 0;
2535 nir_foreach_shader_in_variable(var, shader) {
2536 if (glsl_type_is_dual_slot(glsl_without_array(var->type))) {
2537 unsigned slots = glsl_count_attribute_slots(var->type, true);
2538 *dual_slot |= BITFIELD64_MASK(slots) << var->data.location;
2539 }
2540 }
2541
2542 nir_foreach_shader_in_variable(var, shader) {
2543 var->data.location +=
2544 util_bitcount64(*dual_slot & BITFIELD64_MASK(var->data.location));
2545 }
2546 }
2547
2548 /* Returns an attribute mask that has been re-compacted using the given
2549 * dual_slot mask.
2550 */
2551 uint64_t
2552 nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot)
2553 {
2554 while (dual_slot) {
2555 unsigned loc = u_bit_scan64(&dual_slot);
2556 /* mask of all bits up to and including loc */
2557 uint64_t mask = BITFIELD64_MASK(loc + 1);
2558 attribs = (attribs & mask) | ((attribs & ~mask) >> 1);
2559 }
2560 return attribs;
2561 }
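/* Worked example (hypothetical values): with dual_slot = 0x2, the mask
 * attribs = 0xe (slots 1-3 in the expanded numbering, where slots 1 and 2
 * are the two halves of the double at original location 1) compacts to
 * 0x6 (original locations 1 and 2).
 */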
2562
2563 void
2564 nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_ssa_def *src,
2565 bool bindless)
2566 {
2567 enum gl_access_qualifier access = nir_intrinsic_access(intrin);
2568
2569 /* Image intrinsics only have one of these */
2570 assert(!nir_intrinsic_has_src_type(intrin) ||
2571 !nir_intrinsic_has_dest_type(intrin));
2572
2573 nir_alu_type data_type = nir_type_invalid;
2574 if (nir_intrinsic_has_src_type(intrin))
2575 data_type = nir_intrinsic_src_type(intrin);
2576 if (nir_intrinsic_has_dest_type(intrin))
2577 data_type = nir_intrinsic_dest_type(intrin);
2578
2579 switch (intrin->intrinsic) {
2580 #define CASE(op) \
2581 case nir_intrinsic_image_deref_##op: \
2582 intrin->intrinsic = bindless ? nir_intrinsic_bindless_image_##op \
2583 : nir_intrinsic_image_##op; \
2584 break;
2585 CASE(load)
2586 CASE(sparse_load)
2587 CASE(store)
2588 CASE(atomic_add)
2589 CASE(atomic_imin)
2590 CASE(atomic_umin)
2591 CASE(atomic_imax)
2592 CASE(atomic_umax)
2593 CASE(atomic_and)
2594 CASE(atomic_or)
2595 CASE(atomic_xor)
2596 CASE(atomic_exchange)
2597 CASE(atomic_comp_swap)
2598 CASE(atomic_fadd)
2599 CASE(atomic_fmin)
2600 CASE(atomic_fmax)
2601 CASE(atomic_inc_wrap)
2602 CASE(atomic_dec_wrap)
2603 CASE(size)
2604 CASE(samples)
2605 CASE(load_raw_intel)
2606 CASE(store_raw_intel)
2607 #undef CASE
2608 default:
2609 unreachable("Unhanded image intrinsic");
2610 }
2611
2612 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2613 nir_variable *var = nir_deref_instr_get_variable(deref);
2614
2615 /* Only update the format if the intrinsic doesn't have one set */
2616 if (nir_intrinsic_format(intrin) == PIPE_FORMAT_NONE)
2617 nir_intrinsic_set_format(intrin, var->data.image.format);
2618
2619 nir_intrinsic_set_access(intrin, access | var->data.access);
2620 if (nir_intrinsic_has_src_type(intrin))
2621 nir_intrinsic_set_src_type(intrin, data_type);
2622 if (nir_intrinsic_has_dest_type(intrin))
2623 nir_intrinsic_set_dest_type(intrin, data_type);
2624
2625 nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
2626 nir_src_for_ssa(src));
2627 }
2628
2629 unsigned
2630 nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr)
2631 {
2632 enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
2633 int coords = glsl_get_sampler_dim_coordinate_components(dim);
2634 if (dim == GLSL_SAMPLER_DIM_CUBE)
2635 return coords;
2636 else
2637 return coords + nir_intrinsic_image_array(instr);
2638 }
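/* For illustration: an arrayed 2D image (GLSL_SAMPLER_DIM_2D with the array
 * flag set) yields 2 + 1 = 3 coordinate components, while cube images always
 * report their base 3 components.
 */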
2639
2640 nir_src *
2641 nir_get_shader_call_payload_src(nir_intrinsic_instr *call)
2642 {
2643 switch (call->intrinsic) {
2644 case nir_intrinsic_trace_ray:
2645 case nir_intrinsic_rt_trace_ray:
2646 return &call->src[10];
2647 case nir_intrinsic_execute_callable:
2648 case nir_intrinsic_rt_execute_callable:
2649 return &call->src[1];
2650 default:
2651 unreachable("Not a call intrinsic");
2652 return NULL;
2653 }
2654 }
2655
2656 nir_binding nir_chase_binding(nir_src rsrc)
2657 {
2658 nir_binding res = {0};
2659 if (rsrc.ssa->parent_instr->type == nir_instr_type_deref) {
2660 const struct glsl_type *type = glsl_without_array(nir_src_as_deref(rsrc)->type);
2661 bool is_image = glsl_type_is_image(type) || glsl_type_is_sampler(type);
2662 while (rsrc.ssa->parent_instr->type == nir_instr_type_deref) {
2663 nir_deref_instr *deref = nir_src_as_deref(rsrc);
2664
2665 if (deref->deref_type == nir_deref_type_var) {
2666 res.success = true;
2667 res.var = deref->var;
2668 res.desc_set = deref->var->data.descriptor_set;
2669 res.binding = deref->var->data.binding;
2670 return res;
2671 } else if (deref->deref_type == nir_deref_type_array && is_image) {
2672 if (res.num_indices == ARRAY_SIZE(res.indices))
2673 return (nir_binding){0};
2674 res.indices[res.num_indices++] = deref->arr.index;
2675 }
2676
2677 rsrc = deref->parent;
2678 }
2679 }
2680
2681 /* Skip copies and trimming. Trimming can appear as nir_op_mov instructions
2682 * when removing the offset from addresses. We also consider nir_op_is_vec()
2683 * instructions to skip trimming of vec2_index_32bit_offset addresses after
2684 * lowering ALU to scalar.
2685 */
2686 while (true) {
2687 nir_alu_instr *alu = nir_src_as_alu_instr(rsrc);
2688 nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc);
2689 if (alu && alu->op == nir_op_mov) {
2690 for (unsigned i = 0; i < alu->dest.dest.ssa.num_components; i++) {
2691 if (alu->src[0].swizzle[i] != i)
2692 return (nir_binding){0};
2693 }
2694 rsrc = alu->src[0].src;
2695 } else if (alu && nir_op_is_vec(alu->op)) {
2696 for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
2697 if (alu->src[i].swizzle[0] != i || alu->src[i].src.ssa != alu->src[0].src.ssa)
2698 return (nir_binding){0};
2699 }
2700 rsrc = alu->src[0].src;
2701 } else if (intrin && intrin->intrinsic == nir_intrinsic_read_first_invocation) {
2702 /* The caller might want to be aware that only the first invocation's
2703 * value of the indices is used.
2704 */
2705 res.read_first_invocation = true;
2706 rsrc = intrin->src[0];
2707 } else {
2708 break;
2709 }
2710 }
2711
2712 if (nir_src_is_const(rsrc)) {
2713 /* GL binding model after deref lowering */
2714 res.success = true;
2715 res.binding = nir_src_as_uint(rsrc);
2716 return res;
2717 }
2718
2719 /* otherwise, must be Vulkan binding model after deref lowering or GL bindless */
2720
2721 nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc);
2722 if (!intrin)
2723 return (nir_binding){0};
2724
2725 /* skip load_vulkan_descriptor */
2726 if (intrin->intrinsic == nir_intrinsic_load_vulkan_descriptor) {
2727 intrin = nir_src_as_intrinsic(intrin->src[0]);
2728 if (!intrin)
2729 return (nir_binding){0};
2730 }
2731
2732 if (intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
2733 return (nir_binding){0};
2734
2735 assert(res.num_indices == 0);
2736 res.success = true;
2737 res.desc_set = nir_intrinsic_desc_set(intrin);
2738 res.binding = nir_intrinsic_binding(intrin);
2739 res.num_indices = 1;
2740 res.indices[0] = intrin->src[0];
2741 return res;
2742 }
2743
2744 nir_variable *nir_get_binding_variable(nir_shader *shader, nir_binding binding)
2745 {
2746 nir_variable *binding_var = NULL;
2747 unsigned count = 0;
2748
2749 if (!binding.success)
2750 return NULL;
2751
2752 if (binding.var)
2753 return binding.var;
2754
2755 nir_foreach_variable_with_modes(var, shader, nir_var_mem_ubo | nir_var_mem_ssbo) {
2756 if (var->data.descriptor_set == binding.desc_set && var->data.binding == binding.binding) {
2757 binding_var = var;
2758 count++;
2759 }
2760 }
2761
2762 /* Be conservative if another variable is using the same binding/desc_set
2763 * because the access mask might be different and we can't get it reliably.
2764 */
2765 if (count > 1)
2766 return NULL;
2767
2768 return binding_var;
2769 }
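/* Usage sketch (hypothetical caller): chase a resource source back to its
 * declaration and inspect its qualifiers:
 *
 *    nir_binding binding = nir_chase_binding(intrin->src[0]);
 *    nir_variable *var = nir_get_binding_variable(shader, binding);
 *    if (var && (var->data.access & ACCESS_NON_WRITEABLE))
 *       ...
 */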
2770
2771 bool
2772 nir_alu_instr_is_copy(nir_alu_instr *instr)
2773 {
2774 assert(instr->src[0].src.is_ssa);
2775
2776 if (instr->op == nir_op_mov) {
2777 return !instr->dest.saturate &&
2778 !instr->src[0].abs &&
2779 !instr->src[0].negate;
2780 } else if (nir_op_is_vec(instr->op)) {
2781 for (unsigned i = 0; i < instr->dest.dest.ssa.num_components; i++) {
2782 if (instr->src[i].abs || instr->src[i].negate)
2783 return false;
2784 }
2785 return !instr->dest.saturate;
2786 } else {
2787 return false;
2788 }
2789 }
2790
2791 nir_ssa_scalar
2792 nir_ssa_scalar_chase_movs(nir_ssa_scalar s)
2793 {
2794 while (nir_ssa_scalar_is_alu(s)) {
2795 nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
2796 if (!nir_alu_instr_is_copy(alu))
2797 break;
2798
2799 if (alu->op == nir_op_mov) {
2800 s.def = alu->src[0].src.ssa;
2801 s.comp = alu->src[0].swizzle[s.comp];
2802 } else {
2803 assert(nir_op_is_vec(alu->op));
2804 s.def = alu->src[s.comp].src.ssa;
2805 s.comp = alu->src[s.comp].swizzle[0];
2806 }
2807 }
2808
2809 return s;
2810 }
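/* Usage sketch (hypothetical): look through copies before testing a
 * component for a constant:
 *
 *    nir_ssa_scalar s = { .def = def, .comp = 0 };
 *    s = nir_ssa_scalar_chase_movs(s);
 *    if (nir_ssa_scalar_is_const(s)) {
 *       uint64_t bits = nir_ssa_scalar_as_uint(s);
 *       ...
 *    }
 */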
2811