1 /*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Connor Abbott (cwabbott0@gmail.com)
25 *
26 */
27
28 #include "nir.h"
29 #include <assert.h>
30 #include <limits.h>
31 #include <math.h>
32 #include "util/half_float.h"
33 #include "util/macros.h"
34 #include "util/u_math.h"
35 #include "util/u_qsort.h"
36 #include "nir_builder.h"
37 #include "nir_control_flow_private.h"
38 #include "nir_worklist.h"
39
40 #ifndef NDEBUG
41 uint32_t nir_debug = 0;
42 bool nir_debug_print_shader[MESA_SHADER_KERNEL + 1] = { 0 };
43
44 static const struct debug_named_value nir_debug_control[] = {
45 { "clone", NIR_DEBUG_CLONE,
46 "Test cloning a shader at each successful lowering/optimization call" },
47 { "serialize", NIR_DEBUG_SERIALIZE,
48 "Test serialize and deserialize shader at each successful lowering/optimization call" },
49 { "novalidate", NIR_DEBUG_NOVALIDATE,
50 "Disable shader validation at each successful lowering/optimization call" },
51 { "tgsi", NIR_DEBUG_TGSI,
52 "Dump NIR/TGSI shaders when doing a NIR<->TGSI translation" },
53 { "print", NIR_DEBUG_PRINT,
54 "Dump resulting shader after each successful lowering/optimization call" },
55 { "print_vs", NIR_DEBUG_PRINT_VS,
56 "Dump resulting vertex shader after each successful lowering/optimization call" },
57 { "print_tcs", NIR_DEBUG_PRINT_TCS,
58 "Dump resulting tessellation control shader after each successful lowering/optimization call" },
59 { "print_tes", NIR_DEBUG_PRINT_TES,
60 "Dump resulting tessellation evaluation shader after each successful lowering/optimization call" },
61 { "print_gs", NIR_DEBUG_PRINT_GS,
62 "Dump resulting geometry shader after each successful lowering/optimization call" },
63 { "print_fs", NIR_DEBUG_PRINT_FS,
64 "Dump resulting fragment shader after each successful lowering/optimization call" },
65 { "print_cs", NIR_DEBUG_PRINT_CS,
66 "Dump resulting compute shader after each successful lowering/optimization call" },
67 { "print_ts", NIR_DEBUG_PRINT_TS,
68 "Dump resulting task shader after each successful lowering/optimization call" },
69 { "print_ms", NIR_DEBUG_PRINT_MS,
70 "Dump resulting mesh shader after each successful lowering/optimization call" },
71 { "print_rgs", NIR_DEBUG_PRINT_RGS,
72 "Dump resulting raygen shader after each successful lowering/optimization call" },
73 { "print_ahs", NIR_DEBUG_PRINT_AHS,
74 "Dump resulting any-hit shader after each successful lowering/optimization call" },
75 { "print_chs", NIR_DEBUG_PRINT_CHS,
76 "Dump resulting closest-hit shader after each successful lowering/optimization call" },
77 { "print_mhs", NIR_DEBUG_PRINT_MHS,
78 "Dump resulting miss-hit shader after each successful lowering/optimization call" },
79 { "print_is", NIR_DEBUG_PRINT_IS,
80 "Dump resulting intersection shader after each successful lowering/optimization call" },
81 { "print_cbs", NIR_DEBUG_PRINT_CBS,
82 "Dump resulting callable shader after each successful lowering/optimization call" },
83 { "print_ks", NIR_DEBUG_PRINT_KS,
84 "Dump resulting kernel shader after each successful lowering/optimization call" },
85 { "print_no_inline_consts", NIR_DEBUG_PRINT_NO_INLINE_CONSTS,
86 "Do not print const value near each use of const SSA variable" },
87 { "print_internal", NIR_DEBUG_PRINT_INTERNAL,
88 "Print shaders even if they are marked as internal" },
89 { "print_pass_flags", NIR_DEBUG_PRINT_PASS_FLAGS,
90 "Print pass_flags for every instruction when pass_flags are non-zero" },
91 DEBUG_NAMED_VALUE_END
92 };
93
94 DEBUG_GET_ONCE_FLAGS_OPTION(nir_debug, "NIR_DEBUG", nir_debug_control, 0)
95
96 static void
97 nir_process_debug_variable_once(void)
98 {
99 nir_debug = debug_get_option_nir_debug();
100
101 /* clang-format off */
102 nir_debug_print_shader[MESA_SHADER_VERTEX] = NIR_DEBUG(PRINT_VS);
103 nir_debug_print_shader[MESA_SHADER_TESS_CTRL] = NIR_DEBUG(PRINT_TCS);
104 nir_debug_print_shader[MESA_SHADER_TESS_EVAL] = NIR_DEBUG(PRINT_TES);
105 nir_debug_print_shader[MESA_SHADER_GEOMETRY] = NIR_DEBUG(PRINT_GS);
106 nir_debug_print_shader[MESA_SHADER_FRAGMENT] = NIR_DEBUG(PRINT_FS);
107 nir_debug_print_shader[MESA_SHADER_COMPUTE] = NIR_DEBUG(PRINT_CS);
108 nir_debug_print_shader[MESA_SHADER_TASK] = NIR_DEBUG(PRINT_TS);
109 nir_debug_print_shader[MESA_SHADER_MESH] = NIR_DEBUG(PRINT_MS);
110 nir_debug_print_shader[MESA_SHADER_RAYGEN] = NIR_DEBUG(PRINT_RGS);
111 nir_debug_print_shader[MESA_SHADER_ANY_HIT] = NIR_DEBUG(PRINT_AHS);
112 nir_debug_print_shader[MESA_SHADER_CLOSEST_HIT] = NIR_DEBUG(PRINT_CHS);
113 nir_debug_print_shader[MESA_SHADER_MISS] = NIR_DEBUG(PRINT_MHS);
114 nir_debug_print_shader[MESA_SHADER_INTERSECTION] = NIR_DEBUG(PRINT_IS);
115 nir_debug_print_shader[MESA_SHADER_CALLABLE] = NIR_DEBUG(PRINT_CBS);
116 nir_debug_print_shader[MESA_SHADER_KERNEL] = NIR_DEBUG(PRINT_KS);
117 /* clang-format on */
118 }
119
120 void
121 nir_process_debug_variable(void)
122 {
123 static once_flag flag = ONCE_FLAG_INIT;
124 call_once(&flag, nir_process_debug_variable_once);
125 }
126 #endif
127
128 /** Return true if the component mask "mask" with bit size "old_bit_size" can
129 * be re-interpreted to be used with "new_bit_size".
130 */
131 bool
132 nir_component_mask_can_reinterpret(nir_component_mask_t mask,
133 unsigned old_bit_size,
134 unsigned new_bit_size)
135 {
136 assert(util_is_power_of_two_nonzero(old_bit_size));
137 assert(util_is_power_of_two_nonzero(new_bit_size));
138
139 if (old_bit_size == new_bit_size)
140 return true;
141
142 if (old_bit_size == 1 || new_bit_size == 1)
143 return false;
144
145 if (old_bit_size > new_bit_size) {
146 unsigned ratio = old_bit_size / new_bit_size;
147 return util_last_bit(mask) * ratio <= NIR_MAX_VEC_COMPONENTS;
148 }
149
150 unsigned iter = mask;
151 while (iter) {
152 int start, count;
153 u_bit_scan_consecutive_range(&iter, &start, &count);
154 start *= old_bit_size;
155 count *= old_bit_size;
156 if (start % new_bit_size != 0)
157 return false;
158 if (count % new_bit_size != 0)
159 return false;
160 }
161 return true;
162 }
163
164 /** Re-interprets a component mask "mask" with bit size "old_bit_size" so that
165 * it can be used with "new_bit_size".
166 */
167 nir_component_mask_t
168 nir_component_mask_reinterpret(nir_component_mask_t mask,
169 unsigned old_bit_size,
170 unsigned new_bit_size)
171 {
172 assert(nir_component_mask_can_reinterpret(mask, old_bit_size, new_bit_size));
173
174 if (old_bit_size == new_bit_size)
175 return mask;
176
177 nir_component_mask_t new_mask = 0;
178 unsigned iter = mask;
179 while (iter) {
180 int start, count;
181 u_bit_scan_consecutive_range(&iter, &start, &count);
182 start = start * old_bit_size / new_bit_size;
183 count = count * old_bit_size / new_bit_size;
184 new_mask |= BITFIELD_RANGE(start, count);
185 }
186 return new_mask;
187 }
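/* Worked example (illustrative, not part of the original file): reinterpreting
 * the low two components of a 32-bit write mask as 16-bit components.
 *
 *    nir_component_mask_t m32 = 0x3;   // .xy at 32 bits
 *    assert(nir_component_mask_can_reinterpret(m32, 32, 16));
 *    nir_component_mask_t m16 = nir_component_mask_reinterpret(m32, 32, 16);
 *    // m16 == 0xf: the same bytes now span four 16-bit components.
 */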
188
189 nir_shader *
190 nir_shader_create(void *mem_ctx,
191 gl_shader_stage stage,
192 const nir_shader_compiler_options *options,
193 shader_info *si)
194 {
195 nir_shader *shader = rzalloc(mem_ctx, nir_shader);
196
197 shader->gctx = gc_context(shader);
198
199 #ifndef NDEBUG
200 nir_process_debug_variable();
201 #endif
202
203 exec_list_make_empty(&shader->variables);
204
205 shader->options = options;
206
207 if (si) {
208 assert(si->stage == stage);
209 shader->info = *si;
210 } else {
211 shader->info.stage = stage;
212 }
213
214 exec_list_make_empty(&shader->functions);
215
216 shader->num_inputs = 0;
217 shader->num_outputs = 0;
218 shader->num_uniforms = 0;
219
220 return shader;
221 }
222
223 void
224 nir_shader_add_variable(nir_shader *shader, nir_variable *var)
225 {
226 switch (var->data.mode) {
227 case nir_var_function_temp:
228 assert(!"nir_shader_add_variable cannot be used for local variables");
229 return;
230
231 case nir_var_shader_temp:
232 case nir_var_shader_in:
233 case nir_var_shader_out:
234 case nir_var_uniform:
235 case nir_var_mem_ubo:
236 case nir_var_mem_ssbo:
237 case nir_var_image:
238 case nir_var_mem_shared:
239 case nir_var_system_value:
240 case nir_var_mem_push_const:
241 case nir_var_mem_constant:
242 case nir_var_shader_call_data:
243 case nir_var_ray_hit_attrib:
244 case nir_var_mem_task_payload:
245 case nir_var_mem_node_payload:
246 case nir_var_mem_node_payload_in:
247 case nir_var_mem_global:
248 break;
249
250 default:
251 assert(!"invalid mode");
252 return;
253 }
254
255 exec_list_push_tail(&shader->variables, &var->node);
256 }
257
258 nir_variable *
259 nir_variable_create(nir_shader *shader, nir_variable_mode mode,
260 const struct glsl_type *type, const char *name)
261 {
262 nir_variable *var = rzalloc(shader, nir_variable);
263 var->name = ralloc_strdup(var, name);
264 var->type = type;
265 var->data.mode = mode;
266 var->data.how_declared = nir_var_declared_normally;
267
268 if ((mode == nir_var_shader_in &&
269 shader->info.stage != MESA_SHADER_VERTEX &&
270 shader->info.stage != MESA_SHADER_KERNEL) ||
271 (mode == nir_var_shader_out &&
272 shader->info.stage != MESA_SHADER_FRAGMENT))
273 var->data.interpolation = INTERP_MODE_SMOOTH;
274
275 if (mode == nir_var_shader_in || mode == nir_var_uniform)
276 var->data.read_only = true;
277
278 nir_shader_add_variable(shader, var);
279
280 return var;
281 }
282
283 nir_variable *
284 nir_local_variable_create(nir_function_impl *impl,
285 const struct glsl_type *type, const char *name)
286 {
287 nir_variable *var = rzalloc(impl->function->shader, nir_variable);
288 var->name = ralloc_strdup(var, name);
289 var->type = type;
290 var->data.mode = nir_var_function_temp;
291
292 nir_function_impl_add_variable(impl, var);
293
294 return var;
295 }
296
297 nir_variable *
298 nir_state_variable_create(nir_shader *shader,
299 const struct glsl_type *type,
300 const char *name,
301 const gl_state_index16 tokens[STATE_LENGTH])
302 {
303 nir_variable *var = nir_variable_create(shader, nir_var_uniform, type, name);
304 var->num_state_slots = 1;
305 var->state_slots = rzalloc_array(var, nir_state_slot, 1);
306 memcpy(var->state_slots[0].tokens, tokens,
307 sizeof(var->state_slots[0].tokens));
308 shader->num_uniforms++;
309 return var;
310 }
311
312 nir_variable *
313 nir_create_variable_with_location(nir_shader *shader, nir_variable_mode mode, int location,
314 const struct glsl_type *type)
315 {
316 /* Only supporting non-array, or arrayed-io types, because otherwise we don't
317 * know how much to increment num_inputs/outputs
318 */
319 assert(glsl_type_is_vector_or_scalar(type) || glsl_type_is_unsized_array(type));
320
321 const char *name;
322 switch (mode) {
323 case nir_var_shader_in:
324 if (shader->info.stage == MESA_SHADER_VERTEX)
325 name = gl_vert_attrib_name(location);
326 else
327 name = gl_varying_slot_name_for_stage(location, shader->info.stage);
328 break;
329
330 case nir_var_shader_out:
331 if (shader->info.stage == MESA_SHADER_FRAGMENT)
332 name = gl_frag_result_name(location);
333 else
334 name = gl_varying_slot_name_for_stage(location, shader->info.stage);
335 break;
336
337 case nir_var_system_value:
338 name = gl_system_value_name(location);
339 break;
340
341 default:
342 unreachable("Unsupported variable mode");
343 }
344
345 nir_variable *var = nir_variable_create(shader, mode, type, name);
346 var->data.location = location;
347
348 switch (mode) {
349 case nir_var_shader_in:
350 var->data.driver_location = shader->num_inputs++;
351 break;
352
353 case nir_var_shader_out:
354 var->data.driver_location = shader->num_outputs++;
355 break;
356
357 case nir_var_system_value:
358 break;
359
360 default:
361 unreachable("Unsupported variable mode");
362 }
363
364 return var;
365 }
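/* Illustrative sketch (not part of the original file; "shader" is assumed):
 * adding a vec4 colour output to a fragment shader and letting this helper
 * pick the name and driver_location.
 *
 *    nir_variable *out =
 *       nir_create_variable_with_location(shader, nir_var_shader_out,
 *                                         FRAG_RESULT_DATA0,
 *                                         glsl_vec4_type());
 */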
366
367 nir_variable *
368 nir_get_variable_with_location(nir_shader *shader, nir_variable_mode mode, int location,
369 const struct glsl_type *type)
370 {
371 nir_variable *var = nir_find_variable_with_location(shader, mode, location);
372 if (var) {
373 /* If this shader has location_fracs, this builder function is not suitable. */
374 assert(var->data.location_frac == 0);
375
376 /* The variable for the slot should match what we expected. */
377 assert(type == var->type);
378 return var;
379 }
380
381 return nir_create_variable_with_location(shader, mode, location, type);
382 }
383
384 nir_variable *
385 nir_find_variable_with_location(nir_shader *shader,
386 nir_variable_mode mode,
387 unsigned location)
388 {
389 assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp);
390 nir_foreach_variable_with_modes(var, shader, mode) {
391 if (var->data.location == location)
392 return var;
393 }
394 return NULL;
395 }
396
397 nir_variable *
398 nir_find_variable_with_driver_location(nir_shader *shader,
399 nir_variable_mode mode,
400 unsigned location)
401 {
402 assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp);
403 nir_foreach_variable_with_modes(var, shader, mode) {
404 if (var->data.driver_location == location)
405 return var;
406 }
407 return NULL;
408 }
409
410 nir_variable *
411 nir_find_state_variable(nir_shader *s,
412 gl_state_index16 tokens[STATE_LENGTH])
413 {
414 nir_foreach_variable_with_modes(var, s, nir_var_uniform) {
415 if (var->num_state_slots == 1 &&
416 !memcmp(var->state_slots[0].tokens, tokens,
417 sizeof(var->state_slots[0].tokens)))
418 return var;
419 }
420 return NULL;
421 }
422
423 nir_variable *nir_find_sampler_variable_with_tex_index(nir_shader *shader,
424 unsigned texture_index)
425 {
426 nir_foreach_variable_with_modes(var, shader, nir_var_uniform) {
427 unsigned size =
428 glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
429 if ((glsl_type_is_texture(glsl_without_array(var->type)) ||
430 glsl_type_is_sampler(glsl_without_array(var->type))) &&
431 (var->data.binding == texture_index ||
432 (var->data.binding < texture_index &&
433 var->data.binding + size > texture_index)))
434 return var;
435 }
436 return NULL;
437 }
438
439 /* Annoyingly, qsort_r is not in the C standard library and, in particular, we
440 * can't count on it on MSVC and Android. So we stuff the CMP function into
441 * each array element. It's a bit messy and burns more memory but the list of
442 * variables should never be all that long.
443 */
444 struct var_cmp {
445 nir_variable *var;
446 int (*cmp)(const nir_variable *, const nir_variable *);
447 };
448
449 static int
450 var_sort_cmp(const void *_a, const void *_b, void *_cmp)
451 {
452 const struct var_cmp *a = _a;
453 const struct var_cmp *b = _b;
454 assert(a->cmp == b->cmp);
455 return a->cmp(a->var, b->var);
456 }
457
458 void
459 nir_sort_variables_with_modes(nir_shader *shader,
460 int (*cmp)(const nir_variable *,
461 const nir_variable *),
462 nir_variable_mode modes)
463 {
464 unsigned num_vars = 0;
465 nir_foreach_variable_with_modes(var, shader, modes) {
466 ++num_vars;
467 }
468 struct var_cmp *vars = ralloc_array(shader, struct var_cmp, num_vars);
469 unsigned i = 0;
470 nir_foreach_variable_with_modes_safe(var, shader, modes) {
471 exec_node_remove(&var->node);
472 vars[i++] = (struct var_cmp){
473 .var = var,
474 .cmp = cmp,
475 };
476 }
477 assert(i == num_vars);
478
479 util_qsort_r(vars, num_vars, sizeof(*vars), var_sort_cmp, cmp);
480
481 for (i = 0; i < num_vars; i++)
482 exec_list_push_tail(&shader->variables, &vars[i].var->node);
483
484 ralloc_free(vars);
485 }
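/* Illustrative sketch (not part of the original file; "shader" is assumed):
 * sorting all uniforms by location with a plain comparison callback.
 *
 *    static int
 *    cmp_loc(const nir_variable *a, const nir_variable *b)
 *    {
 *       return a->data.location - b->data.location;
 *    }
 *
 *    nir_sort_variables_with_modes(shader, cmp_loc, nir_var_uniform);
 */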
486
487 nir_function *
488 nir_function_create(nir_shader *shader, const char *name)
489 {
490 nir_function *func = ralloc(shader, nir_function);
491
492 exec_list_push_tail(&shader->functions, &func->node);
493
494 func->name = ralloc_strdup(func, name);
495 func->shader = shader;
496 func->num_params = 0;
497 func->params = NULL;
498 func->impl = NULL;
499 func->is_entrypoint = false;
500 func->is_preamble = false;
501 func->dont_inline = false;
502 func->should_inline = false;
503 func->driver_attributes = 0;
504 func->is_subroutine = false;
505 func->is_tmp_globals_wrapper = false;
506 func->subroutine_index = 0;
507 func->num_subroutine_types = 0;
508 func->subroutine_types = NULL;
509 func->workgroup_size[0] = 0;
510 func->workgroup_size[1] = 0;
511 func->workgroup_size[2] = 0;
512
513 /* Only meaningful for shader libraries, so don't export by default. */
514 func->is_exported = false;
515
516 return func;
517 }
518
519 void
520 nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src)
521 {
522 dest->src = nir_src_for_ssa(src->src.ssa);
523 for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
524 dest->swizzle[i] = src->swizzle[i];
525 }
526
527 bool
528 nir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, unsigned srcn)
529 {
530 static uint8_t trivial_swizzle[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
531 STATIC_ASSERT(ARRAY_SIZE(trivial_swizzle) == NIR_MAX_VEC_COMPONENTS);
532
533 const nir_alu_src *src = &alu->src[srcn];
534 unsigned num_components = nir_ssa_alu_instr_src_components(alu, srcn);
535
536 return (src->src.ssa->num_components == num_components) &&
537 (memcmp(src->swizzle, trivial_swizzle, num_components) == 0);
538 }
539
540 static void
541 cf_init(nir_cf_node *node, nir_cf_node_type type)
542 {
543 exec_node_init(&node->node);
544 node->parent = NULL;
545 node->type = type;
546 }
547
548 nir_function_impl *
549 nir_function_impl_create_bare(nir_shader *shader)
550 {
551 nir_function_impl *impl = ralloc(shader, nir_function_impl);
552
553 impl->function = NULL;
554 impl->preamble = NULL;
555
556 cf_init(&impl->cf_node, nir_cf_node_function);
557
558 exec_list_make_empty(&impl->body);
559 exec_list_make_empty(&impl->locals);
560 impl->ssa_alloc = 0;
561 impl->num_blocks = 0;
562 impl->valid_metadata = nir_metadata_none;
563 impl->structured = true;
564
565 /* create start & end blocks */
566 nir_block *start_block = nir_block_create(shader);
567 nir_block *end_block = nir_block_create(shader);
568 start_block->cf_node.parent = &impl->cf_node;
569 end_block->cf_node.parent = &impl->cf_node;
570 impl->end_block = end_block;
571
572 exec_list_push_tail(&impl->body, &start_block->cf_node.node);
573
574 start_block->successors[0] = end_block;
575 _mesa_set_add(end_block->predecessors, start_block);
576 return impl;
577 }
578
579 nir_function_impl *
580 nir_function_impl_create(nir_function *function)
581 {
582 assert(function->impl == NULL);
583
584 nir_function_impl *impl = nir_function_impl_create_bare(function->shader);
585 nir_function_set_impl(function, impl);
586 return impl;
587 }
588
589 nir_block *
590 nir_block_create(nir_shader *shader)
591 {
592 nir_block *block = rzalloc(shader, nir_block);
593
594 cf_init(&block->cf_node, nir_cf_node_block);
595
596 block->successors[0] = block->successors[1] = NULL;
597 block->predecessors = _mesa_pointer_set_create(block);
598 block->imm_dom = NULL;
599 /* XXX maybe it would be worth it to defer allocation? This
600 * way it doesn't get allocated for shader refs that never run
601 * nir_calc_dominance? For example, state-tracker creates an
602 * initial IR, clones that, runs appropriate lowering pass, passes
603 * to driver which does common lowering/opt, and then stores ref
604 * which is later used to do state-specific lowering and further
605 * opt. Do any of the references not need dominance metadata?
606 */
607 block->dom_frontier = _mesa_pointer_set_create(block);
608
609 exec_list_make_empty(&block->instr_list);
610
611 return block;
612 }
613
614 static inline void
615 src_init(nir_src *src)
616 {
617 src->ssa = NULL;
618 }
619
620 nir_if *
621 nir_if_create(nir_shader *shader)
622 {
623 nir_if *if_stmt = ralloc(shader, nir_if);
624
625 if_stmt->control = nir_selection_control_none;
626
627 cf_init(&if_stmt->cf_node, nir_cf_node_if);
628 src_init(&if_stmt->condition);
629
630 nir_block *then = nir_block_create(shader);
631 exec_list_make_empty(&if_stmt->then_list);
632 exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
633 then->cf_node.parent = &if_stmt->cf_node;
634
635 nir_block *else_stmt = nir_block_create(shader);
636 exec_list_make_empty(&if_stmt->else_list);
637 exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
638 else_stmt->cf_node.parent = &if_stmt->cf_node;
639
640 return if_stmt;
641 }
642
643 nir_loop *
644 nir_loop_create(nir_shader *shader)
645 {
646 nir_loop *loop = rzalloc(shader, nir_loop);
647
648 cf_init(&loop->cf_node, nir_cf_node_loop);
649 /* Assume that loops are divergent until proven otherwise */
650 loop->divergent_break = true;
651 loop->divergent_continue = true;
652
653 nir_block *body = nir_block_create(shader);
654 exec_list_make_empty(&loop->body);
655 exec_list_push_tail(&loop->body, &body->cf_node.node);
656 body->cf_node.parent = &loop->cf_node;
657
658 body->successors[0] = body;
659 _mesa_set_add(body->predecessors, body);
660
661 exec_list_make_empty(&loop->continue_list);
662
663 return loop;
664 }
665
666 static void
667 instr_init(nir_instr *instr, nir_instr_type type)
668 {
669 instr->type = type;
670 instr->block = NULL;
671 exec_node_init(&instr->node);
672 }
673
674 static void
675 alu_src_init(nir_alu_src *src)
676 {
677 src_init(&src->src);
678 for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
679 src->swizzle[i] = i;
680 }
681
682 nir_alu_instr *
683 nir_alu_instr_create(nir_shader *shader, nir_op op)
684 {
685 unsigned num_srcs = nir_op_infos[op].num_inputs;
686 nir_alu_instr *instr = gc_zalloc_zla(shader->gctx, nir_alu_instr, nir_alu_src, num_srcs);
687
688 instr_init(&instr->instr, nir_instr_type_alu);
689 instr->op = op;
690 for (unsigned i = 0; i < num_srcs; i++)
691 alu_src_init(&instr->src[i]);
692
693 return instr;
694 }
695
696 nir_deref_instr *
697 nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
698 {
699 nir_deref_instr *instr = gc_zalloc(shader->gctx, nir_deref_instr, 1);
700
701 instr_init(&instr->instr, nir_instr_type_deref);
702
703 instr->deref_type = deref_type;
704 if (deref_type != nir_deref_type_var)
705 src_init(&instr->parent);
706
707 if (deref_type == nir_deref_type_array ||
708 deref_type == nir_deref_type_ptr_as_array)
709 src_init(&instr->arr.index);
710
711 return instr;
712 }
713
714 nir_jump_instr *
715 nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
716 {
717 nir_jump_instr *instr = gc_alloc(shader->gctx, nir_jump_instr, 1);
718 instr_init(&instr->instr, nir_instr_type_jump);
719 src_init(&instr->condition);
720 instr->type = type;
721 instr->target = NULL;
722 instr->else_target = NULL;
723
724 return instr;
725 }
726
727 nir_load_const_instr *
728 nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
729 unsigned bit_size)
730 {
731 nir_load_const_instr *instr =
732 gc_zalloc_zla(shader->gctx, nir_load_const_instr, nir_const_value, num_components);
733 instr_init(&instr->instr, nir_instr_type_load_const);
734
735 nir_def_init(&instr->instr, &instr->def, num_components, bit_size);
736
737 return instr;
738 }
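/* Illustrative sketch (not part of the original file; "shader" and "block"
 * are assumed): materializing a scalar 32-bit constant by hand. The
 * nir_builder nir_imm_* helpers wrap this pattern.
 *
 *    nir_load_const_instr *lc = nir_load_const_instr_create(shader, 1, 32);
 *    lc->value[0].f32 = 1.0f;
 *    nir_instr_insert(nir_after_block(block), &lc->instr);
 */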
739
740 nir_intrinsic_instr *
741 nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
742 {
743 unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
744 nir_intrinsic_instr *instr =
745 gc_zalloc_zla(shader->gctx, nir_intrinsic_instr, nir_src, num_srcs);
746
747 instr_init(&instr->instr, nir_instr_type_intrinsic);
748 instr->intrinsic = op;
749
750 for (unsigned i = 0; i < num_srcs; i++)
751 src_init(&instr->src[i]);
752
753 return instr;
754 }
755
756 nir_call_instr *
757 nir_call_instr_create(nir_shader *shader, nir_function *callee)
758 {
759 const unsigned num_params = callee->num_params;
760 nir_call_instr *instr =
761 gc_zalloc_zla(shader->gctx, nir_call_instr, nir_src, num_params);
762
763 instr_init(&instr->instr, nir_instr_type_call);
764 instr->callee = callee;
765 instr->num_params = num_params;
766 for (unsigned i = 0; i < num_params; i++)
767 src_init(&instr->params[i]);
768
769 return instr;
770 }
771
772 static int8_t default_tg4_offsets[4][2] = {
773 { 0, 1 },
774 { 1, 1 },
775 { 1, 0 },
776 { 0, 0 },
777 };
778
779 nir_tex_instr *
780 nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
781 {
782 nir_tex_instr *instr = gc_zalloc(shader->gctx, nir_tex_instr, 1);
783 instr_init(&instr->instr, nir_instr_type_tex);
784
785 instr->num_srcs = num_srcs;
786 instr->src = gc_alloc(shader->gctx, nir_tex_src, num_srcs);
787 for (unsigned i = 0; i < num_srcs; i++)
788 src_init(&instr->src[i].src);
789
790 instr->texture_index = 0;
791 instr->sampler_index = 0;
792 memcpy(instr->tg4_offsets, default_tg4_offsets, sizeof(instr->tg4_offsets));
793
794 return instr;
795 }
796
797 void
798 nir_tex_instr_add_src(nir_tex_instr *tex,
799 nir_tex_src_type src_type,
800 nir_def *src)
801 {
802 nir_tex_src *new_srcs = gc_zalloc(gc_get_context(tex), nir_tex_src, tex->num_srcs + 1);
803
804 for (unsigned i = 0; i < tex->num_srcs; i++) {
805 new_srcs[i].src_type = tex->src[i].src_type;
806 nir_instr_move_src(&tex->instr, &new_srcs[i].src,
807 &tex->src[i].src);
808 }
809
810 gc_free(tex->src);
811 tex->src = new_srcs;
812
813 tex->src[tex->num_srcs].src_type = src_type;
814 nir_instr_init_src(&tex->instr, &tex->src[tex->num_srcs].src, src);
815 tex->num_srcs++;
816 }
817
818 void
819 nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx)
820 {
821 assert(src_idx < tex->num_srcs);
822
823 /* First rewrite the source to NIR_SRC_INIT */
824 nir_instr_clear_src(&tex->instr, &tex->src[src_idx].src);
825
826 /* Now, move all of the other sources down */
827 for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) {
828 tex->src[i - 1].src_type = tex->src[i].src_type;
829 nir_instr_move_src(&tex->instr, &tex->src[i - 1].src, &tex->src[i].src);
830 }
831 tex->num_srcs--;
832 }
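/* Illustrative sketch (not part of the original file; "tex" and the
 * nir_builder "b" are assumed): forcing an explicit LOD of zero on a texture
 * instruction, then dropping that source again via its index.
 *
 *    nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_imm_int(&b, 0));
 *
 *    int idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
 *    if (idx >= 0)
 *       nir_tex_instr_remove_src(tex, idx);
 */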
833
834 bool
835 nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex)
836 {
837 if (tex->op != nir_texop_tg4)
838 return false;
839 return memcmp(tex->tg4_offsets, default_tg4_offsets,
840 sizeof(tex->tg4_offsets)) != 0;
841 }
842
843 nir_phi_instr *
844 nir_phi_instr_create(nir_shader *shader)
845 {
846 nir_phi_instr *instr = gc_alloc(shader->gctx, nir_phi_instr, 1);
847 instr_init(&instr->instr, nir_instr_type_phi);
848
849 exec_list_make_empty(&instr->srcs);
850
851 return instr;
852 }
853
854 /**
855 * Adds a new source to a NIR instruction.
856 *
857 * Note that this does not update the def/use relationship for src, assuming
858 * that the instr is not in the shader. If it is, you have to do:
859 *
860 * list_addtail(&phi_src->src.use_link, &src.ssa->uses);
861 */
862 nir_phi_src *
863 nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_def *src)
864 {
865 nir_phi_src *phi_src;
866
867 phi_src = gc_zalloc(gc_get_context(instr), nir_phi_src, 1);
868 phi_src->pred = pred;
869 phi_src->src = nir_src_for_ssa(src);
870 nir_src_set_parent_instr(&phi_src->src, &instr->instr);
871 exec_list_push_tail(&instr->srcs, &phi_src->node);
872
873 return phi_src;
874 }
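/* Illustrative sketch (not part of the original file; blocks and values are
 * assumed): merging a value from two predecessors. Inserting the phi is what
 * establishes the use links for the sources added here.
 *
 *    nir_phi_instr *phi = nir_phi_instr_create(shader);
 *    nir_def_init(&phi->instr, &phi->def, 1, 32);
 *    nir_phi_instr_add_src(phi, then_block, then_val);
 *    nir_phi_instr_add_src(phi, else_block, else_val);
 *    nir_instr_insert(nir_before_block(merge_block), &phi->instr);
 */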
875
876 nir_parallel_copy_instr *
877 nir_parallel_copy_instr_create(nir_shader *shader)
878 {
879 nir_parallel_copy_instr *instr = gc_alloc(shader->gctx, nir_parallel_copy_instr, 1);
880 instr_init(&instr->instr, nir_instr_type_parallel_copy);
881
882 exec_list_make_empty(&instr->entries);
883
884 return instr;
885 }
886
887 nir_debug_info_instr *
888 nir_debug_info_instr_create(nir_shader *shader, nir_debug_info_type type,
889 uint32_t string_length)
890 {
891 uint32_t additional_size = 0;
892 if (type == nir_debug_info_string)
893 additional_size = string_length + 1;
894
895 nir_debug_info_instr *instr = gc_zalloc_size(
896 shader->gctx, sizeof(nir_debug_info_instr) + additional_size, 1);
897 instr_init(&instr->instr, nir_instr_type_debug_info);
898
899 instr->type = type;
900
901 if (type == nir_debug_info_string)
902 instr->string_length = string_length;
903
904 return instr;
905 }
906
907 nir_undef_instr *
908 nir_undef_instr_create(nir_shader *shader,
909 unsigned num_components,
910 unsigned bit_size)
911 {
912 nir_undef_instr *instr = gc_alloc(shader->gctx, nir_undef_instr, 1);
913 instr_init(&instr->instr, nir_instr_type_undef);
914
915 nir_def_init(&instr->instr, &instr->def, num_components, bit_size);
916
917 return instr;
918 }
919
920 static nir_const_value
921 const_value_float(double d, unsigned bit_size)
922 {
923 nir_const_value v;
924 memset(&v, 0, sizeof(v));
925
926 /* clang-format off */
927 switch (bit_size) {
928 case 16: v.u16 = _mesa_float_to_half(d); break;
929 case 32: v.f32 = d; break;
930 case 64: v.f64 = d; break;
931 default:
932 unreachable("Invalid bit size");
933 }
934 /* clang-format on */
935
936 return v;
937 }
938
939 static nir_const_value
940 const_value_int(int64_t i, unsigned bit_size)
941 {
942 nir_const_value v;
943 memset(&v, 0, sizeof(v));
944
945 /* clang-format off */
946 switch (bit_size) {
947 case 1: v.b = i & 1; break;
948 case 8: v.i8 = i; break;
949 case 16: v.i16 = i; break;
950 case 32: v.i32 = i; break;
951 case 64: v.i64 = i; break;
952 default:
953 unreachable("Invalid bit size");
954 }
955 /* clang-format on */
956
957 return v;
958 }
959
960 nir_const_value
961 nir_alu_binop_identity(nir_op binop, unsigned bit_size)
962 {
963 const int64_t max_int = (1ull << (bit_size - 1)) - 1;
964 const int64_t min_int = -max_int - 1;
965 switch (binop) {
966 case nir_op_iadd:
967 return const_value_int(0, bit_size);
968 case nir_op_fadd:
969 return const_value_float(0, bit_size);
970 case nir_op_imul:
971 return const_value_int(1, bit_size);
972 case nir_op_fmul:
973 return const_value_float(1, bit_size);
974 case nir_op_imin:
975 return const_value_int(max_int, bit_size);
976 case nir_op_umin:
977 return const_value_int(~0ull, bit_size);
978 case nir_op_fmin:
979 return const_value_float(INFINITY, bit_size);
980 case nir_op_imax:
981 return const_value_int(min_int, bit_size);
982 case nir_op_umax:
983 return const_value_int(0, bit_size);
984 case nir_op_fmax:
985 return const_value_float(-INFINITY, bit_size);
986 case nir_op_iand:
987 return const_value_int(~0ull, bit_size);
988 case nir_op_ior:
989 return const_value_int(0, bit_size);
990 case nir_op_ixor:
991 return const_value_int(0, bit_size);
992 default:
993 unreachable("Invalid reduction operation");
994 }
995 }
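/* Worked example (illustrative): the identity is the value that leaves the
 * reduction unchanged, e.g. for a 32-bit fmax reduction:
 *
 *    nir_const_value id = nir_alu_binop_identity(nir_op_fmax, 32);
 *    // id.f32 == -INFINITY, so fmax(x, id.f32) == x for any non-NaN x.
 */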
996
997 nir_function_impl *
998 nir_cf_node_get_function(nir_cf_node *node)
999 {
1000 while (node->type != nir_cf_node_function) {
1001 node = node->parent;
1002 }
1003
1004 return nir_cf_node_as_function(node);
1005 }
1006
1007 /* Reduces a cursor by trying to convert everything to after and trying to
1008 * go up to block granularity when possible.
1009 */
1010 static nir_cursor
1011 reduce_cursor(nir_cursor cursor)
1012 {
1013 switch (cursor.option) {
1014 case nir_cursor_before_block:
1015 if (exec_list_is_empty(&cursor.block->instr_list)) {
1016 /* Empty block. After is as good as before. */
1017 cursor.option = nir_cursor_after_block;
1018 }
1019 return cursor;
1020
1021 case nir_cursor_after_block:
1022 return cursor;
1023
1024 case nir_cursor_before_instr: {
1025 nir_instr *prev_instr = nir_instr_prev(cursor.instr);
1026 if (prev_instr) {
1027 /* Before this instruction is after the previous */
1028 cursor.instr = prev_instr;
1029 cursor.option = nir_cursor_after_instr;
1030 } else {
1031 /* No previous instruction. Switch to before block */
1032 cursor.block = cursor.instr->block;
1033 cursor.option = nir_cursor_before_block;
1034 }
1035 return reduce_cursor(cursor);
1036 }
1037
1038 case nir_cursor_after_instr:
1039 if (nir_instr_next(cursor.instr) == NULL) {
1040 /* This is the last instruction, switch to after block */
1041 cursor.option = nir_cursor_after_block;
1042 cursor.block = cursor.instr->block;
1043 }
1044 return cursor;
1045
1046 default:
1047 unreachable("Inavlid cursor option");
1048 }
1049 }
1050
1051 bool
1052 nir_cursors_equal(nir_cursor a, nir_cursor b)
1053 {
1054 /* Reduced cursors should be unique */
1055 a = reduce_cursor(a);
1056 b = reduce_cursor(b);
1057
1058 return a.block == b.block && a.option == b.option;
1059 }
1060
1061 static bool
1062 add_use_cb(nir_src *src, void *state)
1063 {
1064 nir_instr *instr = state;
1065
1066 nir_src_set_parent_instr(src, instr);
1067 list_addtail(&src->use_link, &src->ssa->uses);
1068
1069 return true;
1070 }
1071
1072 static bool
1073 add_ssa_def_cb(nir_def *def, void *state)
1074 {
1075 nir_instr *instr = state;
1076
1077 if (instr->block && def->index == UINT_MAX) {
1078 nir_function_impl *impl =
1079 nir_cf_node_get_function(&instr->block->cf_node);
1080
1081 def->index = impl->ssa_alloc++;
1082
1083 impl->valid_metadata &= ~nir_metadata_live_defs;
1084 }
1085
1086 return true;
1087 }
1088
1089 static void
1090 add_defs_uses(nir_instr *instr)
1091 {
1092 nir_foreach_src(instr, add_use_cb, instr);
1093 nir_foreach_def(instr, add_ssa_def_cb, instr);
1094 }
1095
1096 void
1097 nir_instr_insert(nir_cursor cursor, nir_instr *instr)
1098 {
1099 switch (cursor.option) {
1100 case nir_cursor_before_block:
1101 /* Only allow inserting jumps into empty blocks. */
1102 if (instr->type == nir_instr_type_jump)
1103 assert(exec_list_is_empty(&cursor.block->instr_list));
1104
1105 instr->block = cursor.block;
1106 add_defs_uses(instr);
1107 exec_list_push_head(&cursor.block->instr_list, &instr->node);
1108 break;
1109 case nir_cursor_after_block: {
1110 /* Inserting instructions after a jump is illegal. */
1111 nir_instr *last = nir_block_last_instr(cursor.block);
1112 assert(last == NULL || last->type != nir_instr_type_jump);
1113 (void)last;
1114
1115 instr->block = cursor.block;
1116 add_defs_uses(instr);
1117 exec_list_push_tail(&cursor.block->instr_list, &instr->node);
1118 break;
1119 }
1120 case nir_cursor_before_instr:
1121 assert(instr->type != nir_instr_type_jump);
1122 instr->block = cursor.instr->block;
1123 add_defs_uses(instr);
1124 exec_node_insert_node_before(&cursor.instr->node, &instr->node);
1125 break;
1126 case nir_cursor_after_instr:
1127 /* Inserting instructions after a jump is illegal. */
1128 assert(cursor.instr->type != nir_instr_type_jump);
1129
1130 /* Only allow inserting jumps at the end of the block. */
1131 if (instr->type == nir_instr_type_jump)
1132 assert(cursor.instr == nir_block_last_instr(cursor.instr->block));
1133
1134 instr->block = cursor.instr->block;
1135 add_defs_uses(instr);
1136 exec_node_insert_after(&cursor.instr->node, &instr->node);
1137 break;
1138 }
1139
1140 if (instr->type == nir_instr_type_jump)
1141 nir_handle_add_jump(instr->block);
1142
1143 nir_function_impl *impl = nir_cf_node_get_function(&instr->block->cf_node);
1144 impl->valid_metadata &= ~nir_metadata_instr_index;
1145 }
1146
1147 bool
1148 nir_instr_move(nir_cursor cursor, nir_instr *instr)
1149 {
1150 /* If the cursor happens to refer to this instruction (either before or
1151 * after), don't do anything.
1152 */
1153 switch (cursor.option) {
1154 case nir_cursor_before_instr:
1155 if (cursor.instr == instr || nir_instr_prev(cursor.instr) == instr)
1156 return false;
1157 break;
1158 case nir_cursor_after_instr:
1159 if (cursor.instr == instr || nir_instr_next(cursor.instr) == instr)
1160 return false;
1161 break;
1162 case nir_cursor_before_block:
1163 if (cursor.block == instr->block && nir_instr_is_first(instr))
1164 return false;
1165 break;
1166 case nir_cursor_after_block:
1167 if (cursor.block == instr->block && nir_instr_is_last(instr))
1168 return false;
1169 break;
1170 }
1171
1172 nir_instr_remove(instr);
1173 nir_instr_insert(cursor, instr);
1174 return true;
1175 }
1176
1177 static bool
1178 src_is_valid(const nir_src *src)
1179 {
1180 return (src->ssa != NULL);
1181 }
1182
1183 static bool
1184 remove_use_cb(nir_src *src, void *state)
1185 {
1186 (void)state;
1187
1188 if (src_is_valid(src))
1189 list_del(&src->use_link);
1190
1191 return true;
1192 }
1193
1194 static void
1195 remove_defs_uses(nir_instr *instr)
1196 {
1197 nir_foreach_src(instr, remove_use_cb, instr);
1198 }
1199
1200 void
1201 nir_instr_remove_v(nir_instr *instr)
1202 {
1203 remove_defs_uses(instr);
1204 exec_node_remove(&instr->node);
1205
1206 if (instr->type == nir_instr_type_jump) {
1207 nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
1208 nir_handle_remove_jump(instr->block, jump_instr->type);
1209 }
1210 }
1211
1212 void
1213 nir_instr_free(nir_instr *instr)
1214 {
1215 switch (instr->type) {
1216 case nir_instr_type_tex:
1217 gc_free(nir_instr_as_tex(instr)->src);
1218 break;
1219
1220 case nir_instr_type_phi: {
1221 nir_phi_instr *phi = nir_instr_as_phi(instr);
1222 nir_foreach_phi_src_safe(phi_src, phi)
1223 gc_free(phi_src);
1224 break;
1225 }
1226
1227 default:
1228 break;
1229 }
1230
1231 gc_free(instr);
1232 }
1233
1234 void
1235 nir_instr_free_list(struct exec_list *list)
1236 {
1237 struct exec_node *node;
1238 while ((node = exec_list_pop_head(list))) {
1239 nir_instr *removed_instr = exec_node_data(nir_instr, node, node);
1240 nir_instr_free(removed_instr);
1241 }
1242 }
1243
1244 static bool
1245 nir_instr_free_and_dce_live_cb(nir_def *def, void *state)
1246 {
1247 bool *live = state;
1248
1249 if (!nir_def_is_unused(def)) {
1250 *live = true;
1251 return false;
1252 } else {
1253 return true;
1254 }
1255 }
1256
1257 static bool
1258 nir_instr_free_and_dce_is_live(nir_instr *instr)
1259 {
1260 /* Note: don't have to worry about jumps because they don't have dests to
1261 * become unused.
1262 */
1263 if (instr->type == nir_instr_type_intrinsic) {
1264 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1265 const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
1266 if (!(info->flags & NIR_INTRINSIC_CAN_ELIMINATE))
1267 return true;
1268 }
1269
1270 bool live = false;
1271 nir_foreach_def(instr, nir_instr_free_and_dce_live_cb, &live);
1272 return live;
1273 }
1274
1275 static bool
1276 nir_instr_dce_add_dead_srcs_cb(nir_src *src, void *state)
1277 {
1278 nir_instr_worklist *wl = state;
1279
1280 list_del(&src->use_link);
1281 if (!nir_instr_free_and_dce_is_live(src->ssa->parent_instr))
1282 nir_instr_worklist_push_tail(wl, src->ssa->parent_instr);
1283
1284 /* Stop nir_instr_remove from trying to delete the link again. */
1285 src->ssa = NULL;
1286
1287 return true;
1288 }
1289
1290 static void
1291 nir_instr_dce_add_dead_ssa_srcs(nir_instr_worklist *wl, nir_instr *instr)
1292 {
1293 nir_foreach_src(instr, nir_instr_dce_add_dead_srcs_cb, wl);
1294 }
1295
1296 /**
1297 * Frees an instruction and any SSA defs that it used that are now dead,
1298 * returning a nir_cursor where the instruction previously was.
1299 */
1300 nir_cursor
1301 nir_instr_free_and_dce(nir_instr *instr)
1302 {
1303 nir_instr_worklist *worklist = nir_instr_worklist_create();
1304
1305 nir_instr_dce_add_dead_ssa_srcs(worklist, instr);
1306 nir_cursor c = nir_instr_remove(instr);
1307
1308 struct exec_list to_free;
1309 exec_list_make_empty(&to_free);
1310
1311 nir_instr *dce_instr;
1312 while ((dce_instr = nir_instr_worklist_pop_head(worklist))) {
1313 nir_instr_dce_add_dead_ssa_srcs(worklist, dce_instr);
1314
1315 /* If we're removing the instr where our cursor is, then we have to
1316 * point the cursor elsewhere.
1317 */
1318 if ((c.option == nir_cursor_before_instr ||
1319 c.option == nir_cursor_after_instr) &&
1320 c.instr == dce_instr)
1321 c = nir_instr_remove(dce_instr);
1322 else
1323 nir_instr_remove(dce_instr);
1324 exec_list_push_tail(&to_free, &dce_instr->node);
1325 }
1326
1327 nir_instr_free_list(&to_free);
1328
1329 nir_instr_worklist_destroy(worklist);
1330
1331 return c;
1332 }
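/* Illustrative sketch (not part of the original file; "b", "alu" and
 * "replacement" are assumed): replacing an ALU result and letting its
 * now-dead operand chain be cleaned up while keeping a valid builder cursor.
 *
 *    nir_def_rewrite_uses(&alu->def, replacement);
 *    b.cursor = nir_instr_free_and_dce(&alu->instr);
 */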
1333
1334 /*@}*/
1335
1336 nir_def *
1337 nir_instr_def(nir_instr *instr)
1338 {
1339 switch (instr->type) {
1340 case nir_instr_type_alu:
1341 return &nir_instr_as_alu(instr)->def;
1342
1343 case nir_instr_type_deref:
1344 return &nir_instr_as_deref(instr)->def;
1345
1346 case nir_instr_type_tex:
1347 return &nir_instr_as_tex(instr)->def;
1348
1349 case nir_instr_type_intrinsic: {
1350 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1351 if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
1352 return &intrin->def;
1353 } else {
1354 return NULL;
1355 }
1356 }
1357
1358 case nir_instr_type_phi:
1359 return &nir_instr_as_phi(instr)->def;
1360
1361 case nir_instr_type_parallel_copy:
1362 unreachable("Parallel copies are unsupported by this function");
1363
1364 case nir_instr_type_load_const:
1365 return &nir_instr_as_load_const(instr)->def;
1366
1367 case nir_instr_type_undef:
1368 return &nir_instr_as_undef(instr)->def;
1369
1370 case nir_instr_type_debug_info:
1371 return &nir_instr_as_debug_info(instr)->def;
1372
1373 case nir_instr_type_call:
1374 case nir_instr_type_jump:
1375 return NULL;
1376 }
1377
1378 unreachable("Invalid instruction type");
1379 }
1380
1381 bool
1382 nir_foreach_phi_src_leaving_block(nir_block *block,
1383 nir_foreach_src_cb cb,
1384 void *state)
1385 {
1386 for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) {
1387 if (block->successors[i] == NULL)
1388 continue;
1389
1390 nir_foreach_phi(phi, block->successors[i]) {
1391 nir_foreach_phi_src(phi_src, phi) {
1392 if (phi_src->pred == block) {
1393 if (!cb(&phi_src->src, state))
1394 return false;
1395 }
1396 }
1397 }
1398 }
1399
1400 return true;
1401 }
1402
1403 nir_const_value
1404 nir_const_value_for_float(double f, unsigned bit_size)
1405 {
1406 nir_const_value v;
1407 memset(&v, 0, sizeof(v));
1408
1409 /* clang-format off */
1410 switch (bit_size) {
1411 case 16: v.u16 = _mesa_float_to_half(f); break;
1412 case 32: v.f32 = f; break;
1413 case 64: v.f64 = f; break;
1414 default: unreachable("Invalid bit size");
1415 }
1416 /* clang-format on */
1417
1418 return v;
1419 }
1420
1421 double
1422 nir_const_value_as_float(nir_const_value value, unsigned bit_size)
1423 {
1424 /* clang-format off */
1425 switch (bit_size) {
1426 case 16: return _mesa_half_to_float(value.u16);
1427 case 32: return value.f32;
1428 case 64: return value.f64;
1429 default: unreachable("Invalid bit size");
1430 }
1431 /* clang-format on */
1432 }
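/* Worked example (illustrative): round-tripping a value through a 16-bit
 * constant. 1.5 is exactly representable as fp16, so the value survives.
 *
 *    nir_const_value v = nir_const_value_for_float(1.5, 16);  // stored in v.u16
 *    double d = nir_const_value_as_float(v, 16);              // d == 1.5
 */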
1433
1434 nir_const_value *
1435 nir_src_as_const_value(nir_src src)
1436 {
1437 if (src.ssa->parent_instr->type != nir_instr_type_load_const)
1438 return NULL;
1439
1440 nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
1441
1442 return load->value;
1443 }
1444
1445 const char *
1446 nir_src_as_string(nir_src src)
1447 {
1448 nir_debug_info_instr *di = nir_src_as_debug_info(src);
1449 if (di && di->type == nir_debug_info_string)
1450 return di->string;
1451
1452 return NULL;
1453 }
1454
1455 /**
1456 * Returns true if the source is known to be always uniform. Otherwise it
1457 * returns false which means it may or may not be uniform but it can't be
1458 * determined.
1459 *
1460 * For a more precise analysis of uniform values, use nir_divergence_analysis.
1461 */
1462 bool
1463 nir_src_is_always_uniform(nir_src src)
1464 {
1465 /* Constants are trivially uniform */
1466 if (src.ssa->parent_instr->type == nir_instr_type_load_const)
1467 return true;
1468
1469 if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
1470 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);
1471 /* As are uniform variables */
1472 if (intr->intrinsic == nir_intrinsic_load_uniform &&
1473 nir_src_is_always_uniform(intr->src[0]))
1474 return true;
1475 /* From the Vulkan specification 15.6.1. Push Constant Interface:
1476 * "Any member of a push constant block that is declared as an array must
1477 * only be accessed with dynamically uniform indices."
1478 */
1479 if (intr->intrinsic == nir_intrinsic_load_push_constant)
1480 return true;
1481 if (intr->intrinsic == nir_intrinsic_load_deref &&
1482 nir_deref_mode_is(nir_src_as_deref(intr->src[0]), nir_var_mem_push_const))
1483 return true;
1484 }
1485
1486 /* Operating together uniform expressions produces a uniform result */
1487 if (src.ssa->parent_instr->type == nir_instr_type_alu) {
1488 nir_alu_instr *alu = nir_instr_as_alu(src.ssa->parent_instr);
1489 for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
1490 if (!nir_src_is_always_uniform(alu->src[i].src))
1491 return false;
1492 }
1493
1494 return true;
1495 }
1496
1497 /* XXX: this could have many more tests, such as when a sampler function is
1498 * called with uniform arguments.
1499 */
1500 return false;
1501 }
1502
1503 nir_block *
1504 nir_src_get_block(nir_src *src)
1505 {
1506 if (nir_src_is_if(src))
1507 return nir_cf_node_cf_tree_prev(&nir_src_parent_if(src)->cf_node);
1508 else if (nir_src_parent_instr(src)->type == nir_instr_type_phi)
1509 return list_entry(src, nir_phi_src, src)->pred;
1510 else
1511 return nir_src_parent_instr(src)->block;
1512 }
1513
1514 static void
1515 src_remove_all_uses(nir_src *src)
1516 {
1517 if (src && src_is_valid(src))
1518 list_del(&src->use_link);
1519 }
1520
1521 static void
1522 src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
1523 {
1524 if (!src)
1525 return;
1526
1527 if (!src_is_valid(src))
1528 return;
1529
1530 if (parent_instr) {
1531 nir_src_set_parent_instr(src, parent_instr);
1532 } else {
1533 assert(parent_if);
1534 nir_src_set_parent_if(src, parent_if);
1535 }
1536
1537 list_addtail(&src->use_link, &src->ssa->uses);
1538 }
1539
1540 void
1541 nir_instr_init_src(nir_instr *instr, nir_src *src, nir_def *def)
1542 {
1543 *src = nir_src_for_ssa(def);
1544 src_add_all_uses(src, instr, NULL);
1545 }
1546
1547 void
1548 nir_instr_clear_src(nir_instr *instr, nir_src *src)
1549 {
1550 src_remove_all_uses(src);
1551 *src = NIR_SRC_INIT;
1552 }
1553
1554 void
1555 nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
1556 {
1557 assert(!src_is_valid(dest) || nir_src_parent_instr(dest) == dest_instr);
1558
1559 src_remove_all_uses(dest);
1560 src_remove_all_uses(src);
1561 *dest = *src;
1562 *src = NIR_SRC_INIT;
1563 src_add_all_uses(dest, dest_instr, NULL);
1564 }
1565
1566 void
1567 nir_def_init(nir_instr *instr, nir_def *def,
1568 unsigned num_components,
1569 unsigned bit_size)
1570 {
1571 def->parent_instr = instr;
1572 list_inithead(&def->uses);
1573 def->num_components = num_components;
1574 def->bit_size = bit_size;
1575 def->divergent = true; /* This is the safer default */
1576 def->loop_invariant = false;
1577
1578 if (instr->block) {
1579 nir_function_impl *impl =
1580 nir_cf_node_get_function(&instr->block->cf_node);
1581
1582 def->index = impl->ssa_alloc++;
1583
1584 impl->valid_metadata &= ~nir_metadata_live_defs;
1585 } else {
1586 def->index = UINT_MAX;
1587 }
1588 }
1589
1590 void
1591 nir_def_rewrite_uses(nir_def *def, nir_def *new_ssa)
1592 {
1593 assert(def != new_ssa);
1594 nir_foreach_use_including_if_safe(use_src, def) {
1595 nir_src_rewrite(use_src, new_ssa);
1596 }
1597 }
1598
1599 void
1600 nir_def_rewrite_uses_src(nir_def *def, nir_src new_src)
1601 {
1602 nir_def_rewrite_uses(def, new_src.ssa);
1603 }
1604
1605 bool
1606 nir_instr_is_before(nir_instr *first, nir_instr *second)
1607 {
1608 if (first->block != second->block)
1609 return false;
1610
1611 /* Search backwards looking for "first" */
1612 while (second != nir_block_first_instr(second->block)) {
1613 second = nir_instr_prev(second);
1614 assert(second);
1615
1616 if (first == second)
1617 return true;
1618 }
1619
1620 return false;
1621 }
1622
1623 static bool
1624 is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
1625 {
1626 assert(start->block == end->block);
1627
1628 if (between->block != start->block)
1629 return false;
1630
1631 /* Search backwards looking for "between" */
1632 while (start != end) {
1633 if (between == end)
1634 return true;
1635
1636 end = nir_instr_prev(end);
1637 assert(end);
1638 }
1639
1640 return false;
1641 }
1642
1643 /* Replaces all uses of the given SSA def with the given source but only if
1644 * the use comes after the after_me instruction. This can be useful if you
1645 * are emitting code to fix up the result of some instruction: you can freely
1646 * use the result in that code and then call rewrite_uses_after and pass the
1647 * last fixup instruction as after_me and it will replace all of the uses you
1648 * want without touching the fixup code.
1649 *
1650 * This function assumes that after_me is in the same block as
1651 * def->parent_instr and that after_me comes after def->parent_instr.
1652 */
1653 void
1654 nir_def_rewrite_uses_after(nir_def *def, nir_def *new_ssa,
1655 nir_instr *after_me)
1656 {
1657 if (def == new_ssa)
1658 return;
1659
1660 nir_foreach_use_including_if_safe(use_src, def) {
1661 if (!nir_src_is_if(use_src)) {
1662 assert(nir_src_parent_instr(use_src) != def->parent_instr);
1663
1664 /* Since def already dominates all of its uses, the only way a use can
1665 * not be dominated by after_me is if it is between def and after_me in
1666 * the instruction list.
1667 */
1668 if (is_instr_between(def->parent_instr, after_me, nir_src_parent_instr(use_src)))
1669 continue;
1670 }
1671
1672 nir_src_rewrite(use_src, new_ssa);
1673 }
1674 }
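/* Illustrative sketch (not part of the original file; "b" and "def" are
 * assumed): the fixup pattern described above, clamping a result and
 * rewriting every other use of it.
 *
 *    b.cursor = nir_after_instr(def->parent_instr);
 *    nir_def *clamped = nir_fsat(&b, def);
 *    nir_def_rewrite_uses_after(def, clamped, clamped->parent_instr);
 *    // All prior uses of "def" now read "clamped"; the fsat itself is untouched.
 */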
1675
1676 static nir_def *
1677 get_store_value(nir_intrinsic_instr *intrin)
1678 {
1679 assert(nir_intrinsic_has_write_mask(intrin));
1680 /* deref stores have the deref in src[0] and the store value in src[1] */
1681 if (intrin->intrinsic == nir_intrinsic_store_deref ||
1682 intrin->intrinsic == nir_intrinsic_store_deref_block_intel)
1683 return intrin->src[1].ssa;
1684
1685 /* all other stores have the store value in src[0] */
1686 return intrin->src[0].ssa;
1687 }
1688
1689 nir_component_mask_t
1690 nir_src_components_read(const nir_src *src)
1691 {
1692 assert(nir_src_parent_instr(src));
1693
1694 if (nir_src_parent_instr(src)->type == nir_instr_type_alu) {
1695 nir_alu_instr *alu = nir_instr_as_alu(nir_src_parent_instr(src));
1696 nir_alu_src *alu_src = exec_node_data(nir_alu_src, src, src);
1697 int src_idx = alu_src - &alu->src[0];
1698 assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs);
1699 return nir_alu_instr_src_read_mask(alu, src_idx);
1700 } else if (nir_src_parent_instr(src)->type == nir_instr_type_intrinsic) {
1701 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(nir_src_parent_instr(src));
1702 if (nir_intrinsic_has_write_mask(intrin) && src->ssa == get_store_value(intrin))
1703 return nir_intrinsic_write_mask(intrin);
1704 else
1705 return (1 << src->ssa->num_components) - 1;
1706 } else {
1707 return (1 << src->ssa->num_components) - 1;
1708 }
1709 }
1710
1711 nir_component_mask_t
1712 nir_def_components_read(const nir_def *def)
1713 {
1714 nir_component_mask_t read_mask = 0;
1715
1716 nir_foreach_use_including_if(use, def) {
1717 read_mask |= nir_src_is_if(use) ? 1 : nir_src_components_read(use);
1718
1719 if (read_mask == (1 << def->num_components) - 1)
1720 return read_mask;
1721 }
1722
1723 return read_mask;
1724 }
1725
1726 bool
1727 nir_def_all_uses_are_fsat(const nir_def *def)
1728 {
1729 nir_foreach_use(src, def) {
1730 if (nir_src_is_if(src))
1731 return false;
1732
1733 nir_instr *use = nir_src_parent_instr(src);
1734 if (use->type != nir_instr_type_alu)
1735 return false;
1736
1737 nir_alu_instr *alu = nir_instr_as_alu(use);
1738 if (alu->op != nir_op_fsat)
1739 return false;
1740 }
1741
1742 return true;
1743 }
1744
1745 bool
1746 nir_def_all_uses_ignore_sign_bit(const nir_def *def)
1747 {
1748 nir_foreach_use(use, def) {
1749 if (nir_src_is_if(use))
1750 return false;
1751 nir_instr *instr = nir_src_parent_instr(use);
1752
1753 if (instr->type != nir_instr_type_alu)
1754 return false;
1755
1756 nir_alu_instr *alu = nir_instr_as_alu(instr);
1757 if (alu->op == nir_op_fabs) {
1758 continue;
1759 } else if (alu->op == nir_op_fmul || alu->op == nir_op_ffma) {
1760 nir_alu_src *alu_src = list_entry(use, nir_alu_src, src);
1761 unsigned src_index = alu_src - alu->src;
1762 /* a * a doesn't care about sign of a. */
1763 if (src_index < 2 && nir_alu_srcs_equal(alu, alu, 0, 1))
1764 continue;
1765 }
1766
1767 return false;
1768 }
1769 return true;
1770 }
1771
1772 nir_block *
1773 nir_block_unstructured_next(nir_block *block)
1774 {
1775 if (block == NULL) {
1776 /* nir_foreach_block_unstructured_safe() will call this function on a
1777 * NULL block after the last iteration, but it won't use the result so
1778 * just return NULL here.
1779 */
1780 return NULL;
1781 }
1782
1783 nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
1784 if (cf_next == NULL && block->cf_node.parent->type == nir_cf_node_function)
1785 return NULL;
1786
1787 if (cf_next && cf_next->type == nir_cf_node_block)
1788 return nir_cf_node_as_block(cf_next);
1789
1790 return nir_block_cf_tree_next(block);
1791 }
1792
1793 nir_block *
1794 nir_unstructured_start_block(nir_function_impl *impl)
1795 {
1796 return nir_start_block(impl);
1797 }
1798
1799 nir_block *
1800 nir_block_cf_tree_next(nir_block *block)
1801 {
1802 if (block == NULL) {
1803 /* nir_foreach_block_safe() will call this function on a NULL block
1804 * after the last iteration, but it won't use the result so just return
1805 * NULL here.
1806 */
1807 return NULL;
1808 }
1809
1810 assert(nir_cf_node_get_function(&block->cf_node)->structured);
1811
1812 nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
1813 if (cf_next)
1814 return nir_cf_node_cf_tree_first(cf_next);
1815
1816 nir_cf_node *parent = block->cf_node.parent;
1817 if (parent->type == nir_cf_node_function)
1818 return NULL;
1819
1820 /* Is this the last block of a cf_node? Return the following block */
1821 if (block == nir_cf_node_cf_tree_last(parent))
1822 return nir_cf_node_as_block(nir_cf_node_next(parent));
1823
1824 switch (parent->type) {
1825 case nir_cf_node_if: {
1826 /* We are at the end of the if. Go to the beginning of the else */
1827 nir_if *if_stmt = nir_cf_node_as_if(parent);
1828 assert(block == nir_if_last_then_block(if_stmt));
1829 return nir_if_first_else_block(if_stmt);
1830 }
1831
1832 case nir_cf_node_loop: {
1833 /* We are at the end of the body and there is a continue construct */
1834 nir_loop *loop = nir_cf_node_as_loop(parent);
1835 assert(block == nir_loop_last_block(loop) &&
1836 nir_loop_has_continue_construct(loop));
1837 return nir_loop_first_continue_block(loop);
1838 }
1839
1840 default:
1841 unreachable("unknown cf node type");
1842 }
1843 }
1844
1845 nir_block *
1846 nir_block_cf_tree_prev(nir_block *block)
1847 {
1848 if (block == NULL) {
1849 /* do this for consistency with nir_block_cf_tree_next() */
1850 return NULL;
1851 }
1852
1853 assert(nir_cf_node_get_function(&block->cf_node)->structured);
1854
1855 nir_cf_node *cf_prev = nir_cf_node_prev(&block->cf_node);
1856 if (cf_prev)
1857 return nir_cf_node_cf_tree_last(cf_prev);
1858
1859 nir_cf_node *parent = block->cf_node.parent;
1860 if (parent->type == nir_cf_node_function)
1861 return NULL;
1862
1863 /* Is this the first block of a cf_node? Return the previous block */
1864 if (block == nir_cf_node_cf_tree_first(parent))
1865 return nir_cf_node_as_block(nir_cf_node_prev(parent));
1866
1867 switch (parent->type) {
1868 case nir_cf_node_if: {
1869 /* We are at the beginning of the else. Go to the end of the if */
1870 nir_if *if_stmt = nir_cf_node_as_if(parent);
1871 assert(block == nir_if_first_else_block(if_stmt));
1872 return nir_if_last_then_block(if_stmt);
1873 }
1874 case nir_cf_node_loop: {
1875 /* We are at the beginning of the continue construct. */
1876 nir_loop *loop = nir_cf_node_as_loop(parent);
1877 assert(nir_loop_has_continue_construct(loop) &&
1878 block == nir_loop_first_continue_block(loop));
1879 return nir_loop_last_block(loop);
1880 }
1881
1882 default:
1883 unreachable("unknown cf node type");
1884 }
1885 }
1886
1887 nir_block *
1888 nir_cf_node_cf_tree_first(nir_cf_node *node)
1889 {
1890 switch (node->type) {
1891 case nir_cf_node_function: {
1892 nir_function_impl *impl = nir_cf_node_as_function(node);
1893 return nir_start_block(impl);
1894 }
1895
1896 case nir_cf_node_if: {
1897 nir_if *if_stmt = nir_cf_node_as_if(node);
1898 return nir_if_first_then_block(if_stmt);
1899 }
1900
1901 case nir_cf_node_loop: {
1902 nir_loop *loop = nir_cf_node_as_loop(node);
1903 return nir_loop_first_block(loop);
1904 }
1905
1906 case nir_cf_node_block: {
1907 return nir_cf_node_as_block(node);
1908 }
1909
1910 default:
1911 unreachable("unknown node type");
1912 }
1913 }
1914
1915 nir_block *
1916 nir_cf_node_cf_tree_last(nir_cf_node *node)
1917 {
1918 switch (node->type) {
1919 case nir_cf_node_function: {
1920 nir_function_impl *impl = nir_cf_node_as_function(node);
1921 return nir_impl_last_block(impl);
1922 }
1923
1924 case nir_cf_node_if: {
1925 nir_if *if_stmt = nir_cf_node_as_if(node);
1926 return nir_if_last_else_block(if_stmt);
1927 }
1928
1929 case nir_cf_node_loop: {
1930 nir_loop *loop = nir_cf_node_as_loop(node);
1931 if (nir_loop_has_continue_construct(loop))
1932 return nir_loop_last_continue_block(loop);
1933 else
1934 return nir_loop_last_block(loop);
1935 }
1936
1937 case nir_cf_node_block: {
1938 return nir_cf_node_as_block(node);
1939 }
1940
1941 default:
1942 unreachable("unknown node type");
1943 }
1944 }
1945
1946 nir_block *
1947 nir_cf_node_cf_tree_next(nir_cf_node *node)
1948 {
1949 if (node->type == nir_cf_node_block)
1950 return nir_block_cf_tree_next(nir_cf_node_as_block(node));
1951 else if (node->type == nir_cf_node_function)
1952 return NULL;
1953 else
1954 return nir_cf_node_as_block(nir_cf_node_next(node));
1955 }
1956
1957 nir_block *
1958 nir_cf_node_cf_tree_prev(nir_cf_node *node)
1959 {
1960 if (node->type == nir_cf_node_block)
1961 return nir_block_cf_tree_prev(nir_cf_node_as_block(node));
1962 else if (node->type == nir_cf_node_function)
1963 return NULL;
1964 else
1965 return nir_cf_node_as_block(nir_cf_node_prev(node));
1966 }
1967
1968 nir_if *
1969 nir_block_get_following_if(nir_block *block)
1970 {
1971 if (exec_node_is_tail_sentinel(&block->cf_node.node))
1972 return NULL;
1973
1974 if (nir_cf_node_is_last(&block->cf_node))
1975 return NULL;
1976
1977 nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
1978
1979 if (next_node->type != nir_cf_node_if)
1980 return NULL;
1981
1982 return nir_cf_node_as_if(next_node);
1983 }
1984
1985 nir_loop *
1986 nir_block_get_following_loop(nir_block *block)
1987 {
1988 if (exec_node_is_tail_sentinel(&block->cf_node.node))
1989 return NULL;
1990
1991 if (nir_cf_node_is_last(&block->cf_node))
1992 return NULL;
1993
1994 nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
1995
1996 if (next_node->type != nir_cf_node_loop)
1997 return NULL;
1998
1999 return nir_cf_node_as_loop(next_node);
2000 }
2001
2002 static int
2003 compare_block_index(const void *p1, const void *p2)
2004 {
2005 const nir_block *block1 = *((const nir_block **)p1);
2006 const nir_block *block2 = *((const nir_block **)p2);
2007
2008 return (int)block1->index - (int)block2->index;
2009 }
2010
2011 nir_block **
2012 nir_block_get_predecessors_sorted(const nir_block *block, void *mem_ctx)
2013 {
2014 nir_block **preds =
2015 ralloc_array(mem_ctx, nir_block *, block->predecessors->entries);
2016
2017 unsigned i = 0;
2018 set_foreach(block->predecessors, entry)
2019 preds[i++] = (nir_block *)entry->key;
2020 assert(i == block->predecessors->entries);
2021
2022 qsort(preds, block->predecessors->entries, sizeof(nir_block *),
2023 compare_block_index);
2024
2025 return preds;
2026 }
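
/* A short usage sketch, assuming block indices are up to date (see
 * nir_index_blocks() below) and `mem_ctx` is a ralloc context; the
 * `visit_pred()` helper is hypothetical:
 *
 *    nir_block **preds = nir_block_get_predecessors_sorted(block, mem_ctx);
 *    for (unsigned i = 0; i < block->predecessors->entries; i++)
 *       visit_pred(preds[i]);
 */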
2027
2028 void
2029 nir_index_blocks(nir_function_impl *impl)
2030 {
2031 unsigned index = 0;
2032
2033 if (impl->valid_metadata & nir_metadata_block_index)
2034 return;
2035
2036 nir_foreach_block_unstructured(block, impl) {
2037 block->index = index++;
2038 }
2039
2040 /* The end_block isn't really part of the program, which is why its index
2041 * is >= num_blocks.
2042 */
2043 impl->num_blocks = impl->end_block->index = index;
2044 }
2045
2046 static bool
2047 index_ssa_def_cb(nir_def *def, void *state)
2048 {
2049 unsigned *index = (unsigned *)state;
2050 def->index = (*index)++;
2051
2052 return true;
2053 }
2054
2055 /**
2056 * The indices are applied top-to-bottom which has the very nice property
2057 * that, if A dominates B, then A->index <= B->index.
2058 */
2059 void
2060 nir_index_ssa_defs(nir_function_impl *impl)
2061 {
2062 unsigned index = 0;
2063
2064 impl->valid_metadata &= ~nir_metadata_live_defs;
2065
2066 nir_foreach_block_unstructured(block, impl) {
2067 nir_foreach_instr(instr, block)
2068 nir_foreach_def(instr, index_ssa_def_cb, &index);
2069 }
2070
2071 impl->ssa_alloc = index;
2072 }
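
/* Sketch of how the ordering property above is typically used: after
 * nir_index_ssa_defs(impl), a cheap early-out for dominance queries is
 *
 *    if (a->index > b->index)
 *       return false;    // a definitely does not dominate b
 *
 * Note the converse does not hold; a->index <= b->index alone does not
 * prove that a dominates b.
 */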
2073
2074 /**
2075 * The indices are applied top-to-bottom which has the very nice property
2076 * that, if A dominates B, then A->index <= B->index.
2077 */
2078 unsigned
2079 nir_index_instrs(nir_function_impl *impl)
2080 {
2081 unsigned index = 0;
2082
2083 nir_foreach_block(block, impl) {
2084 block->start_ip = index++;
2085
2086 nir_foreach_instr(instr, block)
2087 instr->index = index++;
2088
2089 block->end_ip = index++;
2090 }
2091
2092 return index;
2093 }
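
/* Illustrative consequence of the numbering above: every instruction in a
 * block ends up with block->start_ip < instr->index < block->end_ip, so a
 * simple range check tells whether an instruction index falls inside a
 * given block:
 *
 *    bool inside = idx > block->start_ip && idx < block->end_ip;
 */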
2094
2095 void
2096 nir_shader_clear_pass_flags(nir_shader *shader)
2097 {
2098 nir_foreach_function_impl(impl, shader) {
2099 nir_foreach_block(block, impl) {
2100 nir_foreach_instr(instr, block) {
2101 instr->pass_flags = 0;
2102 }
2103 }
2104 }
2105 }
2106
2107 unsigned
2108 nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes)
2109 {
2110 unsigned count = 0;
2111 nir_foreach_variable_with_modes(var, shader, modes)
2112 var->index = count++;
2113 return count;
2114 }
2115
2116 unsigned
2117 nir_function_impl_index_vars(nir_function_impl *impl)
2118 {
2119 unsigned count = 0;
2120 nir_foreach_function_temp_variable(var, impl)
2121 var->index = count++;
2122 return count;
2123 }
2124
2125 static nir_instr *
2126 cursor_next_instr(nir_cursor cursor)
2127 {
2128 switch (cursor.option) {
2129 case nir_cursor_before_block:
2130 for (nir_block *block = cursor.block; block;
2131 block = nir_block_cf_tree_next(block)) {
2132 nir_instr *instr = nir_block_first_instr(block);
2133 if (instr)
2134 return instr;
2135 }
2136 return NULL;
2137
2138 case nir_cursor_after_block:
2139 cursor.block = nir_block_cf_tree_next(cursor.block);
2140 if (cursor.block == NULL)
2141 return NULL;
2142
2143 cursor.option = nir_cursor_before_block;
2144 return cursor_next_instr(cursor);
2145
2146 case nir_cursor_before_instr:
2147 return cursor.instr;
2148
2149 case nir_cursor_after_instr:
2150 if (nir_instr_next(cursor.instr))
2151 return nir_instr_next(cursor.instr);
2152
2153 cursor.option = nir_cursor_after_block;
2154 cursor.block = cursor.instr->block;
2155 return cursor_next_instr(cursor);
2156 }
2157
2158 unreachable("Inavlid cursor option");
2159 }
2160
2161 bool
2162 nir_function_impl_lower_instructions(nir_function_impl *impl,
2163 nir_instr_filter_cb filter,
2164 nir_lower_instr_cb lower,
2165 void *cb_data)
2166 {
2167 nir_builder b = nir_builder_create(impl);
2168
2169 nir_metadata preserved = nir_metadata_control_flow;
2170
2171 bool progress = false;
2172 nir_cursor iter = nir_before_impl(impl);
2173 nir_instr *instr;
2174 while ((instr = cursor_next_instr(iter)) != NULL) {
2175 if (filter && !filter(instr, cb_data)) {
2176 iter = nir_after_instr(instr);
2177 continue;
2178 }
2179
2180 nir_def *old_def = nir_instr_def(instr);
2181 struct list_head old_uses;
2182 if (old_def != NULL) {
2183 /* We're about to ask the callback to generate a replacement for instr.
2184 * Save off the uses from instr's SSA def so we know what uses to
2185 * rewrite later. If we use nir_def_rewrite_uses, it fails in the
2186 * case where the generated replacement code uses the result of instr
2187 * itself. If we use nir_def_rewrite_uses_after (which is the
2188 * normal solution to this problem), it doesn't work well if control-
2189 * flow is inserted as part of the replacement, doesn't handle cases
2190 * where the replacement is something consumed by instr, and suffers
2191 * from performance issues. This is the only way to 100% guarantee
2192 * that we rewrite the correct set efficiently.
2193 */
2194
2195 list_replace(&old_def->uses, &old_uses);
2196 list_inithead(&old_def->uses);
2197 }
2198
2199 b.cursor = nir_after_instr(instr);
2200 nir_def *new_def = lower(&b, instr, cb_data);
2201 if (new_def && new_def != NIR_LOWER_INSTR_PROGRESS &&
2202 new_def != NIR_LOWER_INSTR_PROGRESS_REPLACE) {
2203 assert(old_def != NULL);
2204 if (new_def->parent_instr->block != instr->block)
2205 preserved = nir_metadata_none;
2206
2207 list_for_each_entry_safe(nir_src, use_src, &old_uses, use_link)
2208 nir_src_rewrite(use_src, new_def);
2209
2210 if (nir_def_is_unused(old_def)) {
2211 iter = nir_instr_free_and_dce(instr);
2212 } else {
2213 iter = nir_after_instr(instr);
2214 }
2215 progress = true;
2216 } else {
2217 /* We didn't end up lowering after all. Put the uses back */
2218 if (old_def)
2219 list_replace(&old_uses, &old_def->uses);
2220
2221 if (new_def == NIR_LOWER_INSTR_PROGRESS_REPLACE) {
2222 /* Only instructions without a return value can be removed like this */
2223 assert(!old_def);
2224 iter = nir_instr_free_and_dce(instr);
2225 progress = true;
2226 } else
2227 iter = nir_after_instr(instr);
2228
2229 if (new_def == NIR_LOWER_INSTR_PROGRESS)
2230 progress = true;
2231 }
2232 }
2233
2234 if (progress) {
2235 nir_metadata_preserve(impl, preserved);
2236 } else {
2237 nir_metadata_preserve(impl, nir_metadata_all);
2238 }
2239
2240 return progress;
2241 }
2242
2243 bool
2244 nir_shader_lower_instructions(nir_shader *shader,
2245 nir_instr_filter_cb filter,
2246 nir_lower_instr_cb lower,
2247 void *cb_data)
2248 {
2249 bool progress = false;
2250
2251 nir_foreach_function_impl(impl, shader) {
2252 if (nir_function_impl_lower_instructions(impl, filter, lower, cb_data))
2253 progress = true;
2254 }
2255
2256 return progress;
2257 }
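
/* A minimal sketch of a pass built on this helper. The callbacks are
 * hypothetical and simply replace load_layer_id with a constant zero,
 * a common lowering for non-layered rendering:
 *
 *    static bool
 *    filter_layer_id(const nir_instr *instr, const void *data)
 *    {
 *       return instr->type == nir_instr_type_intrinsic &&
 *              nir_instr_as_intrinsic((nir_instr *)instr)->intrinsic ==
 *                 nir_intrinsic_load_layer_id;
 *    }
 *
 *    static nir_def *
 *    lower_layer_id(nir_builder *b, nir_instr *instr, void *data)
 *    {
 *       return nir_imm_int(b, 0);
 *    }
 *
 *    bool progress = nir_shader_lower_instructions(shader, filter_layer_id,
 *                                                  lower_layer_id, NULL);
 */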
2258
2259 /**
2260 * Returns true if the shader supports quad-based implicit derivatives on
2261 * texture sampling.
2262 */
2263 bool
2264 nir_shader_supports_implicit_lod(nir_shader *shader)
2265 {
2266 return (shader->info.stage == MESA_SHADER_FRAGMENT ||
2267 (gl_shader_stage_uses_workgroup(shader->info.stage) &&
2268 shader->info.derivative_group != DERIVATIVE_GROUP_NONE));
2269 }
2270
2271 nir_intrinsic_op
2272 nir_intrinsic_from_system_value(gl_system_value val)
2273 {
2274 switch (val) {
2275 case SYSTEM_VALUE_VERTEX_ID:
2276 return nir_intrinsic_load_vertex_id;
2277 case SYSTEM_VALUE_INSTANCE_ID:
2278 return nir_intrinsic_load_instance_id;
2279 case SYSTEM_VALUE_DRAW_ID:
2280 return nir_intrinsic_load_draw_id;
2281 case SYSTEM_VALUE_BASE_INSTANCE:
2282 return nir_intrinsic_load_base_instance;
2283 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
2284 return nir_intrinsic_load_vertex_id_zero_base;
2285 case SYSTEM_VALUE_IS_INDEXED_DRAW:
2286 return nir_intrinsic_load_is_indexed_draw;
2287 case SYSTEM_VALUE_FIRST_VERTEX:
2288 return nir_intrinsic_load_first_vertex;
2289 case SYSTEM_VALUE_BASE_VERTEX:
2290 return nir_intrinsic_load_base_vertex;
2291 case SYSTEM_VALUE_INVOCATION_ID:
2292 return nir_intrinsic_load_invocation_id;
2293 case SYSTEM_VALUE_FRAG_COORD:
2294 return nir_intrinsic_load_frag_coord;
2295 case SYSTEM_VALUE_PIXEL_COORD:
2296 return nir_intrinsic_load_pixel_coord;
2297 case SYSTEM_VALUE_POINT_COORD:
2298 return nir_intrinsic_load_point_coord;
2299 case SYSTEM_VALUE_LINE_COORD:
2300 return nir_intrinsic_load_line_coord;
2301 case SYSTEM_VALUE_FRONT_FACE:
2302 return nir_intrinsic_load_front_face;
2303 case SYSTEM_VALUE_FRONT_FACE_FSIGN:
2304 return nir_intrinsic_load_front_face_fsign;
2305 case SYSTEM_VALUE_SAMPLE_ID:
2306 return nir_intrinsic_load_sample_id;
2307 case SYSTEM_VALUE_SAMPLE_POS:
2308 return nir_intrinsic_load_sample_pos;
2309 case SYSTEM_VALUE_SAMPLE_POS_OR_CENTER:
2310 return nir_intrinsic_load_sample_pos_or_center;
2311 case SYSTEM_VALUE_SAMPLE_MASK_IN:
2312 return nir_intrinsic_load_sample_mask_in;
2313 case SYSTEM_VALUE_LAYER_ID:
2314 return nir_intrinsic_load_layer_id;
2315 case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
2316 return nir_intrinsic_load_local_invocation_id;
2317 case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
2318 return nir_intrinsic_load_local_invocation_index;
2319 case SYSTEM_VALUE_WORKGROUP_ID:
2320 return nir_intrinsic_load_workgroup_id;
2321 case SYSTEM_VALUE_BASE_WORKGROUP_ID:
2322 return nir_intrinsic_load_base_workgroup_id;
2323 case SYSTEM_VALUE_WORKGROUP_INDEX:
2324 return nir_intrinsic_load_workgroup_index;
2325 case SYSTEM_VALUE_NUM_WORKGROUPS:
2326 return nir_intrinsic_load_num_workgroups;
2327 case SYSTEM_VALUE_PRIMITIVE_ID:
2328 return nir_intrinsic_load_primitive_id;
2329 case SYSTEM_VALUE_TESS_COORD:
2330 return nir_intrinsic_load_tess_coord;
2331 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
2332 return nir_intrinsic_load_tess_level_outer;
2333 case SYSTEM_VALUE_TESS_LEVEL_INNER:
2334 return nir_intrinsic_load_tess_level_inner;
2335 case SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT:
2336 return nir_intrinsic_load_tess_level_outer_default;
2337 case SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT:
2338 return nir_intrinsic_load_tess_level_inner_default;
2339 case SYSTEM_VALUE_VERTICES_IN:
2340 return nir_intrinsic_load_patch_vertices_in;
2341 case SYSTEM_VALUE_HELPER_INVOCATION:
2342 return nir_intrinsic_load_helper_invocation;
2343 case SYSTEM_VALUE_COLOR0:
2344 return nir_intrinsic_load_color0;
2345 case SYSTEM_VALUE_COLOR1:
2346 return nir_intrinsic_load_color1;
2347 case SYSTEM_VALUE_VIEW_INDEX:
2348 return nir_intrinsic_load_view_index;
2349 case SYSTEM_VALUE_SUBGROUP_SIZE:
2350 return nir_intrinsic_load_subgroup_size;
2351 case SYSTEM_VALUE_SUBGROUP_INVOCATION:
2352 return nir_intrinsic_load_subgroup_invocation;
2353 case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
2354 return nir_intrinsic_load_subgroup_eq_mask;
2355 case SYSTEM_VALUE_SUBGROUP_GE_MASK:
2356 return nir_intrinsic_load_subgroup_ge_mask;
2357 case SYSTEM_VALUE_SUBGROUP_GT_MASK:
2358 return nir_intrinsic_load_subgroup_gt_mask;
2359 case SYSTEM_VALUE_SUBGROUP_LE_MASK:
2360 return nir_intrinsic_load_subgroup_le_mask;
2361 case SYSTEM_VALUE_SUBGROUP_LT_MASK:
2362 return nir_intrinsic_load_subgroup_lt_mask;
2363 case SYSTEM_VALUE_NUM_SUBGROUPS:
2364 return nir_intrinsic_load_num_subgroups;
2365 case SYSTEM_VALUE_SUBGROUP_ID:
2366 return nir_intrinsic_load_subgroup_id;
2367 case SYSTEM_VALUE_WORKGROUP_SIZE:
2368 return nir_intrinsic_load_workgroup_size;
2369 case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
2370 return nir_intrinsic_load_global_invocation_id;
2371 case SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID:
2372 return nir_intrinsic_load_base_global_invocation_id;
2373 case SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX:
2374 return nir_intrinsic_load_global_invocation_index;
2375 case SYSTEM_VALUE_GLOBAL_GROUP_SIZE:
2376 return nir_intrinsic_load_global_size;
2377 case SYSTEM_VALUE_WORK_DIM:
2378 return nir_intrinsic_load_work_dim;
2379 case SYSTEM_VALUE_USER_DATA_AMD:
2380 return nir_intrinsic_load_user_data_amd;
2381 case SYSTEM_VALUE_RAY_LAUNCH_ID:
2382 return nir_intrinsic_load_ray_launch_id;
2383 case SYSTEM_VALUE_RAY_LAUNCH_SIZE:
2384 return nir_intrinsic_load_ray_launch_size;
2385 case SYSTEM_VALUE_RAY_WORLD_ORIGIN:
2386 return nir_intrinsic_load_ray_world_origin;
2387 case SYSTEM_VALUE_RAY_WORLD_DIRECTION:
2388 return nir_intrinsic_load_ray_world_direction;
2389 case SYSTEM_VALUE_RAY_OBJECT_ORIGIN:
2390 return nir_intrinsic_load_ray_object_origin;
2391 case SYSTEM_VALUE_RAY_OBJECT_DIRECTION:
2392 return nir_intrinsic_load_ray_object_direction;
2393 case SYSTEM_VALUE_RAY_T_MIN:
2394 return nir_intrinsic_load_ray_t_min;
2395 case SYSTEM_VALUE_RAY_T_MAX:
2396 return nir_intrinsic_load_ray_t_max;
2397 case SYSTEM_VALUE_RAY_OBJECT_TO_WORLD:
2398 return nir_intrinsic_load_ray_object_to_world;
2399 case SYSTEM_VALUE_RAY_WORLD_TO_OBJECT:
2400 return nir_intrinsic_load_ray_world_to_object;
2401 case SYSTEM_VALUE_RAY_HIT_KIND:
2402 return nir_intrinsic_load_ray_hit_kind;
2403 case SYSTEM_VALUE_RAY_FLAGS:
2404 return nir_intrinsic_load_ray_flags;
2405 case SYSTEM_VALUE_RAY_GEOMETRY_INDEX:
2406 return nir_intrinsic_load_ray_geometry_index;
2407 case SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX:
2408 return nir_intrinsic_load_ray_instance_custom_index;
2409 case SYSTEM_VALUE_CULL_MASK:
2410 return nir_intrinsic_load_cull_mask;
2411 case SYSTEM_VALUE_RAY_TRIANGLE_VERTEX_POSITIONS:
2412 return nir_intrinsic_load_ray_triangle_vertex_positions;
2413 case SYSTEM_VALUE_MESH_VIEW_COUNT:
2414 return nir_intrinsic_load_mesh_view_count;
2415 case SYSTEM_VALUE_FRAG_SHADING_RATE:
2416 return nir_intrinsic_load_frag_shading_rate;
2417 case SYSTEM_VALUE_FULLY_COVERED:
2418 return nir_intrinsic_load_fully_covered;
2419 case SYSTEM_VALUE_FRAG_SIZE:
2420 return nir_intrinsic_load_frag_size;
2421 case SYSTEM_VALUE_FRAG_INVOCATION_COUNT:
2422 return nir_intrinsic_load_frag_invocation_count;
2423 case SYSTEM_VALUE_SHADER_INDEX:
2424 return nir_intrinsic_load_shader_index;
2425 case SYSTEM_VALUE_COALESCED_INPUT_COUNT:
2426 return nir_intrinsic_load_coalesced_input_count;
2427 case SYSTEM_VALUE_WARPS_PER_SM_NV:
2428 return nir_intrinsic_load_warps_per_sm_nv;
2429 case SYSTEM_VALUE_SM_COUNT_NV:
2430 return nir_intrinsic_load_sm_count_nv;
2431 case SYSTEM_VALUE_WARP_ID_NV:
2432 return nir_intrinsic_load_warp_id_nv;
2433 case SYSTEM_VALUE_SM_ID_NV:
2434 return nir_intrinsic_load_sm_id_nv;
2435 default:
2436 return nir_num_intrinsics;
2437 }
2438 }
2439
2440 gl_system_value
2441 nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
2442 {
2443 switch (intrin) {
2444 case nir_intrinsic_load_vertex_id:
2445 return SYSTEM_VALUE_VERTEX_ID;
2446 case nir_intrinsic_load_instance_id:
2447 return SYSTEM_VALUE_INSTANCE_ID;
2448 case nir_intrinsic_load_draw_id:
2449 return SYSTEM_VALUE_DRAW_ID;
2450 case nir_intrinsic_load_base_instance:
2451 return SYSTEM_VALUE_BASE_INSTANCE;
2452 case nir_intrinsic_load_vertex_id_zero_base:
2453 return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
2454 case nir_intrinsic_load_first_vertex:
2455 return SYSTEM_VALUE_FIRST_VERTEX;
2456 case nir_intrinsic_load_is_indexed_draw:
2457 return SYSTEM_VALUE_IS_INDEXED_DRAW;
2458 case nir_intrinsic_load_base_vertex:
2459 return SYSTEM_VALUE_BASE_VERTEX;
2460 case nir_intrinsic_load_invocation_id:
2461 return SYSTEM_VALUE_INVOCATION_ID;
2462 case nir_intrinsic_load_frag_coord:
2463 return SYSTEM_VALUE_FRAG_COORD;
2464 case nir_intrinsic_load_pixel_coord:
2465 return SYSTEM_VALUE_PIXEL_COORD;
2466 case nir_intrinsic_load_point_coord:
2467 return SYSTEM_VALUE_POINT_COORD;
2468 case nir_intrinsic_load_line_coord:
2469 return SYSTEM_VALUE_LINE_COORD;
2470 case nir_intrinsic_load_front_face:
2471 return SYSTEM_VALUE_FRONT_FACE;
2472 case nir_intrinsic_load_front_face_fsign:
2473 return SYSTEM_VALUE_FRONT_FACE_FSIGN;
2474 case nir_intrinsic_load_sample_id:
2475 return SYSTEM_VALUE_SAMPLE_ID;
2476 case nir_intrinsic_load_sample_pos:
2477 return SYSTEM_VALUE_SAMPLE_POS;
2478 case nir_intrinsic_load_sample_pos_or_center:
2479 return SYSTEM_VALUE_SAMPLE_POS_OR_CENTER;
2480 case nir_intrinsic_load_sample_mask_in:
2481 return SYSTEM_VALUE_SAMPLE_MASK_IN;
2482 case nir_intrinsic_load_layer_id:
2483 return SYSTEM_VALUE_LAYER_ID;
2484 case nir_intrinsic_load_local_invocation_id:
2485 return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
2486 case nir_intrinsic_load_local_invocation_index:
2487 return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
2488 case nir_intrinsic_load_num_workgroups:
2489 return SYSTEM_VALUE_NUM_WORKGROUPS;
2490 case nir_intrinsic_load_workgroup_id:
2491 return SYSTEM_VALUE_WORKGROUP_ID;
2492 case nir_intrinsic_load_base_workgroup_id:
2493 return SYSTEM_VALUE_BASE_WORKGROUP_ID;
2494 case nir_intrinsic_load_workgroup_index:
2495 return SYSTEM_VALUE_WORKGROUP_INDEX;
2496 case nir_intrinsic_load_primitive_id:
2497 return SYSTEM_VALUE_PRIMITIVE_ID;
2498 case nir_intrinsic_load_tess_coord:
2499 case nir_intrinsic_load_tess_coord_xy:
2500 return SYSTEM_VALUE_TESS_COORD;
2501 case nir_intrinsic_load_tess_level_outer:
2502 return SYSTEM_VALUE_TESS_LEVEL_OUTER;
2503 case nir_intrinsic_load_tess_level_inner:
2504 return SYSTEM_VALUE_TESS_LEVEL_INNER;
2505 case nir_intrinsic_load_tess_level_outer_default:
2506 return SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT;
2507 case nir_intrinsic_load_tess_level_inner_default:
2508 return SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT;
2509 case nir_intrinsic_load_patch_vertices_in:
2510 return SYSTEM_VALUE_VERTICES_IN;
2511 case nir_intrinsic_load_helper_invocation:
2512 return SYSTEM_VALUE_HELPER_INVOCATION;
2513 case nir_intrinsic_load_color0:
2514 return SYSTEM_VALUE_COLOR0;
2515 case nir_intrinsic_load_color1:
2516 return SYSTEM_VALUE_COLOR1;
2517 case nir_intrinsic_load_view_index:
2518 return SYSTEM_VALUE_VIEW_INDEX;
2519 case nir_intrinsic_load_subgroup_size:
2520 return SYSTEM_VALUE_SUBGROUP_SIZE;
2521 case nir_intrinsic_load_subgroup_invocation:
2522 return SYSTEM_VALUE_SUBGROUP_INVOCATION;
2523 case nir_intrinsic_load_subgroup_eq_mask:
2524 return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
2525 case nir_intrinsic_load_subgroup_ge_mask:
2526 return SYSTEM_VALUE_SUBGROUP_GE_MASK;
2527 case nir_intrinsic_load_subgroup_gt_mask:
2528 return SYSTEM_VALUE_SUBGROUP_GT_MASK;
2529 case nir_intrinsic_load_subgroup_le_mask:
2530 return SYSTEM_VALUE_SUBGROUP_LE_MASK;
2531 case nir_intrinsic_load_subgroup_lt_mask:
2532 return SYSTEM_VALUE_SUBGROUP_LT_MASK;
2533 case nir_intrinsic_load_num_subgroups:
2534 return SYSTEM_VALUE_NUM_SUBGROUPS;
2535 case nir_intrinsic_load_subgroup_id:
2536 return SYSTEM_VALUE_SUBGROUP_ID;
2537 case nir_intrinsic_load_workgroup_size:
2538 return SYSTEM_VALUE_WORKGROUP_SIZE;
2539 case nir_intrinsic_load_global_invocation_id:
2540 return SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
2541 case nir_intrinsic_load_base_global_invocation_id:
2542 return SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID;
2543 case nir_intrinsic_load_global_invocation_index:
2544 return SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX;
2545 case nir_intrinsic_load_global_size:
2546 return SYSTEM_VALUE_GLOBAL_GROUP_SIZE;
2547 case nir_intrinsic_load_work_dim:
2548 return SYSTEM_VALUE_WORK_DIM;
2549 case nir_intrinsic_load_user_data_amd:
2550 return SYSTEM_VALUE_USER_DATA_AMD;
2551 case nir_intrinsic_load_barycentric_model:
2552 return SYSTEM_VALUE_BARYCENTRIC_PULL_MODEL;
2553 case nir_intrinsic_load_gs_header_ir3:
2554 return SYSTEM_VALUE_GS_HEADER_IR3;
2555 case nir_intrinsic_load_tcs_header_ir3:
2556 return SYSTEM_VALUE_TCS_HEADER_IR3;
2557 case nir_intrinsic_load_ray_launch_id:
2558 return SYSTEM_VALUE_RAY_LAUNCH_ID;
2559 case nir_intrinsic_load_ray_launch_size:
2560 return SYSTEM_VALUE_RAY_LAUNCH_SIZE;
2561 case nir_intrinsic_load_ray_world_origin:
2562 return SYSTEM_VALUE_RAY_WORLD_ORIGIN;
2563 case nir_intrinsic_load_ray_world_direction:
2564 return SYSTEM_VALUE_RAY_WORLD_DIRECTION;
2565 case nir_intrinsic_load_ray_object_origin:
2566 return SYSTEM_VALUE_RAY_OBJECT_ORIGIN;
2567 case nir_intrinsic_load_ray_object_direction:
2568 return SYSTEM_VALUE_RAY_OBJECT_DIRECTION;
2569 case nir_intrinsic_load_ray_t_min:
2570 return SYSTEM_VALUE_RAY_T_MIN;
2571 case nir_intrinsic_load_ray_t_max:
2572 return SYSTEM_VALUE_RAY_T_MAX;
2573 case nir_intrinsic_load_ray_object_to_world:
2574 return SYSTEM_VALUE_RAY_OBJECT_TO_WORLD;
2575 case nir_intrinsic_load_ray_world_to_object:
2576 return SYSTEM_VALUE_RAY_WORLD_TO_OBJECT;
2577 case nir_intrinsic_load_ray_hit_kind:
2578 return SYSTEM_VALUE_RAY_HIT_KIND;
2579 case nir_intrinsic_load_ray_flags:
2580 return SYSTEM_VALUE_RAY_FLAGS;
2581 case nir_intrinsic_load_ray_geometry_index:
2582 return SYSTEM_VALUE_RAY_GEOMETRY_INDEX;
2583 case nir_intrinsic_load_ray_instance_custom_index:
2584 return SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX;
2585 case nir_intrinsic_load_cull_mask:
2586 return SYSTEM_VALUE_CULL_MASK;
2587 case nir_intrinsic_load_ray_triangle_vertex_positions:
2588 return SYSTEM_VALUE_RAY_TRIANGLE_VERTEX_POSITIONS;
2589 case nir_intrinsic_load_frag_shading_rate:
2590 return SYSTEM_VALUE_FRAG_SHADING_RATE;
2591 case nir_intrinsic_load_mesh_view_count:
2592 return SYSTEM_VALUE_MESH_VIEW_COUNT;
2593 case nir_intrinsic_load_fully_covered:
2594 return SYSTEM_VALUE_FULLY_COVERED;
2595 case nir_intrinsic_load_frag_size:
2596 return SYSTEM_VALUE_FRAG_SIZE;
2597 case nir_intrinsic_load_frag_invocation_count:
2598 return SYSTEM_VALUE_FRAG_INVOCATION_COUNT;
2599 case nir_intrinsic_load_shader_index:
2600 return SYSTEM_VALUE_SHADER_INDEX;
2601 case nir_intrinsic_load_coalesced_input_count:
2602 return SYSTEM_VALUE_COALESCED_INPUT_COUNT;
2603 case nir_intrinsic_load_warps_per_sm_nv:
2604 return SYSTEM_VALUE_WARPS_PER_SM_NV;
2605 case nir_intrinsic_load_sm_count_nv:
2606 return SYSTEM_VALUE_SM_COUNT_NV;
2607 case nir_intrinsic_load_warp_id_nv:
2608 return SYSTEM_VALUE_WARP_ID_NV;
2609 case nir_intrinsic_load_sm_id_nv:
2610 return SYSTEM_VALUE_SM_ID_NV;
2611 default:
2612 unreachable("intrinsic doesn't produce a system value");
2613 }
2614 }
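
/* These two helpers are inverses for the values they both cover, e.g.
 * (a small sanity sketch):
 *
 *    nir_intrinsic_op op =
 *       nir_intrinsic_from_system_value(SYSTEM_VALUE_FRONT_FACE);
 *    assert(op == nir_intrinsic_load_front_face);
 *    assert(nir_system_value_from_intrinsic(op) == SYSTEM_VALUE_FRONT_FACE);
 */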
2615
2616 void
2617 nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_def *src,
2618 bool bindless)
2619 {
2620 enum gl_access_qualifier access = nir_intrinsic_access(intrin);
2621
2622 /* Image intrinsics only have one of these */
2623 assert(!nir_intrinsic_has_src_type(intrin) ||
2624 !nir_intrinsic_has_dest_type(intrin));
2625
2626 nir_alu_type data_type = nir_type_invalid;
2627 if (nir_intrinsic_has_src_type(intrin))
2628 data_type = nir_intrinsic_src_type(intrin);
2629 if (nir_intrinsic_has_dest_type(intrin))
2630 data_type = nir_intrinsic_dest_type(intrin);
2631
2632 nir_atomic_op atomic_op = 0;
2633 if (nir_intrinsic_has_atomic_op(intrin))
2634 atomic_op = nir_intrinsic_atomic_op(intrin);
2635
2636 switch (intrin->intrinsic) {
2637 #define CASE(op) \
2638 case nir_intrinsic_image_deref_##op: \
2639 intrin->intrinsic = bindless ? nir_intrinsic_bindless_image_##op \
2640 : nir_intrinsic_image_##op; \
2641 break;
2642 CASE(load)
2643 CASE(sparse_load)
2644 CASE(store)
2645 CASE(atomic)
2646 CASE(atomic_swap)
2647 CASE(size)
2648 CASE(samples)
2649 CASE(load_raw_intel)
2650 CASE(store_raw_intel)
2651 CASE(fragment_mask_load_amd)
2652 CASE(store_block_agx)
2653 #undef CASE
2654 default:
2655 unreachable("Unhanded image intrinsic");
2656 }
2657
2658 nir_variable *var = nir_intrinsic_get_var(intrin, 0);
2659
2660 /* Only update the format if the intrinsic doesn't have one set */
2661 if (nir_intrinsic_format(intrin) == PIPE_FORMAT_NONE)
2662 nir_intrinsic_set_format(intrin, var->data.image.format);
2663
2664 nir_intrinsic_set_access(intrin, access | var->data.access);
2665 if (nir_intrinsic_has_src_type(intrin))
2666 nir_intrinsic_set_src_type(intrin, data_type);
2667 if (nir_intrinsic_has_dest_type(intrin))
2668 nir_intrinsic_set_dest_type(intrin, data_type);
2669
2670 if (nir_intrinsic_has_atomic_op(intrin))
2671 nir_intrinsic_set_atomic_op(intrin, atomic_op);
2672
2673 nir_src_rewrite(&intrin->src[0], src);
2674 }
2675
2676 unsigned
2677 nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr)
2678 {
2679 enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
2680 int coords = glsl_get_sampler_dim_coordinate_components(dim);
2681 if (dim == GLSL_SAMPLER_DIM_CUBE)
2682 return coords;
2683 else
2684 return coords + nir_intrinsic_image_array(instr);
2685 }
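
/* Worked example of the rule above: a 2D array image needs 2 + 1 = 3
 * coordinate components, while a cube image always reports 3 and never
 * gets an extra array component here.
 */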
2686
2687 bool
2688 nir_intrinsic_can_reorder(nir_intrinsic_instr *instr)
2689 {
2690 if (nir_intrinsic_has_access(instr)) {
2691 enum gl_access_qualifier access = nir_intrinsic_access(instr);
2692 if (access & ACCESS_VOLATILE)
2693 return false;
2694 if (access & ACCESS_CAN_REORDER)
2695 return true;
2696 }
2697
2698 const nir_intrinsic_info *info;
2699 if (instr->intrinsic == nir_intrinsic_load_deref) {
2700 nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
2701 if (nir_deref_mode_is_in_set(deref, nir_var_system_value)) {
2702 nir_variable *var = nir_deref_instr_get_variable(deref);
2703 if (!var)
2704 return false;
2705
2706 nir_intrinsic_op sysval_op =
2707 nir_intrinsic_from_system_value((gl_system_value)var->data.location);
2708 if (sysval_op == nir_num_intrinsics)
2709 return true;
2710
2711 info = &nir_intrinsic_infos[sysval_op];
2712 } else {
2713 return nir_deref_mode_is_in_set(deref, nir_var_read_only_modes);
2714 }
2715 } else {
2716 info = &nir_intrinsic_infos[instr->intrinsic];
2717 }
2718
2719 return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
2720 (info->flags & NIR_INTRINSIC_CAN_REORDER);
2721 }
2722
2723 nir_src *
2724 nir_get_shader_call_payload_src(nir_intrinsic_instr *call)
2725 {
2726 switch (call->intrinsic) {
2727 case nir_intrinsic_trace_ray:
2728 case nir_intrinsic_rt_trace_ray:
2729 return &call->src[10];
2730 case nir_intrinsic_execute_callable:
2731 case nir_intrinsic_rt_execute_callable:
2732 return &call->src[1];
2733 default:
2734 unreachable("Not a call intrinsic");
2735 return NULL;
2736 }
2737 }
2738
2739 nir_binding
2740 nir_chase_binding(nir_src rsrc)
2741 {
2742 nir_binding res = { 0 };
2743 if (rsrc.ssa->parent_instr->type == nir_instr_type_deref) {
2744 const struct glsl_type *type = glsl_without_array(nir_src_as_deref(rsrc)->type);
2745 bool is_image = glsl_type_is_image(type) || glsl_type_is_sampler(type);
2746 while (rsrc.ssa->parent_instr->type == nir_instr_type_deref) {
2747 nir_deref_instr *deref = nir_src_as_deref(rsrc);
2748
2749 if (deref->deref_type == nir_deref_type_var) {
2750 res.success = true;
2751 res.var = deref->var;
2752 res.desc_set = deref->var->data.descriptor_set;
2753 res.binding = deref->var->data.binding;
2754 return res;
2755 } else if (deref->deref_type == nir_deref_type_array && is_image) {
2756 if (res.num_indices == ARRAY_SIZE(res.indices))
2757 return (nir_binding){ 0 };
2758 res.indices[res.num_indices++] = deref->arr.index;
2759 }
2760
2761 rsrc = deref->parent;
2762 }
2763 }
2764
2765 /* Skip copies and trimming. Trimming can appear as nir_op_mov instructions
2766 * when removing the offset from addresses. We also consider
2767 * nir_op_is_vec_or_mov() instructions to skip trimming of
2768 * vec2_index_32bit_offset addresses after lowering ALU to scalar.
2769 */
2770 unsigned num_components = nir_src_num_components(rsrc);
2771 while (true) {
2772 nir_alu_instr *alu = nir_src_as_alu_instr(rsrc);
2773 nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc);
2774 if (alu && alu->op == nir_op_mov) {
2775 for (unsigned i = 0; i < num_components; i++) {
2776 if (alu->src[0].swizzle[i] != i)
2777 return (nir_binding){ 0 };
2778 }
2779 rsrc = alu->src[0].src;
2780 } else if (alu && nir_op_is_vec(alu->op)) {
2781 for (unsigned i = 0; i < num_components; i++) {
2782 if (alu->src[i].swizzle[0] != i || alu->src[i].src.ssa != alu->src[0].src.ssa)
2783 return (nir_binding){ 0 };
2784 }
2785 rsrc = alu->src[0].src;
2786 } else if (intrin && intrin->intrinsic == nir_intrinsic_read_first_invocation) {
2787 /* The caller might want to be aware if only the first invocation of
2788 * the indices are used.
2789 */
2790 res.read_first_invocation = true;
2791 rsrc = intrin->src[0];
2792 } else {
2793 break;
2794 }
2795 }
2796
2797 if (nir_src_is_const(rsrc)) {
2798 /* GL binding model after deref lowering */
2799 res.success = true;
2800 /* Can't use just nir_src_as_uint. Vulkan resource index produces a
2801 * vec2. Some drivers lower it to vec1 (to handle get_ssbo_size for
2802 * example) but others just keep it around as a vec2 (v3dv).
2803 */
2804 res.binding = nir_src_comp_as_uint(rsrc, 0);
2805 return res;
2806 }
2807
2808 /* otherwise, must be Vulkan binding model after deref lowering or GL bindless */
2809
2810 nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc);
2811 if (!intrin)
2812 return (nir_binding){ 0 };
2813
2814 /* Intel resource, similar to load_vulkan_descriptor after it has been
2815 * lowered.
2816 */
2817 if (intrin->intrinsic == nir_intrinsic_resource_intel) {
2818 res.success = true;
2819 res.desc_set = nir_intrinsic_desc_set(intrin);
2820 res.binding = nir_intrinsic_binding(intrin);
2821 /* nir_intrinsic_resource_intel has 3 sources, but src[2] is included in
2822 * src[1]; it is kept around for other purposes.
2823 */
2824 res.num_indices = 2;
2825 res.indices[0] = intrin->src[0];
2826 res.indices[1] = intrin->src[1];
2827 return res;
2828 }
2829
2830 /* skip load_vulkan_descriptor */
2831 if (intrin->intrinsic == nir_intrinsic_load_vulkan_descriptor) {
2832 intrin = nir_src_as_intrinsic(intrin->src[0]);
2833 if (!intrin)
2834 return (nir_binding){ 0 };
2835 }
2836
2837 if (intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
2838 return (nir_binding){ 0 };
2839
2840 assert(res.num_indices == 0);
2841 res.success = true;
2842 res.desc_set = nir_intrinsic_desc_set(intrin);
2843 res.binding = nir_intrinsic_binding(intrin);
2844 res.num_indices = 1;
2845 res.indices[0] = intrin->src[0];
2846 return res;
2847 }
2848
2849 nir_variable *
2850 nir_get_binding_variable(nir_shader *shader, nir_binding binding)
2851 {
2852 nir_variable *binding_var = NULL;
2853 unsigned count = 0;
2854
2855 if (!binding.success)
2856 return NULL;
2857
2858 if (binding.var)
2859 return binding.var;
2860
2861 nir_foreach_variable_with_modes(var, shader, nir_var_mem_ubo | nir_var_mem_ssbo) {
2862 if (var->data.descriptor_set == binding.desc_set && var->data.binding == binding.binding) {
2863 binding_var = var;
2864 count++;
2865 }
2866 }
2867
2868 /* Be conservative if another variable is using the same binding/desc_set
2869 * because the access mask might be different and we can't get it reliably.
2870 */
2871 if (count > 1)
2872 return NULL;
2873
2874 return binding_var;
2875 }
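
/* A short usage sketch combining the two helpers above; `intrin` is assumed
 * to be an SSBO access whose first source is the buffer index:
 *
 *    nir_binding binding = nir_chase_binding(intrin->src[0]);
 *    if (binding.success) {
 *       nir_variable *var = nir_get_binding_variable(shader, binding);
 *       // var may still be NULL if several variables share the binding
 *    }
 */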
2876
2877 nir_scalar
2878 nir_scalar_chase_movs(nir_scalar s)
2879 {
2880 while (nir_scalar_is_alu(s)) {
2881 nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
2882 if (alu->op == nir_op_mov) {
2883 s.def = alu->src[0].src.ssa;
2884 s.comp = alu->src[0].swizzle[s.comp];
2885 } else if (nir_op_is_vec(alu->op)) {
2886 s.def = alu->src[s.comp].src.ssa;
2887 s.comp = alu->src[s.comp].swizzle[0];
2888 } else {
2889 break;
2890 }
2891 }
2892
2893 return s;
2894 }
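
/* Sketch of what gets chased: given  b = vec4(a.y, ...)  and  c = mov(b),
 * starting from component 0 of c this walks through the mov and then the
 * vec, yielding the scalar {a, 1}, i.e. component y of a.
 */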
2895
2896 nir_alu_type
2897 nir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type)
2898 {
2899 switch (base_type) {
2900 /* clang-format off */
2901 case GLSL_TYPE_BOOL: return nir_type_bool1;
2902 case GLSL_TYPE_UINT: return nir_type_uint32;
2903 case GLSL_TYPE_INT: return nir_type_int32;
2904 case GLSL_TYPE_UINT16: return nir_type_uint16;
2905 case GLSL_TYPE_INT16: return nir_type_int16;
2906 case GLSL_TYPE_UINT8: return nir_type_uint8;
2907 case GLSL_TYPE_INT8: return nir_type_int8;
2908 case GLSL_TYPE_UINT64: return nir_type_uint64;
2909 case GLSL_TYPE_INT64: return nir_type_int64;
2910 case GLSL_TYPE_FLOAT: return nir_type_float32;
2911 case GLSL_TYPE_FLOAT16: return nir_type_float16;
2912 case GLSL_TYPE_DOUBLE: return nir_type_float64;
2913 /* clang-format on */
2914
2915 case GLSL_TYPE_COOPERATIVE_MATRIX:
2916 case GLSL_TYPE_SAMPLER:
2917 case GLSL_TYPE_TEXTURE:
2918 case GLSL_TYPE_IMAGE:
2919 case GLSL_TYPE_ATOMIC_UINT:
2920 case GLSL_TYPE_STRUCT:
2921 case GLSL_TYPE_INTERFACE:
2922 case GLSL_TYPE_ARRAY:
2923 case GLSL_TYPE_VOID:
2924 case GLSL_TYPE_SUBROUTINE:
2925 case GLSL_TYPE_ERROR:
2926 return nir_type_invalid;
2927 }
2928
2929 unreachable("unknown type");
2930 }
2931
2932 enum glsl_base_type
2933 nir_get_glsl_base_type_for_nir_type(nir_alu_type base_type)
2934 {
2935 /* clang-format off */
2936 switch (base_type) {
2937 case nir_type_bool1: return GLSL_TYPE_BOOL;
2938 case nir_type_uint32: return GLSL_TYPE_UINT;
2939 case nir_type_int32: return GLSL_TYPE_INT;
2940 case nir_type_uint16: return GLSL_TYPE_UINT16;
2941 case nir_type_int16: return GLSL_TYPE_INT16;
2942 case nir_type_uint8: return GLSL_TYPE_UINT8;
2943 case nir_type_int8: return GLSL_TYPE_INT8;
2944 case nir_type_uint64: return GLSL_TYPE_UINT64;
2945 case nir_type_int64: return GLSL_TYPE_INT64;
2946 case nir_type_float32: return GLSL_TYPE_FLOAT;
2947 case nir_type_float16: return GLSL_TYPE_FLOAT16;
2948 case nir_type_float64: return GLSL_TYPE_DOUBLE;
2949 default: unreachable("Not a sized nir_alu_type");
2950 }
2951 /* clang-format on */
2952 }
2953
2954 nir_op
2955 nir_op_vec(unsigned num_components)
2956 {
2957 /* clang-format off */
2958 switch (num_components) {
2959 case 1: return nir_op_mov;
2960 case 2: return nir_op_vec2;
2961 case 3: return nir_op_vec3;
2962 case 4: return nir_op_vec4;
2963 case 5: return nir_op_vec5;
2964 case 8: return nir_op_vec8;
2965 case 16: return nir_op_vec16;
2966 default: unreachable("bad component count");
2967 }
2968 /* clang-format on */
2969 }
2970
2971 bool
2972 nir_op_is_vec(nir_op op)
2973 {
2974 switch (op) {
2975 case nir_op_vec2:
2976 case nir_op_vec3:
2977 case nir_op_vec4:
2978 case nir_op_vec5:
2979 case nir_op_vec8:
2980 case nir_op_vec16:
2981 return true;
2982 default:
2983 return false;
2984 }
2985 }
2986
2987 nir_component_mask_t
2988 nir_alu_instr_src_read_mask(const nir_alu_instr *instr, unsigned src)
2989 {
2990 nir_component_mask_t read_mask = 0;
2991 for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) {
2992 if (!nir_alu_instr_channel_used(instr, src, c))
2993 continue;
2994
2995 read_mask |= (1 << instr->src[src].swizzle[c]);
2996 }
2997 return read_mask;
2998 }
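
/* Worked example: for a vec4 fadd whose first source is swizzled .zzxy,
 * channels z, z, x and y are read, so the returned mask is
 * (1 << 2) | (1 << 0) | (1 << 1) = 0x7 and component w is not read.
 */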
2999
3000 unsigned
3001 nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src)
3002 {
3003 if (nir_op_infos[instr->op].input_sizes[src] > 0)
3004 return nir_op_infos[instr->op].input_sizes[src];
3005
3006 return instr->def.num_components;
3007 }
3008
3009 #define CASE_ALL_SIZES(op) \
3010 case op: \
3011 case op##8: \
3012 case op##16: \
3013 case op##32:
3014
3015 bool
3016 nir_alu_instr_is_comparison(const nir_alu_instr *instr)
3017 {
3018 switch (instr->op) {
3019 CASE_ALL_SIZES(nir_op_flt)
3020 CASE_ALL_SIZES(nir_op_fge)
3021 CASE_ALL_SIZES(nir_op_fltu)
3022 CASE_ALL_SIZES(nir_op_fgeu)
3023 CASE_ALL_SIZES(nir_op_feq)
3024 CASE_ALL_SIZES(nir_op_fneu)
3025 CASE_ALL_SIZES(nir_op_fequ)
3026 CASE_ALL_SIZES(nir_op_fneo)
3027 CASE_ALL_SIZES(nir_op_funord)
3028 CASE_ALL_SIZES(nir_op_ford)
3029 CASE_ALL_SIZES(nir_op_ilt)
3030 CASE_ALL_SIZES(nir_op_ult)
3031 CASE_ALL_SIZES(nir_op_ige)
3032 CASE_ALL_SIZES(nir_op_uge)
3033 CASE_ALL_SIZES(nir_op_ieq)
3034 CASE_ALL_SIZES(nir_op_ine)
3035 CASE_ALL_SIZES(nir_op_bitz)
3036 CASE_ALL_SIZES(nir_op_bitnz)
3037 case nir_op_inot:
3038 return true;
3039 default:
3040 return false;
3041 }
3042 }
3043
3044 #undef CASE_ALL_SIZES
3045
3046 unsigned
3047 nir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn)
3048 {
3049 const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
3050 assert(srcn < info->num_srcs);
3051 if (info->src_components[srcn] > 0)
3052 return info->src_components[srcn];
3053 else if (info->src_components[srcn] == 0)
3054 return intr->num_components;
3055 else
3056 return nir_src_num_components(intr->src[srcn]);
3057 }
3058
3059 unsigned
3060 nir_intrinsic_dest_components(nir_intrinsic_instr *intr)
3061 {
3062 const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
3063 if (!info->has_dest)
3064 return 0;
3065 else if (info->dest_components)
3066 return info->dest_components;
3067 else
3068 return intr->num_components;
3069 }
3070
3071 nir_alu_type
3072 nir_intrinsic_instr_src_type(const nir_intrinsic_instr *intrin, unsigned src)
3073 {
3074 /* We could go nuts here, but we'll just handle a few simple
3075 * cases and let everything else be untyped.
3076 */
3077 switch (intrin->intrinsic) {
3078 case nir_intrinsic_store_deref: {
3079 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
3080 if (src == 1)
3081 return nir_get_nir_type_for_glsl_type(deref->type);
3082 break;
3083 }
3084
3085 case nir_intrinsic_store_output:
3086 if (src == 0)
3087 return nir_intrinsic_src_type(intrin);
3088 break;
3089
3090 default:
3091 break;
3092 }
3093
3094 /* For the most part, we leave other intrinsics alone. Most
3095 * of them don't matter in OpenGL ES 2.0 drivers anyway.
3096 * However, we should at least check if this is some sort of
3097 * IO intrinsic and flag its offset and index sources.
3098 */
3099 {
3100 int offset_src_idx = nir_get_io_offset_src_number(intrin);
3101 if (src == offset_src_idx) {
3102 const nir_src *offset_src = offset_src_idx >= 0 ? &intrin->src[offset_src_idx] : NULL;
3103 if (offset_src)
3104 return nir_type_int;
3105 }
3106 }
3107
3108 return nir_type_invalid;
3109 }
3110
3111 nir_alu_type
3112 nir_intrinsic_instr_dest_type(const nir_intrinsic_instr *intrin)
3113 {
3114 /* We could go nuts here, but we'll just handle a few simple
3115 * cases and let everything else be untyped.
3116 */
3117 switch (intrin->intrinsic) {
3118 case nir_intrinsic_load_deref: {
3119 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
3120 return nir_get_nir_type_for_glsl_type(deref->type);
3121 }
3122
3123 case nir_intrinsic_load_input:
3124 case nir_intrinsic_load_per_primitive_input:
3125 case nir_intrinsic_load_uniform:
3126 return nir_intrinsic_dest_type(intrin);
3127
3128 default:
3129 break;
3130 }
3131
3132 return nir_type_invalid;
3133 }
3134
3135 /**
3136 * Helper to copy const_index[] from src to dst, without assuming they
3137 * match in order.
3138 */
3139 void
3140 nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src)
3141 {
3142 if (src->intrinsic == dst->intrinsic) {
3143 memcpy(dst->const_index, src->const_index, sizeof(dst->const_index));
3144 return;
3145 }
3146
3147 const nir_intrinsic_info *src_info = &nir_intrinsic_infos[src->intrinsic];
3148 const nir_intrinsic_info *dst_info = &nir_intrinsic_infos[dst->intrinsic];
3149
3150 for (unsigned i = 0; i < NIR_INTRINSIC_NUM_INDEX_FLAGS; i++) {
3151 if (src_info->index_map[i] == 0)
3152 continue;
3153
3154 /* require that dst instruction also uses the same const_index[]: */
3155 assert(dst_info->index_map[i] > 0);
3156
3157 dst->const_index[dst_info->index_map[i] - 1] =
3158 src->const_index[src_info->index_map[i] - 1];
3159 }
3160 }
3161
3162 bool
3163 nir_tex_instr_need_sampler(const nir_tex_instr *instr)
3164 {
3165 switch (instr->op) {
3166 case nir_texop_txf:
3167 case nir_texop_txf_ms:
3168 case nir_texop_txs:
3169 case nir_texop_query_levels:
3170 case nir_texop_texture_samples:
3171 case nir_texop_samples_identical:
3172 case nir_texop_descriptor_amd:
3173 return false;
3174 default:
3175 return true;
3176 }
3177 }
3178
3179 unsigned
3180 nir_tex_instr_result_size(const nir_tex_instr *instr)
3181 {
3182 switch (instr->op) {
3183 case nir_texop_txs: {
3184 unsigned ret;
3185 switch (instr->sampler_dim) {
3186 case GLSL_SAMPLER_DIM_1D:
3187 case GLSL_SAMPLER_DIM_BUF:
3188 ret = 1;
3189 break;
3190 case GLSL_SAMPLER_DIM_2D:
3191 case GLSL_SAMPLER_DIM_CUBE:
3192 case GLSL_SAMPLER_DIM_MS:
3193 case GLSL_SAMPLER_DIM_RECT:
3194 case GLSL_SAMPLER_DIM_EXTERNAL:
3195 case GLSL_SAMPLER_DIM_SUBPASS:
3196 case GLSL_SAMPLER_DIM_SUBPASS_MS:
3197 ret = 2;
3198 break;
3199 case GLSL_SAMPLER_DIM_3D:
3200 ret = 3;
3201 break;
3202 default:
3203 unreachable("not reached");
3204 }
3205 if (instr->is_array)
3206 ret++;
3207 return ret;
3208 }
3209
3210 case nir_texop_lod:
3211 return 2;
3212
3213 case nir_texop_texture_samples:
3214 case nir_texop_query_levels:
3215 case nir_texop_samples_identical:
3216 case nir_texop_fragment_mask_fetch_amd:
3217 case nir_texop_lod_bias_agx:
3218 case nir_texop_has_custom_border_color_agx:
3219 return 1;
3220
3221 case nir_texop_descriptor_amd:
3222 return instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 4 : 8;
3223
3224 case nir_texop_sampler_descriptor_amd:
3225 return 4;
3226
3227 case nir_texop_hdr_dim_nv:
3228 case nir_texop_tex_type_nv:
3229 return 4;
3230
3231 case nir_texop_custom_border_color_agx:
3232 return 4;
3233
3234 default:
3235 if (instr->is_shadow && instr->is_new_style_shadow)
3236 return 1;
3237
3238 return 4;
3239 }
3240 }
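
/* Worked examples of the cases above: txs on a 2D array texture returns
 * 2 + 1 = 3 components (width, height, layers), while an ordinary sample
 * from a shadow sampler with is_new_style_shadow set returns just 1.
 */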
3241
3242 bool
3243 nir_tex_instr_is_query(const nir_tex_instr *instr)
3244 {
3245 switch (instr->op) {
3246 case nir_texop_txs:
3247 case nir_texop_lod:
3248 case nir_texop_texture_samples:
3249 case nir_texop_query_levels:
3250 case nir_texop_descriptor_amd:
3251 case nir_texop_sampler_descriptor_amd:
3252 case nir_texop_lod_bias_agx:
3253 case nir_texop_custom_border_color_agx:
3254 case nir_texop_has_custom_border_color_agx:
3255 case nir_texop_hdr_dim_nv:
3256 case nir_texop_tex_type_nv:
3257 return true;
3258 case nir_texop_tex:
3259 case nir_texop_txb:
3260 case nir_texop_txl:
3261 case nir_texop_txd:
3262 case nir_texop_txf:
3263 case nir_texop_txf_ms:
3264 case nir_texop_txf_ms_fb:
3265 case nir_texop_txf_ms_mcs_intel:
3266 case nir_texop_tg4:
3267 case nir_texop_samples_identical:
3268 case nir_texop_fragment_mask_fetch_amd:
3269 case nir_texop_fragment_fetch_amd:
3270 return false;
3271 default:
3272 unreachable("Invalid texture opcode");
3273 }
3274 }
3275
3276 bool
3277 nir_tex_instr_has_implicit_derivative(const nir_tex_instr *instr)
3278 {
3279 switch (instr->op) {
3280 case nir_texop_tex:
3281 case nir_texop_txb:
3282 case nir_texop_lod:
3283 return true;
3284 case nir_texop_tg4:
3285 return instr->is_gather_implicit_lod;
3286 default:
3287 return false;
3288 }
3289 }
3290
3291 nir_alu_type
3292 nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src)
3293 {
3294 switch (instr->src[src].src_type) {
3295 case nir_tex_src_coord:
3296 switch (instr->op) {
3297 case nir_texop_txf:
3298 case nir_texop_txf_ms:
3299 case nir_texop_txf_ms_fb:
3300 case nir_texop_txf_ms_mcs_intel:
3301 case nir_texop_samples_identical:
3302 case nir_texop_fragment_fetch_amd:
3303 case nir_texop_fragment_mask_fetch_amd:
3304 return nir_type_int;
3305
3306 default:
3307 return nir_type_float;
3308 }
3309
3310 case nir_tex_src_lod:
3311 switch (instr->op) {
3312 case nir_texop_txs:
3313 case nir_texop_txf:
3314 case nir_texop_txf_ms:
3315 case nir_texop_fragment_fetch_amd:
3316 case nir_texop_fragment_mask_fetch_amd:
3317 return nir_type_int;
3318
3319 default:
3320 return nir_type_float;
3321 }
3322
3323 case nir_tex_src_projector:
3324 case nir_tex_src_comparator:
3325 case nir_tex_src_bias:
3326 case nir_tex_src_min_lod:
3327 case nir_tex_src_ddx:
3328 case nir_tex_src_ddy:
3329 case nir_tex_src_backend1:
3330 case nir_tex_src_backend2:
3331 return nir_type_float;
3332
3333 case nir_tex_src_offset:
3334 case nir_tex_src_ms_index:
3335 case nir_tex_src_plane:
3336 return nir_type_int;
3337
3338 case nir_tex_src_sampler_deref_intrinsic:
3339 case nir_tex_src_texture_deref_intrinsic:
3340 case nir_tex_src_ms_mcs_intel:
3341 case nir_tex_src_texture_deref:
3342 case nir_tex_src_sampler_deref:
3343 case nir_tex_src_texture_offset:
3344 case nir_tex_src_sampler_offset:
3345 case nir_tex_src_texture_handle:
3346 case nir_tex_src_sampler_handle:
3347 return nir_type_uint;
3348
3349 case nir_num_tex_src_types:
3350 unreachable("nir_num_tex_src_types is not a valid source type");
3351 }
3352
3353 unreachable("Invalid texture source type");
3354 }
3355
3356 unsigned
3357 nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src)
3358 {
3359 if (instr->src[src].src_type == nir_tex_src_coord)
3360 return instr->coord_components;
3361
3362 /* The MCS value is expected to be a vec4 returned by a txf_ms_mcs_intel */
3363 if (instr->src[src].src_type == nir_tex_src_ms_mcs_intel)
3364 return 4;
3365
3366 if (instr->src[src].src_type == nir_tex_src_ddx ||
3367 instr->src[src].src_type == nir_tex_src_ddy) {
3368
3369 if (instr->is_array && !instr->array_is_lowered_cube)
3370 return instr->coord_components - 1;
3371 else
3372 return instr->coord_components;
3373 }
3374
3375 if (instr->src[src].src_type == nir_tex_src_offset) {
3376 if (instr->is_array)
3377 return instr->coord_components - 1;
3378 else
3379 return instr->coord_components;
3380 }
3381
3382 if (instr->src[src].src_type == nir_tex_src_backend1 ||
3383 instr->src[src].src_type == nir_tex_src_backend2)
3384 return nir_src_num_components(instr->src[src].src);
3385
3386 /* For AMD, this can be a vec8/vec4 image/sampler descriptor. */
3387 if (instr->src[src].src_type == nir_tex_src_texture_handle ||
3388 instr->src[src].src_type == nir_tex_src_sampler_handle)
3389 return 0;
3390
3391 return 1;
3392 }
3393
3394 /**
3395 * Return which components are written into transform feedback buffers.
3396 * The result is relative to 0, not "component".
3397 */
3398 unsigned
3399 nir_instr_xfb_write_mask(nir_intrinsic_instr *instr)
3400 {
3401 unsigned mask = 0;
3402
3403 if (nir_intrinsic_has_io_xfb(instr)) {
3404 unsigned wr_mask = nir_intrinsic_write_mask(instr) << nir_intrinsic_component(instr);
3405 assert((wr_mask & ~0xf) == 0); /* only 4 components allowed */
3406
3407 unsigned iter_mask = wr_mask;
3408 while (iter_mask) {
3409 unsigned i = u_bit_scan(&iter_mask);
3410 nir_io_xfb xfb = i < 2 ? nir_intrinsic_io_xfb(instr) : nir_intrinsic_io_xfb2(instr);
3411 if (xfb.out[i % 2].num_components)
3412 mask |= BITFIELD_RANGE(i, xfb.out[i % 2].num_components) & wr_mask;
3413 }
3414 }
3415
3416 return mask;
3417 }
3418
3419 /**
3420 * Whether an output slot is consumed by fixed-function logic.
3421 */
3422 bool
3423 nir_slot_is_sysval_output(gl_varying_slot slot, gl_shader_stage next_shader)
3424 {
3425 switch (next_shader) {
3426 case MESA_SHADER_FRAGMENT:
3427 return slot == VARYING_SLOT_POS ||
3428 slot == VARYING_SLOT_PSIZ ||
3429 slot == VARYING_SLOT_EDGE ||
3430 slot == VARYING_SLOT_CLIP_VERTEX ||
3431 slot == VARYING_SLOT_CLIP_DIST0 ||
3432 slot == VARYING_SLOT_CLIP_DIST1 ||
3433 slot == VARYING_SLOT_CULL_DIST0 ||
3434 slot == VARYING_SLOT_CULL_DIST1 ||
3435 slot == VARYING_SLOT_LAYER ||
3436 slot == VARYING_SLOT_VIEWPORT ||
3437 slot == VARYING_SLOT_VIEW_INDEX ||
3438 slot == VARYING_SLOT_VIEWPORT_MASK ||
3439 slot == VARYING_SLOT_PRIMITIVE_SHADING_RATE ||
3440 /* NV_mesh_shader only */
3441 slot == VARYING_SLOT_PRIMITIVE_COUNT ||
3442 slot == VARYING_SLOT_PRIMITIVE_INDICES;
3443
3444 case MESA_SHADER_TESS_EVAL:
3445 return slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
3446 slot == VARYING_SLOT_TESS_LEVEL_INNER ||
3447 slot == VARYING_SLOT_BOUNDING_BOX0 ||
3448 slot == VARYING_SLOT_BOUNDING_BOX1;
3449
3450 case MESA_SHADER_MESH:
3451 /* NV_mesh_shader only */
3452 return slot == VARYING_SLOT_TASK_COUNT;
3453
3454 case MESA_SHADER_NONE:
3455 /* NONE means unknown. Check all possibilities. */
3456 return nir_slot_is_sysval_output(slot, MESA_SHADER_FRAGMENT) ||
3457 nir_slot_is_sysval_output(slot, MESA_SHADER_TESS_EVAL) ||
3458 nir_slot_is_sysval_output(slot, MESA_SHADER_MESH);
3459
3460 default:
3461 /* No other shaders have preceding shaders with sysval outputs. */
3462 return false;
3463 }
3464 }
3465
3466 /**
3467 * Whether an input/output slot is consumed by the next shader stage,
3468 * or written by the previous shader stage.
3469 *
3470 * Pass MESA_SHADER_NONE if the next shader is unknown.
3471 */
3472 bool
3473 nir_slot_is_varying(gl_varying_slot slot, gl_shader_stage next_shader)
3474 {
3475 bool unknown = next_shader == MESA_SHADER_NONE;
3476 bool exactly_before_fs = next_shader == MESA_SHADER_FRAGMENT || unknown;
3477 bool at_most_before_gs = next_shader <= MESA_SHADER_GEOMETRY || unknown;
3478
3479 return slot >= VARYING_SLOT_VAR0 ||
3480 (slot == VARYING_SLOT_POS && at_most_before_gs) ||
3481 slot == VARYING_SLOT_COL0 ||
3482 slot == VARYING_SLOT_COL1 ||
3483 slot == VARYING_SLOT_BFC0 ||
3484 slot == VARYING_SLOT_BFC1 ||
3485 slot == VARYING_SLOT_FOGC ||
3486 (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) ||
3487 slot == VARYING_SLOT_PNTC ||
3488 (slot == VARYING_SLOT_CLIP_VERTEX && at_most_before_gs) ||
3489 slot == VARYING_SLOT_CLIP_DIST0 ||
3490 slot == VARYING_SLOT_CLIP_DIST1 ||
3491 slot == VARYING_SLOT_CULL_DIST0 ||
3492 slot == VARYING_SLOT_CULL_DIST1 ||
3493 slot == VARYING_SLOT_PRIMITIVE_ID ||
3494 slot == VARYING_SLOT_LAYER ||
3495 slot == VARYING_SLOT_VIEWPORT ||
3496 slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
3497 slot == VARYING_SLOT_TESS_LEVEL_INNER ||
3498 (slot == VARYING_SLOT_VIEW_INDEX && exactly_before_fs);
3499 }
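/* Editorial note, not in the original source: for example, VARYING_SLOT_POS
 * written by a vertex shader counts as a varying when the next stage is a
 * tessellation or geometry shader (which can read gl_Position), but not when
 * the next stage is the fragment shader, where the position is consumed by
 * fixed-function rasterization instead.
 */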
3500
3501 bool
3502 nir_slot_is_sysval_output_and_varying(gl_varying_slot slot,
3503 gl_shader_stage next_shader)
3504 {
3505 return nir_slot_is_sysval_output(slot, next_shader) &&
3506 nir_slot_is_varying(slot, next_shader);
3507 }
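/* Editorial note, not in the original source: VARYING_SLOT_CLIP_DIST0 before
 * a fragment shader is an example of a slot that is both: fixed-function
 * clipping consumes it, and the fragment shader may still read
 * gl_ClipDistance.
 */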
3508
3509 /**
3510 * This marks the output store instruction as not feeding the next shader
3511 * stage. If the instruction has no other use, it's removed.
3512 */
3513 bool
3514 nir_remove_varying(nir_intrinsic_instr *intr, gl_shader_stage next_shader)
3515 {
3516 nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
3517
3518 if ((!sem.no_sysval_output &&
3519 nir_slot_is_sysval_output(sem.location, next_shader)) ||
3520 nir_instr_xfb_write_mask(intr)) {
3521 /* Demote the store instruction. */
3522 sem.no_varying = true;
3523 nir_intrinsic_set_io_semantics(intr, sem);
3524 return false;
3525 } else {
3526 nir_instr_remove(&intr->instr);
3527 return true;
3528 }
3529 }
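/* Editorial sketch, not part of the original nir.c: the rough shape a linking
 * pass might take when using nir_remove_varying() on a producer with lowered
 * IO.  The consumer_reads callback is hypothetical.
 */
static UNUSED bool
remove_unread_varyings(nir_shader *producer, gl_shader_stage next_shader,
                       bool (*consumer_reads)(gl_varying_slot slot))
{
   bool progress = false;

   nir_foreach_function_impl(impl, producer) {
      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic != nir_intrinsic_store_output)
               continue;

            nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
            if (!consumer_reads((gl_varying_slot)sem.location))
               progress |= nir_remove_varying(intr, next_shader);
         }
      }
   }

   return progress;
}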
3530
3531 /**
3532 * This marks the output store instruction as not feeding fixed-function
3533 * logic. If the instruction has no other use, it's removed.
3534 */
3535 bool
3536 nir_remove_sysval_output(nir_intrinsic_instr *intr, gl_shader_stage next_shader)
3537 {
3538 nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
3539
3540 if ((!sem.no_varying && nir_slot_is_varying(sem.location, next_shader)) ||
3541 nir_instr_xfb_write_mask(intr)) {
3542 /* Demote the store instruction. */
3543 sem.no_sysval_output = true;
3544 nir_intrinsic_set_io_semantics(intr, sem);
3545 return false;
3546 } else {
3547 nir_instr_remove(&intr->instr);
3548 return true;
3549 }
3550 }
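/* Editorial note, not in the original source: a driver might call this on,
 * say, a VARYING_SLOT_PSIZ store when the point size is taken from API state
 * rather than the shader; the store then survives only if it still feeds a
 * varying or transform feedback.
 */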
3551
3552 void
3553 nir_remove_non_entrypoints(nir_shader *nir)
3554 {
3555 nir_foreach_function_safe(func, nir) {
3556 if (!func->is_entrypoint)
3557 exec_node_remove(&func->node);
3558 }
3559 assert(exec_list_length(&nir->functions) == 1);
3560 }
3561
3562 void
3563 nir_remove_non_exported(nir_shader *nir)
3564 {
3565 nir_foreach_function_safe(func, nir) {
3566 if (!func->is_exported)
3567 exec_node_remove(&func->node);
3568 }
3569 }
3570
3571 /*
3572 * After precompiling entrypoints from a kernel library, we want to garbage
3573 * collect the NIR entrypoints but leave the exported library functions. This
3574 * helper does that.
3575 */
3576 void
3577 nir_remove_entrypoints(nir_shader *nir)
3578 {
3579 nir_foreach_entrypoint_safe(func, nir) {
3580 exec_node_remove(&func->node);
3581 }
3582 }
3583
3584 unsigned
3585 nir_static_workgroup_size(const nir_shader *s)
3586 {
3587 return s->info.workgroup_size[0] * s->info.workgroup_size[1] *
3588 s->info.workgroup_size[2];
3589 }
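/* Editorial note, not in the original source: e.g. a compute shader with a
 * local size of 8x8x1 yields 64.  The result is only meaningful when the
 * workgroup size is known at compile time (i.e. not variable).
 */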
3590
3591 bool
3592 nir_block_contains_work(nir_block *block)
3593 {
3594 if (!nir_cf_node_is_last(&block->cf_node))
3595 return true;
3596
3597 nir_foreach_instr(instr, block) {
3598 if (instr->type == nir_instr_type_phi)
3599 continue;
3600 if (instr->type != nir_instr_type_alu ||
3601 !nir_op_is_vec_or_mov(nir_instr_as_alu(instr)->op))
3602 return true;
3603 }
3604
3605 return false;
3606 }
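/* Editorial note, not in the original source: a block that is last in its
 * control-flow list and holds only phis and vec/mov copies is treated as
 * doing no real work, which is what lets control-flow simplification passes
 * consider such branches removable.
 */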
3607
3608 nir_op
3609 nir_atomic_op_to_alu(nir_atomic_op op)
3610 {
3611 switch (op) {
3612 case nir_atomic_op_iadd:
3613 return nir_op_iadd;
3614 case nir_atomic_op_imin:
3615 return nir_op_imin;
3616 case nir_atomic_op_umin:
3617 return nir_op_umin;
3618 case nir_atomic_op_imax:
3619 return nir_op_imax;
3620 case nir_atomic_op_umax:
3621 return nir_op_umax;
3622 case nir_atomic_op_iand:
3623 return nir_op_iand;
3624 case nir_atomic_op_ior:
3625 return nir_op_ior;
3626 case nir_atomic_op_ixor:
3627 return nir_op_ixor;
3628 case nir_atomic_op_fadd:
3629 return nir_op_fadd;
3630 case nir_atomic_op_fmin:
3631 return nir_op_fmin;
3632 case nir_atomic_op_fmax:
3633 return nir_op_fmax;
3634
3635 /* We don't handle exchanges or wraps */
3636 case nir_atomic_op_xchg:
3637 case nir_atomic_op_cmpxchg:
3638 case nir_atomic_op_fcmpxchg:
3639 case nir_atomic_op_inc_wrap:
3640 case nir_atomic_op_dec_wrap:
3641 case nir_atomic_op_ordered_add_gfx12_amd:
3642 return nir_num_opcodes;
3643 }
3644
3645 unreachable("Invalid nir_atomic_op");
3646 }
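/* Editorial sketch, not part of the original nir.c: callers are expected to
 * handle the nir_num_opcodes return value, for example when rewriting an
 * atomic into a plain ALU operation with nir_builder.  The helper name is
 * hypothetical.
 */
static UNUSED nir_def *
build_atomic_as_alu(nir_builder *b, nir_atomic_op op,
                    nir_def *cur, nir_def *data)
{
   nir_op alu = nir_atomic_op_to_alu(op);

   /* Exchanges and wrapping ops have no single equivalent ALU opcode. */
   if (alu == nir_num_opcodes)
      return NULL;

   return nir_build_alu2(b, alu, cur, data);
}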
3647
3648