1 /*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Connor Abbott (cwabbott0@gmail.com)
25 *
26 */
27
28 #include "nir.h"
29 #include <assert.h>
30 #include <limits.h>
31 #include <math.h>
32 #include "util/half_float.h"
33 #include "util/macros.h"
34 #include "util/u_math.h"
35 #include "util/u_qsort.h"
36 #include "nir_builder.h"
37 #include "nir_control_flow_private.h"
38 #include "nir_worklist.h"
39
40 #ifndef NDEBUG
41 uint32_t nir_debug = 0;
42 bool nir_debug_print_shader[MESA_SHADER_KERNEL + 1] = { 0 };
43
44 static const struct debug_named_value nir_debug_control[] = {
45 { "clone", NIR_DEBUG_CLONE,
46 "Test cloning a shader at each successful lowering/optimization call" },
47 { "serialize", NIR_DEBUG_SERIALIZE,
48 "Test serialize and deserialize shader at each successful lowering/optimization call" },
49 { "novalidate", NIR_DEBUG_NOVALIDATE,
50 "Disable shader validation at each successful lowering/optimization call" },
51 { "validate_ssa_dominance", NIR_DEBUG_VALIDATE_SSA_DOMINANCE,
52 "Validate SSA dominance in shader at each successful lowering/optimization call" },
53 { "tgsi", NIR_DEBUG_TGSI,
54 "Dump NIR/TGSI shaders when doing a NIR<->TGSI translation" },
55 { "print", NIR_DEBUG_PRINT,
56 "Dump resulting shader after each successful lowering/optimization call" },
57 { "print_vs", NIR_DEBUG_PRINT_VS,
58 "Dump resulting vertex shader after each successful lowering/optimization call" },
59 { "print_tcs", NIR_DEBUG_PRINT_TCS,
60 "Dump resulting tessellation control shader after each successful lowering/optimization call" },
61 { "print_tes", NIR_DEBUG_PRINT_TES,
62 "Dump resulting tessellation evaluation shader after each successful lowering/optimization call" },
63 { "print_gs", NIR_DEBUG_PRINT_GS,
64 "Dump resulting geometry shader after each successful lowering/optimization call" },
65 { "print_fs", NIR_DEBUG_PRINT_FS,
66 "Dump resulting fragment shader after each successful lowering/optimization call" },
67 { "print_cs", NIR_DEBUG_PRINT_CS,
68 "Dump resulting compute shader after each successful lowering/optimization call" },
69 { "print_ts", NIR_DEBUG_PRINT_TS,
70 "Dump resulting task shader after each successful lowering/optimization call" },
71 { "print_ms", NIR_DEBUG_PRINT_MS,
72 "Dump resulting mesh shader after each successful lowering/optimization call" },
73 { "print_rgs", NIR_DEBUG_PRINT_RGS,
74 "Dump resulting raygen shader after each successful lowering/optimization call" },
75 { "print_ahs", NIR_DEBUG_PRINT_AHS,
76 "Dump resulting any-hit shader after each successful lowering/optimization call" },
77 { "print_chs", NIR_DEBUG_PRINT_CHS,
78 "Dump resulting closest-hit shader after each successful lowering/optimization call" },
79 { "print_mhs", NIR_DEBUG_PRINT_MHS,
80 "Dump resulting miss-hit shader after each successful lowering/optimization call" },
81 { "print_is", NIR_DEBUG_PRINT_IS,
82 "Dump resulting intersection shader after each successful lowering/optimization call" },
83 { "print_cbs", NIR_DEBUG_PRINT_CBS,
84 "Dump resulting callable shader after each successful lowering/optimization call" },
85 { "print_ks", NIR_DEBUG_PRINT_KS,
86 "Dump resulting kernel shader after each successful lowering/optimization call" },
87 { "print_no_inline_consts", NIR_DEBUG_PRINT_NO_INLINE_CONSTS,
88 "Do not print const value near each use of const SSA variable" },
89 { "print_internal", NIR_DEBUG_PRINT_INTERNAL,
90 "Print shaders even if they are marked as internal" },
91 { "print_pass_flags", NIR_DEBUG_PRINT_PASS_FLAGS,
92 "Print pass_flags for every instruction when pass_flags are non-zero" },
93 DEBUG_NAMED_VALUE_END
94 };
95
96 DEBUG_GET_ONCE_FLAGS_OPTION(nir_debug, "NIR_DEBUG", nir_debug_control, 0)
97
98 static void
nir_process_debug_variable_once(void)
100 {
101 nir_debug = debug_get_option_nir_debug();
102
103 /* clang-format off */
104 nir_debug_print_shader[MESA_SHADER_VERTEX] = NIR_DEBUG(PRINT_VS);
105 nir_debug_print_shader[MESA_SHADER_TESS_CTRL] = NIR_DEBUG(PRINT_TCS);
106 nir_debug_print_shader[MESA_SHADER_TESS_EVAL] = NIR_DEBUG(PRINT_TES);
107 nir_debug_print_shader[MESA_SHADER_GEOMETRY] = NIR_DEBUG(PRINT_GS);
108 nir_debug_print_shader[MESA_SHADER_FRAGMENT] = NIR_DEBUG(PRINT_FS);
109 nir_debug_print_shader[MESA_SHADER_COMPUTE] = NIR_DEBUG(PRINT_CS);
110 nir_debug_print_shader[MESA_SHADER_TASK] = NIR_DEBUG(PRINT_TS);
111 nir_debug_print_shader[MESA_SHADER_MESH] = NIR_DEBUG(PRINT_MS);
112 nir_debug_print_shader[MESA_SHADER_RAYGEN] = NIR_DEBUG(PRINT_RGS);
113 nir_debug_print_shader[MESA_SHADER_ANY_HIT] = NIR_DEBUG(PRINT_AHS);
114 nir_debug_print_shader[MESA_SHADER_CLOSEST_HIT] = NIR_DEBUG(PRINT_CHS);
115 nir_debug_print_shader[MESA_SHADER_MISS] = NIR_DEBUG(PRINT_MHS);
116 nir_debug_print_shader[MESA_SHADER_INTERSECTION] = NIR_DEBUG(PRINT_IS);
117 nir_debug_print_shader[MESA_SHADER_CALLABLE] = NIR_DEBUG(PRINT_CBS);
118 nir_debug_print_shader[MESA_SHADER_KERNEL] = NIR_DEBUG(PRINT_KS);
119 /* clang-format on */
120 }
121
122 void
nir_process_debug_variable(void)
124 {
125 static once_flag flag = ONCE_FLAG_INIT;
126 call_once(&flag, nir_process_debug_variable_once);
127 }
128 #endif
129
130 /** Return true if the component mask "mask" with bit size "old_bit_size" can
131 * be re-interpreted to be used with "new_bit_size".
132 */
133 bool
nir_component_mask_can_reinterpret(nir_component_mask_t mask,
                                   unsigned old_bit_size,
                                   unsigned new_bit_size)
137 {
138 assert(util_is_power_of_two_nonzero(old_bit_size));
139 assert(util_is_power_of_two_nonzero(new_bit_size));
140
141 if (old_bit_size == new_bit_size)
142 return true;
143
144 if (old_bit_size == 1 || new_bit_size == 1)
145 return false;
146
147 if (old_bit_size > new_bit_size) {
148 unsigned ratio = old_bit_size / new_bit_size;
149 return util_last_bit(mask) * ratio <= NIR_MAX_VEC_COMPONENTS;
150 }
151
152 unsigned iter = mask;
153 while (iter) {
154 int start, count;
155 u_bit_scan_consecutive_range(&iter, &start, &count);
156 start *= old_bit_size;
157 count *= old_bit_size;
158 if (start % new_bit_size != 0)
159 return false;
160 if (count % new_bit_size != 0)
161 return false;
162 }
163 return true;
164 }
165
/** Re-interprets a component mask "mask" with bit size "old_bit_size" so that
 * it can be used with "new_bit_size".
 */
169 nir_component_mask_t
nir_component_mask_reinterpret(nir_component_mask_t mask,
                               unsigned old_bit_size,
                               unsigned new_bit_size)
173 {
174 assert(nir_component_mask_can_reinterpret(mask, old_bit_size, new_bit_size));
175
176 if (old_bit_size == new_bit_size)
177 return mask;
178
179 nir_component_mask_t new_mask = 0;
180 unsigned iter = mask;
181 while (iter) {
182 int start, count;
183 u_bit_scan_consecutive_range(&iter, &start, &count);
184 start = start * old_bit_size / new_bit_size;
185 count = count * old_bit_size / new_bit_size;
186 new_mask |= BITFIELD_RANGE(start, count);
187 }
188 return new_mask;
189 }
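
/* Worked example (illustrative, not part of the API): a two-component
 * 32-bit mask covers the same bytes as a four-component 16-bit mask, and
 * vice versa:
 *
 *    nir_component_mask_reinterpret(0x3, 32, 16) == 0xf
 *    nir_component_mask_reinterpret(0xc, 16, 32) == 0x2
 *
 * nir_component_mask_can_reinterpret() rejects cases where a set range is
 * not aligned to the new bit size, e.g. mask 0x2 from 16 to 32 bits.
 */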
190
191 nir_shader *
nir_shader_create(void *mem_ctx,
                  gl_shader_stage stage,
                  const nir_shader_compiler_options *options,
                  shader_info *si)
196 {
197 nir_shader *shader = rzalloc(mem_ctx, nir_shader);
198
199 shader->gctx = gc_context(shader);
200
201 #ifndef NDEBUG
202 nir_process_debug_variable();
203 #endif
204
205 exec_list_make_empty(&shader->variables);
206
207 shader->options = options;
208
209 if (si) {
210 assert(si->stage == stage);
211 shader->info = *si;
212 } else {
213 shader->info.stage = stage;
214 }
215
216 exec_list_make_empty(&shader->functions);
217
218 shader->num_inputs = 0;
219 shader->num_outputs = 0;
220 shader->num_uniforms = 0;
221
222 return shader;
223 }
224
225 void
nir_shader_add_variable(nir_shader *shader, nir_variable *var)
227 {
228 switch (var->data.mode) {
229 case nir_var_function_temp:
230 assert(!"nir_shader_add_variable cannot be used for local variables");
231 return;
232
233 case nir_var_shader_temp:
234 case nir_var_shader_in:
235 case nir_var_shader_out:
236 case nir_var_uniform:
237 case nir_var_mem_ubo:
238 case nir_var_mem_ssbo:
239 case nir_var_image:
240 case nir_var_mem_shared:
241 case nir_var_system_value:
242 case nir_var_mem_push_const:
243 case nir_var_mem_constant:
244 case nir_var_shader_call_data:
245 case nir_var_ray_hit_attrib:
246 case nir_var_mem_task_payload:
247 case nir_var_mem_node_payload:
248 case nir_var_mem_node_payload_in:
249 case nir_var_mem_global:
250 break;
251
252 default:
253 assert(!"invalid mode");
254 return;
255 }
256
257 exec_list_push_tail(&shader->variables, &var->node);
258 }
259
260 nir_variable *
nir_variable_create(nir_shader *shader, nir_variable_mode mode,
                    const struct glsl_type *type, const char *name)
263 {
264 nir_variable *var = rzalloc(shader, nir_variable);
265 var->name = ralloc_strdup(var, name);
266 var->type = type;
267 var->data.mode = mode;
268 var->data.how_declared = nir_var_declared_normally;
269
270 if ((mode == nir_var_shader_in &&
271 shader->info.stage != MESA_SHADER_VERTEX &&
272 shader->info.stage != MESA_SHADER_KERNEL) ||
273 (mode == nir_var_shader_out &&
274 shader->info.stage != MESA_SHADER_FRAGMENT))
275 var->data.interpolation = INTERP_MODE_SMOOTH;
276
277 if (mode == nir_var_shader_in || mode == nir_var_uniform)
278 var->data.read_only = true;
279
280 nir_shader_add_variable(shader, var);
281
282 return var;
283 }
284
285 nir_variable *
nir_local_variable_create(nir_function_impl *impl,
                          const struct glsl_type *type, const char *name)
288 {
289 nir_variable *var = rzalloc(impl->function->shader, nir_variable);
290 var->name = ralloc_strdup(var, name);
291 var->type = type;
292 var->data.mode = nir_var_function_temp;
293
294 nir_function_impl_add_variable(impl, var);
295
296 return var;
297 }
298
299 nir_variable *
nir_state_variable_create(nir_shader *shader,
                          const struct glsl_type *type,
                          const char *name,
                          const gl_state_index16 tokens[STATE_LENGTH])
304 {
305 nir_variable *var = nir_variable_create(shader, nir_var_uniform, type, name);
306 var->num_state_slots = 1;
307 var->state_slots = rzalloc_array(var, nir_state_slot, 1);
308 memcpy(var->state_slots[0].tokens, tokens,
309 sizeof(var->state_slots[0].tokens));
310 shader->num_uniforms++;
311 return var;
312 }
313
314 nir_variable *
nir_create_variable_with_location(nir_shader *shader, nir_variable_mode mode, int location,
                                  const struct glsl_type *type)
317 {
/* Only vector/scalar or unsized-array (arrayed I/O) types are supported,
 * because otherwise we don't know how much to increment num_inputs/outputs.
 */
321 assert(glsl_type_is_vector_or_scalar(type) || glsl_type_is_unsized_array(type));
322
323 const char *name;
324 switch (mode) {
325 case nir_var_shader_in:
326 if (shader->info.stage == MESA_SHADER_VERTEX)
327 name = gl_vert_attrib_name(location);
328 else
329 name = gl_varying_slot_name_for_stage(location, shader->info.stage);
330 break;
331
332 case nir_var_shader_out:
333 if (shader->info.stage == MESA_SHADER_FRAGMENT)
334 name = gl_frag_result_name(location);
335 else
336 name = gl_varying_slot_name_for_stage(location, shader->info.stage);
337 break;
338
339 case nir_var_system_value:
340 name = gl_system_value_name(location);
341 break;
342
343 default:
344 unreachable("Unsupported variable mode");
345 }
346
347 nir_variable *var = nir_variable_create(shader, mode, type, name);
348 var->data.location = location;
349
350 switch (mode) {
351 case nir_var_shader_in:
352 var->data.driver_location = shader->num_inputs++;
353 break;
354
355 case nir_var_shader_out:
356 var->data.driver_location = shader->num_outputs++;
357 break;
358
359 case nir_var_system_value:
360 break;
361
362 default:
363 unreachable("Unsupported variable mode");
364 }
365
366 return var;
367 }
368
369 nir_variable *
nir_get_variable_with_location(nir_shader *shader, nir_variable_mode mode, int location,
                               const struct glsl_type *type)
372 {
373 nir_variable *var = nir_find_variable_with_location(shader, mode, location);
374 if (var) {
375 /* If this shader has location_fracs, this builder function is not suitable. */
376 assert(var->data.location_frac == 0);
377
378 /* The variable for the slot should match what we expected. */
379 assert(type == var->type);
380 return var;
381 }
382
383 return nir_create_variable_with_location(shader, mode, location, type);
384 }
385
386 nir_variable *
nir_find_variable_with_location(nir_shader *shader,
                                nir_variable_mode mode,
                                unsigned location)
390 {
391 assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp);
392 nir_foreach_variable_with_modes(var, shader, mode) {
393 if (var->data.location == location)
394 return var;
395 }
396 return NULL;
397 }
398
399 nir_variable *
nir_find_variable_with_driver_location(nir_shader *shader,
                                       nir_variable_mode mode,
                                       unsigned location)
403 {
404 assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp);
405 nir_foreach_variable_with_modes(var, shader, mode) {
406 if (var->data.driver_location == location)
407 return var;
408 }
409 return NULL;
410 }
411
412 nir_variable *
nir_find_state_variable(nir_shader *s,
                        gl_state_index16 tokens[STATE_LENGTH])
415 {
416 nir_foreach_variable_with_modes(var, s, nir_var_uniform) {
417 if (var->num_state_slots == 1 &&
418 !memcmp(var->state_slots[0].tokens, tokens,
419 sizeof(var->state_slots[0].tokens)))
420 return var;
421 }
422 return NULL;
423 }
424
nir_variable *nir_find_sampler_variable_with_tex_index(nir_shader *shader,
                                                       unsigned texture_index)
427 {
428 nir_foreach_variable_with_modes(var, shader, nir_var_uniform) {
429 unsigned size =
430 glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
431 if ((glsl_type_is_texture(glsl_without_array(var->type)) ||
432 glsl_type_is_sampler(glsl_without_array(var->type))) &&
433 (var->data.binding == texture_index ||
434 (var->data.binding < texture_index &&
435 var->data.binding + size > texture_index)))
436 return var;
437 }
438 return NULL;
439 }
440
/* Annoyingly, qsort_r is not in the C standard library and, in particular, we
 * can't count on it on MSVC and Android. So we stuff the cmp function into
 * each array element. It's a bit messy and burns more memory, but the list of
 * variables should never be all that long.
 */
446 struct var_cmp {
447 nir_variable *var;
448 int (*cmp)(const nir_variable *, const nir_variable *);
449 };
450
451 static int
var_sort_cmp(const void *_a, const void *_b, void *_cmp)
453 {
454 const struct var_cmp *a = _a;
455 const struct var_cmp *b = _b;
456 assert(a->cmp == b->cmp);
457 return a->cmp(a->var, b->var);
458 }
459
460 void
nir_sort_variables_with_modes(nir_shader *shader,
                              int (*cmp)(const nir_variable *,
                                         const nir_variable *),
                              nir_variable_mode modes)
465 {
466 unsigned num_vars = 0;
467 nir_foreach_variable_with_modes(var, shader, modes) {
468 ++num_vars;
469 }
470 struct var_cmp *vars = ralloc_array(shader, struct var_cmp, num_vars);
471 unsigned i = 0;
472 nir_foreach_variable_with_modes_safe(var, shader, modes) {
473 exec_node_remove(&var->node);
474 vars[i++] = (struct var_cmp){
475 .var = var,
476 .cmp = cmp,
477 };
478 }
479 assert(i == num_vars);
480
481 util_qsort_r(vars, num_vars, sizeof(*vars), var_sort_cmp, cmp);
482
483 for (i = 0; i < num_vars; i++)
484 exec_list_push_tail(&shader->variables, &vars[i].var->node);
485
486 ralloc_free(vars);
487 }
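
/* Usage sketch (the comparator below is hypothetical, not part of NIR): a
 * pass that wants a stable variable order could sort shader inputs by
 * location:
 *
 *    static int
 *    cmp_by_location(const nir_variable *a, const nir_variable *b)
 *    {
 *       return a->data.location - b->data.location;
 *    }
 *
 *    nir_sort_variables_with_modes(shader, cmp_by_location, nir_var_shader_in);
 */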
488
489 nir_function *
nir_function_create(nir_shader *shader, const char *name)
491 {
492 nir_function *func = ralloc(shader, nir_function);
493
494 exec_list_push_tail(&shader->functions, &func->node);
495
496 func->name = ralloc_strdup(func, name);
497 func->shader = shader;
498 func->num_params = 0;
499 func->params = NULL;
500 func->impl = NULL;
501 func->is_entrypoint = false;
502 func->is_preamble = false;
503 func->dont_inline = false;
504 func->should_inline = false;
505 func->is_subroutine = false;
506 func->is_tmp_globals_wrapper = false;
507 func->subroutine_index = 0;
508 func->num_subroutine_types = 0;
509 func->subroutine_types = NULL;
510 func->workgroup_size[0] = 0;
511 func->workgroup_size[1] = 0;
512 func->workgroup_size[2] = 0;
513
514 /* Only meaningful for shader libraries, so don't export by default. */
515 func->is_exported = false;
516
517 return func;
518 }
519
520 void
nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src)
522 {
523 dest->src = nir_src_for_ssa(src->src.ssa);
524 for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
525 dest->swizzle[i] = src->swizzle[i];
526 }
527
528 bool
nir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, unsigned srcn)
530 {
531 static uint8_t trivial_swizzle[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
532 STATIC_ASSERT(ARRAY_SIZE(trivial_swizzle) == NIR_MAX_VEC_COMPONENTS);
533
534 const nir_alu_src *src = &alu->src[srcn];
535 unsigned num_components = nir_ssa_alu_instr_src_components(alu, srcn);
536
537 return (src->src.ssa->num_components == num_components) &&
538 (memcmp(src->swizzle, trivial_swizzle, num_components) == 0);
539 }
540
541 static void
cf_init(nir_cf_node *node, nir_cf_node_type type)
543 {
544 exec_node_init(&node->node);
545 node->parent = NULL;
546 node->type = type;
547 }
548
549 nir_function_impl *
nir_function_impl_create_bare(nir_shader *shader)
551 {
552 nir_function_impl *impl = ralloc(shader, nir_function_impl);
553
554 impl->function = NULL;
555 impl->preamble = NULL;
556
557 cf_init(&impl->cf_node, nir_cf_node_function);
558
559 exec_list_make_empty(&impl->body);
560 exec_list_make_empty(&impl->locals);
561 impl->ssa_alloc = 0;
562 impl->num_blocks = 0;
563 impl->valid_metadata = nir_metadata_none;
564 impl->structured = true;
565
566 /* create start & end blocks */
567 nir_block *start_block = nir_block_create(shader);
568 nir_block *end_block = nir_block_create(shader);
569 start_block->cf_node.parent = &impl->cf_node;
570 end_block->cf_node.parent = &impl->cf_node;
571 impl->end_block = end_block;
572
573 exec_list_push_tail(&impl->body, &start_block->cf_node.node);
574
575 start_block->successors[0] = end_block;
576 _mesa_set_add(end_block->predecessors, start_block);
577 return impl;
578 }
579
580 nir_function_impl *
nir_function_impl_create(nir_function *function)
582 {
583 assert(function->impl == NULL);
584
585 nir_function_impl *impl = nir_function_impl_create_bare(function->shader);
586 nir_function_set_impl(function, impl);
587 return impl;
588 }
589
590 nir_block *
nir_block_create(nir_shader *shader)
592 {
593 nir_block *block = rzalloc(shader, nir_block);
594
595 cf_init(&block->cf_node, nir_cf_node_block);
596
597 block->successors[0] = block->successors[1] = NULL;
598 block->predecessors = _mesa_pointer_set_create(block);
599 block->imm_dom = NULL;
/* XXX maybe it would be worth it to defer allocation? This
 * way it doesn't get allocated for shader refs that never run
 * nir_calc_dominance? For example, the state tracker creates an
 * initial IR, clones that, runs the appropriate lowering passes, passes
 * it to the driver which does common lowering/opt, and then stores a ref
 * which is later used to do state-specific lowering and further
 * opt. Do any of the references not need dominance metadata?
 */
608 block->dom_frontier = _mesa_pointer_set_create(block);
609
610 exec_list_make_empty(&block->instr_list);
611
612 return block;
613 }
614
615 static inline void
src_init(nir_src *src)
617 {
618 src->ssa = NULL;
619 }
620
621 nir_if *
nir_if_create(nir_shader *shader)
623 {
624 nir_if *if_stmt = ralloc(shader, nir_if);
625
626 if_stmt->control = nir_selection_control_none;
627
628 cf_init(&if_stmt->cf_node, nir_cf_node_if);
629 src_init(&if_stmt->condition);
630
631 nir_block *then = nir_block_create(shader);
632 exec_list_make_empty(&if_stmt->then_list);
633 exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
634 then->cf_node.parent = &if_stmt->cf_node;
635
636 nir_block *else_stmt = nir_block_create(shader);
637 exec_list_make_empty(&if_stmt->else_list);
638 exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
639 else_stmt->cf_node.parent = &if_stmt->cf_node;
640
641 return if_stmt;
642 }
643
644 nir_loop *
nir_loop_create(nir_shader *shader)
646 {
647 nir_loop *loop = rzalloc(shader, nir_loop);
648
649 cf_init(&loop->cf_node, nir_cf_node_loop);
650 /* Assume that loops are divergent until proven otherwise */
651 loop->divergent_break = true;
652 loop->divergent_continue = true;
653
654 nir_block *body = nir_block_create(shader);
655 exec_list_make_empty(&loop->body);
656 exec_list_push_tail(&loop->body, &body->cf_node.node);
657 body->cf_node.parent = &loop->cf_node;
658
659 body->successors[0] = body;
660 _mesa_set_add(body->predecessors, body);
661
662 exec_list_make_empty(&loop->continue_list);
663
664 return loop;
665 }
666
667 static void
instr_init(nir_instr *instr, nir_instr_type type)
669 {
670 instr->type = type;
671 instr->block = NULL;
672 exec_node_init(&instr->node);
673 }
674
675 static void
alu_src_init(nir_alu_src *src)
677 {
678 src_init(&src->src);
679 for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
680 src->swizzle[i] = i;
681 }
682
683 nir_alu_instr *
nir_alu_instr_create(nir_shader *shader, nir_op op)
685 {
686 unsigned num_srcs = nir_op_infos[op].num_inputs;
687 nir_alu_instr *instr = gc_zalloc_zla(shader->gctx, nir_alu_instr, nir_alu_src, num_srcs);
688
689 instr_init(&instr->instr, nir_instr_type_alu);
690 instr->op = op;
691 for (unsigned i = 0; i < num_srcs; i++)
692 alu_src_init(&instr->src[i]);
693
694 return instr;
695 }
696
697 nir_deref_instr *
nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
699 {
700 nir_deref_instr *instr = gc_zalloc(shader->gctx, nir_deref_instr, 1);
701
702 instr_init(&instr->instr, nir_instr_type_deref);
703
704 instr->deref_type = deref_type;
705 if (deref_type != nir_deref_type_var)
706 src_init(&instr->parent);
707
708 if (deref_type == nir_deref_type_array ||
709 deref_type == nir_deref_type_ptr_as_array)
710 src_init(&instr->arr.index);
711
712 return instr;
713 }
714
715 nir_jump_instr *
nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
717 {
718 nir_jump_instr *instr = gc_alloc(shader->gctx, nir_jump_instr, 1);
719 instr_init(&instr->instr, nir_instr_type_jump);
720 src_init(&instr->condition);
721 instr->type = type;
722 instr->target = NULL;
723 instr->else_target = NULL;
724
725 return instr;
726 }
727
728 nir_load_const_instr *
nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
                            unsigned bit_size)
731 {
732 nir_load_const_instr *instr =
733 gc_zalloc_zla(shader->gctx, nir_load_const_instr, nir_const_value, num_components);
734 instr_init(&instr->instr, nir_instr_type_load_const);
735
736 nir_def_init(&instr->instr, &instr->def, num_components, bit_size);
737
738 return instr;
739 }
740
741 nir_intrinsic_instr *
nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
743 {
744 unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
745 nir_intrinsic_instr *instr =
746 gc_zalloc_zla(shader->gctx, nir_intrinsic_instr, nir_src, num_srcs);
747
748 instr_init(&instr->instr, nir_instr_type_intrinsic);
749 instr->intrinsic = op;
750
751 for (unsigned i = 0; i < num_srcs; i++)
752 src_init(&instr->src[i]);
753
754 return instr;
755 }
756
757 nir_call_instr *
nir_call_instr_create(nir_shader *shader, nir_function *callee)
759 {
760 const unsigned num_params = callee->num_params;
761 nir_call_instr *instr =
762 gc_zalloc_zla(shader->gctx, nir_call_instr, nir_src, num_params);
763
764 instr_init(&instr->instr, nir_instr_type_call);
765 instr->callee = callee;
766 instr->num_params = num_params;
767 for (unsigned i = 0; i < num_params; i++)
768 src_init(&instr->params[i]);
769
770 return instr;
771 }
772
773 static int8_t default_tg4_offsets[4][2] = {
774 { 0, 1 },
775 { 1, 1 },
776 { 1, 0 },
777 { 0, 0 },
778 };
779
780 nir_tex_instr *
nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
782 {
783 nir_tex_instr *instr = gc_zalloc(shader->gctx, nir_tex_instr, 1);
784 instr_init(&instr->instr, nir_instr_type_tex);
785
786 instr->num_srcs = num_srcs;
787 instr->src = gc_alloc(shader->gctx, nir_tex_src, num_srcs);
788 for (unsigned i = 0; i < num_srcs; i++)
789 src_init(&instr->src[i].src);
790
791 instr->texture_index = 0;
792 instr->sampler_index = 0;
793 memcpy(instr->tg4_offsets, default_tg4_offsets, sizeof(instr->tg4_offsets));
794
795 return instr;
796 }
797
798 void
nir_tex_instr_add_src(nir_tex_instr *tex,
                      nir_tex_src_type src_type,
                      nir_def *src)
802 {
803 nir_tex_src *new_srcs = gc_zalloc(gc_get_context(tex), nir_tex_src, tex->num_srcs + 1);
804
805 for (unsigned i = 0; i < tex->num_srcs; i++) {
806 new_srcs[i].src_type = tex->src[i].src_type;
807 nir_instr_move_src(&tex->instr, &new_srcs[i].src,
808 &tex->src[i].src);
809 }
810
811 gc_free(tex->src);
812 tex->src = new_srcs;
813
814 tex->src[tex->num_srcs].src_type = src_type;
815 nir_instr_init_src(&tex->instr, &tex->src[tex->num_srcs].src, src);
816 tex->num_srcs++;
817 }
818
819 void
nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx)
821 {
822 assert(src_idx < tex->num_srcs);
823
824 /* First rewrite the source to NIR_SRC_INIT */
825 nir_instr_clear_src(&tex->instr, &tex->src[src_idx].src);
826
827 /* Now, move all of the other sources down */
828 for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) {
829 tex->src[i - 1].src_type = tex->src[i].src_type;
830 nir_instr_move_src(&tex->instr, &tex->src[i - 1].src, &tex->src[i].src);
831 }
832 tex->num_srcs--;
833 }
834
835 bool
nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex)
837 {
838 if (tex->op != nir_texop_tg4)
839 return false;
840 return memcmp(tex->tg4_offsets, default_tg4_offsets,
841 sizeof(tex->tg4_offsets)) != 0;
842 }
843
844 nir_phi_instr *
nir_phi_instr_create(nir_shader *shader)
846 {
847 nir_phi_instr *instr = gc_alloc(shader->gctx, nir_phi_instr, 1);
848 instr_init(&instr->instr, nir_instr_type_phi);
849
850 exec_list_make_empty(&instr->srcs);
851
852 return instr;
853 }
854
/**
 * Adds a new source to a NIR phi instruction.
 *
 * Note that this does not update the def/use relationship for src, assuming
 * that the instr is not in the shader. If it is, you have to do:
 *
 *    list_addtail(&phi_src->src.use_link, &src->uses);
 */
863 nir_phi_src *
nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_def *src)
865 {
866 nir_phi_src *phi_src;
867
868 phi_src = gc_zalloc(gc_get_context(instr), nir_phi_src, 1);
869 phi_src->pred = pred;
870 phi_src->src = nir_src_for_ssa(src);
871 nir_src_set_parent_instr(&phi_src->src, &instr->instr);
872 exec_list_push_tail(&instr->srcs, &phi_src->node);
873
874 return phi_src;
875 }
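
/* Usage sketch (block/def names are placeholders): add all sources while the
 * phi is still detached, then insert it at the top of the merge block so
 * that nir_instr_insert() sets up the use lists:
 *
 *    nir_phi_instr *phi = nir_phi_instr_create(shader);
 *    nir_def_init(&phi->instr, &phi->def, num_components, bit_size);
 *    nir_phi_instr_add_src(phi, then_block, then_def);
 *    nir_phi_instr_add_src(phi, else_block, else_def);
 *    nir_instr_insert(nir_before_block(merge_block), &phi->instr);
 */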
876
877 nir_parallel_copy_instr *
nir_parallel_copy_instr_create(nir_shader *shader)
879 {
880 nir_parallel_copy_instr *instr = gc_alloc(shader->gctx, nir_parallel_copy_instr, 1);
881 instr_init(&instr->instr, nir_instr_type_parallel_copy);
882
883 exec_list_make_empty(&instr->entries);
884
885 return instr;
886 }
887
888 nir_debug_info_instr *
nir_debug_info_instr_create(nir_shader *shader, nir_debug_info_type type,
                            uint32_t string_length)
891 {
892 uint32_t additional_size = 0;
893 if (type == nir_debug_info_string)
894 additional_size = string_length + 1;
895
896 nir_debug_info_instr *instr = gc_zalloc_size(
897 shader->gctx, sizeof(nir_debug_info_instr) + additional_size, 1);
898 instr_init(&instr->instr, nir_instr_type_debug_info);
899
900 instr->type = type;
901
902 if (type == nir_debug_info_string)
903 instr->string_length = string_length;
904
905 return instr;
906 }
907
908 nir_undef_instr *
nir_undef_instr_create(nir_shader *shader,
                       unsigned num_components,
                       unsigned bit_size)
912 {
913 nir_undef_instr *instr = gc_alloc(shader->gctx, nir_undef_instr, 1);
914 instr_init(&instr->instr, nir_instr_type_undef);
915
916 nir_def_init(&instr->instr, &instr->def, num_components, bit_size);
917
918 return instr;
919 }
920
921 static nir_const_value
const_value_float(double d, unsigned bit_size)
923 {
924 nir_const_value v;
925 memset(&v, 0, sizeof(v));
926
927 /* clang-format off */
928 switch (bit_size) {
929 case 16: v.u16 = _mesa_float_to_half(d); break;
930 case 32: v.f32 = d; break;
931 case 64: v.f64 = d; break;
932 default:
933 unreachable("Invalid bit size");
934 }
935 /* clang-format on */
936
937 return v;
938 }
939
940 static nir_const_value
const_value_int(int64_t i, unsigned bit_size)
942 {
943 nir_const_value v;
944 memset(&v, 0, sizeof(v));
945
946 /* clang-format off */
947 switch (bit_size) {
948 case 1: v.b = i & 1; break;
949 case 8: v.i8 = i; break;
950 case 16: v.i16 = i; break;
951 case 32: v.i32 = i; break;
952 case 64: v.i64 = i; break;
953 default:
954 unreachable("Invalid bit size");
955 }
956 /* clang-format on */
957
958 return v;
959 }
960
961 nir_const_value
nir_alu_binop_identity(nir_op binop, unsigned bit_size)
963 {
964 const int64_t max_int = (1ull << (bit_size - 1)) - 1;
965 const int64_t min_int = -max_int - 1;
966 switch (binop) {
967 case nir_op_iadd:
968 return const_value_int(0, bit_size);
969 case nir_op_fadd:
970 return const_value_float(0, bit_size);
971 case nir_op_imul:
972 return const_value_int(1, bit_size);
973 case nir_op_fmul:
974 return const_value_float(1, bit_size);
975 case nir_op_imin:
976 return const_value_int(max_int, bit_size);
977 case nir_op_umin:
978 return const_value_int(~0ull, bit_size);
979 case nir_op_fmin:
980 return const_value_float(INFINITY, bit_size);
981 case nir_op_imax:
982 return const_value_int(min_int, bit_size);
983 case nir_op_umax:
984 return const_value_int(0, bit_size);
985 case nir_op_fmax:
986 return const_value_float(-INFINITY, bit_size);
987 case nir_op_iand:
988 return const_value_int(~0ull, bit_size);
989 case nir_op_ior:
990 return const_value_int(0, bit_size);
991 case nir_op_ixor:
992 return const_value_int(0, bit_size);
993 default:
994 unreachable("Invalid reduction operation");
995 }
996 }
997
998 nir_function_impl *
nir_cf_node_get_function(nir_cf_node *node)
1000 {
1001 while (node->type != nir_cf_node_function) {
1002 node = node->parent;
1003 }
1004
1005 return nir_cf_node_as_function(node);
1006 }
1007
1008 /* Reduces a cursor by trying to convert everything to after and trying to
1009 * go up to block granularity when possible.
1010 */
1011 static nir_cursor
reduce_cursor(nir_cursor cursor)
1013 {
1014 switch (cursor.option) {
1015 case nir_cursor_before_block:
1016 if (exec_list_is_empty(&cursor.block->instr_list)) {
1017 /* Empty block. After is as good as before. */
1018 cursor.option = nir_cursor_after_block;
1019 }
1020 return cursor;
1021
1022 case nir_cursor_after_block:
1023 return cursor;
1024
1025 case nir_cursor_before_instr: {
1026 nir_instr *prev_instr = nir_instr_prev(cursor.instr);
1027 if (prev_instr) {
1028 /* Before this instruction is after the previous */
1029 cursor.instr = prev_instr;
1030 cursor.option = nir_cursor_after_instr;
1031 } else {
1032 /* No previous instruction. Switch to before block */
1033 cursor.block = cursor.instr->block;
1034 cursor.option = nir_cursor_before_block;
1035 }
1036 return reduce_cursor(cursor);
1037 }
1038
1039 case nir_cursor_after_instr:
1040 if (nir_instr_next(cursor.instr) == NULL) {
1041 /* This is the last instruction, switch to after block */
1042 cursor.option = nir_cursor_after_block;
1043 cursor.block = cursor.instr->block;
1044 }
1045 return cursor;
1046
1047 default:
1048 unreachable("Inavlid cursor option");
1049 }
1050 }
1051
1052 bool
nir_cursors_equal(nir_cursor a, nir_cursor b)
1054 {
1055 /* Reduced cursors should be unique */
1056 a = reduce_cursor(a);
1057 b = reduce_cursor(b);
1058
1059 return a.block == b.block && a.option == b.option;
1060 }
1061
1062 static bool
add_use_cb(nir_src *src, void *state)
1064 {
1065 nir_instr *instr = state;
1066
1067 nir_src_set_parent_instr(src, instr);
1068 list_addtail(&src->use_link, &src->ssa->uses);
1069
1070 return true;
1071 }
1072
1073 static bool
add_ssa_def_cb(nir_def *def, void *state)
1075 {
1076 nir_instr *instr = state;
1077
1078 if (instr->block && def->index == UINT_MAX) {
1079 nir_function_impl *impl =
1080 nir_cf_node_get_function(&instr->block->cf_node);
1081
1082 def->index = impl->ssa_alloc++;
1083
1084 impl->valid_metadata &= ~nir_metadata_live_defs;
1085 }
1086
1087 return true;
1088 }
1089
1090 static void
add_defs_uses(nir_instr *instr)
1092 {
1093 nir_foreach_src(instr, add_use_cb, instr);
1094 nir_foreach_def(instr, add_ssa_def_cb, instr);
1095 }
1096
1097 void
nir_instr_insert(nir_cursor cursor, nir_instr *instr)
1099 {
1100 switch (cursor.option) {
1101 case nir_cursor_before_block:
1102 /* Only allow inserting jumps into empty blocks. */
1103 if (instr->type == nir_instr_type_jump)
1104 assert(exec_list_is_empty(&cursor.block->instr_list));
1105
1106 instr->block = cursor.block;
1107 add_defs_uses(instr);
1108 exec_list_push_head(&cursor.block->instr_list, &instr->node);
1109 break;
1110 case nir_cursor_after_block: {
1111 /* Inserting instructions after a jump is illegal. */
1112 nir_instr *last = nir_block_last_instr(cursor.block);
1113 assert(last == NULL || last->type != nir_instr_type_jump);
1114 (void)last;
1115
1116 instr->block = cursor.block;
1117 add_defs_uses(instr);
1118 exec_list_push_tail(&cursor.block->instr_list, &instr->node);
1119 break;
1120 }
1121 case nir_cursor_before_instr:
1122 assert(instr->type != nir_instr_type_jump);
1123 instr->block = cursor.instr->block;
1124 add_defs_uses(instr);
1125 exec_node_insert_node_before(&cursor.instr->node, &instr->node);
1126 break;
1127 case nir_cursor_after_instr:
1128 /* Inserting instructions after a jump is illegal. */
1129 assert(cursor.instr->type != nir_instr_type_jump);
1130
1131 /* Only allow inserting jumps at the end of the block. */
1132 if (instr->type == nir_instr_type_jump)
1133 assert(cursor.instr == nir_block_last_instr(cursor.instr->block));
1134
1135 instr->block = cursor.instr->block;
1136 add_defs_uses(instr);
1137 exec_node_insert_after(&cursor.instr->node, &instr->node);
1138 break;
1139 }
1140
1141 if (instr->type == nir_instr_type_jump)
1142 nir_handle_add_jump(instr->block);
1143
1144 nir_function_impl *impl = nir_cf_node_get_function(&instr->block->cf_node);
1145 impl->valid_metadata &= ~nir_metadata_instr_index;
1146 }
1147
1148 bool
nir_instr_move(nir_cursor cursor, nir_instr *instr)
1150 {
1151 /* If the cursor happens to refer to this instruction (either before or
1152 * after), don't do anything.
1153 */
1154 switch (cursor.option) {
1155 case nir_cursor_before_instr:
1156 if (cursor.instr == instr || nir_instr_prev(cursor.instr) == instr)
1157 return false;
1158 break;
1159 case nir_cursor_after_instr:
1160 if (cursor.instr == instr || nir_instr_next(cursor.instr) == instr)
1161 return false;
1162 break;
1163 case nir_cursor_before_block:
1164 if (cursor.block == instr->block && nir_instr_is_first(instr))
1165 return false;
1166 break;
1167 case nir_cursor_after_block:
1168 if (cursor.block == instr->block && nir_instr_is_last(instr))
1169 return false;
1170 break;
1171 }
1172
1173 nir_instr_remove(instr);
1174 nir_instr_insert(cursor, instr);
1175 return true;
1176 }
1177
1178 static bool
src_is_valid(const nir_src *src)
1180 {
1181 return (src->ssa != NULL);
1182 }
1183
1184 static bool
remove_use_cb(nir_src *src, void *state)
1186 {
1187 (void)state;
1188
1189 if (src_is_valid(src))
1190 list_del(&src->use_link);
1191
1192 return true;
1193 }
1194
1195 static void
remove_defs_uses(nir_instr *instr)
1197 {
1198 nir_foreach_src(instr, remove_use_cb, instr);
1199 }
1200
1201 void
nir_instr_remove_v(nir_instr *instr)
1203 {
1204 remove_defs_uses(instr);
1205 exec_node_remove(&instr->node);
1206
1207 if (instr->type == nir_instr_type_jump) {
1208 nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
1209 nir_handle_remove_jump(instr->block, jump_instr->type);
1210 }
1211 }
1212
1213 void
nir_instr_free(nir_instr *instr)
1215 {
1216 switch (instr->type) {
1217 case nir_instr_type_tex:
1218 gc_free(nir_instr_as_tex(instr)->src);
1219 break;
1220
1221 case nir_instr_type_phi: {
1222 nir_phi_instr *phi = nir_instr_as_phi(instr);
1223 nir_foreach_phi_src_safe(phi_src, phi)
1224 gc_free(phi_src);
1225 break;
1226 }
1227
1228 default:
1229 break;
1230 }
1231
1232 gc_free(instr);
1233 }
1234
1235 void
nir_instr_free_list(struct exec_list *list)
1237 {
1238 struct exec_node *node;
1239 while ((node = exec_list_pop_head(list))) {
1240 nir_instr *removed_instr = exec_node_data(nir_instr, node, node);
1241 nir_instr_free(removed_instr);
1242 }
1243 }
1244
1245 static bool
nir_instr_free_and_dce_live_cb(nir_def *def, void *state)
1247 {
1248 bool *live = state;
1249
1250 if (!nir_def_is_unused(def)) {
1251 *live = true;
1252 return false;
1253 } else {
1254 return true;
1255 }
1256 }
1257
1258 static bool
nir_instr_free_and_dce_is_live(nir_instr *instr)
1260 {
1261 /* Note: don't have to worry about jumps because they don't have dests to
1262 * become unused.
1263 */
1264 if (instr->type == nir_instr_type_intrinsic) {
1265 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1266 const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
1267 if (!(info->flags & NIR_INTRINSIC_CAN_ELIMINATE))
1268 return true;
1269 }
1270
1271 bool live = false;
1272 nir_foreach_def(instr, nir_instr_free_and_dce_live_cb, &live);
1273 return live;
1274 }
1275
1276 static bool
nir_instr_dce_add_dead_srcs_cb(nir_src *src, void *state)
1278 {
1279 nir_instr_worklist *wl = state;
1280
1281 list_del(&src->use_link);
1282 if (!nir_instr_free_and_dce_is_live(src->ssa->parent_instr))
1283 nir_instr_worklist_push_tail(wl, src->ssa->parent_instr);
1284
1285 /* Stop nir_instr_remove from trying to delete the link again. */
1286 src->ssa = NULL;
1287
1288 return true;
1289 }
1290
1291 static void
nir_instr_dce_add_dead_ssa_srcs(nir_instr_worklist *wl, nir_instr *instr)
1293 {
1294 nir_foreach_src(instr, nir_instr_dce_add_dead_srcs_cb, wl);
1295 }
1296
1297 /**
1298 * Frees an instruction and any SSA defs that it used that are now dead,
1299 * returning a nir_cursor where the instruction previously was.
1300 */
1301 nir_cursor
nir_instr_free_and_dce(nir_instr *instr)
1303 {
1304 nir_instr_worklist *worklist = nir_instr_worklist_create();
1305
1306 nir_instr_dce_add_dead_ssa_srcs(worklist, instr);
1307 nir_cursor c = nir_instr_remove(instr);
1308
1309 struct exec_list to_free;
1310 exec_list_make_empty(&to_free);
1311
1312 nir_instr *dce_instr;
1313 while ((dce_instr = nir_instr_worklist_pop_head(worklist))) {
1314 nir_instr_dce_add_dead_ssa_srcs(worklist, dce_instr);
1315
1316 /* If we're removing the instr where our cursor is, then we have to
1317 * point the cursor elsewhere.
1318 */
1319 if ((c.option == nir_cursor_before_instr ||
1320 c.option == nir_cursor_after_instr) &&
1321 c.instr == dce_instr)
1322 c = nir_instr_remove(dce_instr);
1323 else
1324 nir_instr_remove(dce_instr);
1325 exec_list_push_tail(&to_free, &dce_instr->node);
1326 }
1327
1328 nir_instr_free_list(&to_free);
1329
1330 nir_instr_worklist_destroy(worklist);
1331
1332 return c;
1333 }
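
/* Usage sketch (instruction and builder names are illustrative): after
 * rewriting all uses of an instruction's def, free it together with any
 * operands that just became dead, and keep building at the returned cursor:
 *
 *    nir_def_rewrite_uses(&old->def, replacement);
 *    b->cursor = nir_instr_free_and_dce(&old->instr);
 */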
1334
1335 /*@}*/
1336
1337 nir_def *
nir_instr_def(nir_instr *instr)
1339 {
1340 switch (instr->type) {
1341 case nir_instr_type_alu:
1342 return &nir_instr_as_alu(instr)->def;
1343
1344 case nir_instr_type_deref:
1345 return &nir_instr_as_deref(instr)->def;
1346
1347 case nir_instr_type_tex:
1348 return &nir_instr_as_tex(instr)->def;
1349
1350 case nir_instr_type_intrinsic: {
1351 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1352 if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
1353 return &intrin->def;
1354 } else {
1355 return NULL;
1356 }
1357 }
1358
1359 case nir_instr_type_phi:
1360 return &nir_instr_as_phi(instr)->def;
1361
1362 case nir_instr_type_parallel_copy:
1363 unreachable("Parallel copies are unsupported by this function");
1364
1365 case nir_instr_type_load_const:
1366 return &nir_instr_as_load_const(instr)->def;
1367
1368 case nir_instr_type_undef:
1369 return &nir_instr_as_undef(instr)->def;
1370
1371 case nir_instr_type_debug_info:
1372 return &nir_instr_as_debug_info(instr)->def;
1373
1374 case nir_instr_type_call:
1375 case nir_instr_type_jump:
1376 return NULL;
1377 }
1378
1379 unreachable("Invalid instruction type");
1380 }
1381
1382 bool
nir_foreach_phi_src_leaving_block(nir_block *block,
                                  nir_foreach_src_cb cb,
                                  void *state)
1386 {
1387 for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) {
1388 if (block->successors[i] == NULL)
1389 continue;
1390
1391 nir_foreach_phi(phi, block->successors[i]) {
1392 nir_foreach_phi_src(phi_src, phi) {
1393 if (phi_src->pred == block) {
1394 if (!cb(&phi_src->src, state))
1395 return false;
1396 }
1397 }
1398 }
1399 }
1400
1401 return true;
1402 }
1403
1404 nir_const_value
nir_const_value_for_float(double f, unsigned bit_size)
1406 {
1407 nir_const_value v;
1408 memset(&v, 0, sizeof(v));
1409
1410 /* clang-format off */
1411 switch (bit_size) {
1412 case 16: v.u16 = _mesa_float_to_half(f); break;
1413 case 32: v.f32 = f; break;
1414 case 64: v.f64 = f; break;
1415 default: unreachable("Invalid bit size");
1416 }
1417 /* clang-format on */
1418
1419 return v;
1420 }
1421
1422 double
nir_const_value_as_float(nir_const_value value, unsigned bit_size)
1424 {
1425 /* clang-format off */
1426 switch (bit_size) {
1427 case 16: return _mesa_half_to_float(value.u16);
1428 case 32: return value.f32;
1429 case 64: return value.f64;
1430 default: unreachable("Invalid bit size");
1431 }
1432 /* clang-format on */
1433 }
1434
1435 nir_const_value *
nir_src_as_const_value(nir_src src)
1437 {
1438 if (src.ssa->parent_instr->type != nir_instr_type_load_const)
1439 return NULL;
1440
1441 nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
1442
1443 return load->value;
1444 }
1445
1446 const char *
nir_src_as_string(nir_src src)
1448 {
1449 nir_debug_info_instr *di = nir_src_as_debug_info(src);
1450 if (di && di->type == nir_debug_info_string)
1451 return di->string;
1452
1453 return NULL;
1454 }
1455
1456 /**
1457 * Returns true if the source is known to be always uniform. Otherwise it
1458 * returns false which means it may or may not be uniform but it can't be
1459 * determined.
1460 *
1461 * For a more precise analysis of uniform values, use nir_divergence_analysis.
1462 */
1463 bool
nir_src_is_always_uniform(nir_src src)
1465 {
1466 /* Constants are trivially uniform */
1467 if (src.ssa->parent_instr->type == nir_instr_type_load_const)
1468 return true;
1469
1470 if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
1471 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);
1472 /* As are uniform variables */
1473 if (intr->intrinsic == nir_intrinsic_load_uniform &&
1474 nir_src_is_always_uniform(intr->src[0]))
1475 return true;
1476 /* From the Vulkan specification 15.6.1. Push Constant Interface:
1477 * "Any member of a push constant block that is declared as an array must
1478 * only be accessed with dynamically uniform indices."
1479 */
1480 if (intr->intrinsic == nir_intrinsic_load_push_constant)
1481 return true;
1482 if (intr->intrinsic == nir_intrinsic_load_deref &&
1483 nir_deref_mode_is(nir_src_as_deref(intr->src[0]), nir_var_mem_push_const))
1484 return true;
1485 }
1486
1487 /* Operating together uniform expressions produces a uniform result */
1488 if (src.ssa->parent_instr->type == nir_instr_type_alu) {
1489 nir_alu_instr *alu = nir_instr_as_alu(src.ssa->parent_instr);
1490 for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
1491 if (!nir_src_is_always_uniform(alu->src[i].src))
1492 return false;
1493 }
1494
1495 return true;
1496 }
1497
1498 /* XXX: this could have many more tests, such as when a sampler function is
1499 * called with uniform arguments.
1500 */
1501 return false;
1502 }
1503
1504 nir_block *
nir_src_get_block(nir_src *src)
1506 {
1507 if (nir_src_is_if(src))
1508 return nir_cf_node_cf_tree_prev(&nir_src_parent_if(src)->cf_node);
1509 else if (nir_src_parent_instr(src)->type == nir_instr_type_phi)
1510 return list_entry(src, nir_phi_src, src)->pred;
1511 else
1512 return nir_src_parent_instr(src)->block;
1513 }
1514
1515 static void
src_remove_all_uses(nir_src *src)
1517 {
1518 if (src && src_is_valid(src))
1519 list_del(&src->use_link);
1520 }
1521
1522 static void
src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
1524 {
1525 if (!src)
1526 return;
1527
1528 if (!src_is_valid(src))
1529 return;
1530
1531 if (parent_instr) {
1532 nir_src_set_parent_instr(src, parent_instr);
1533 } else {
1534 assert(parent_if);
1535 nir_src_set_parent_if(src, parent_if);
1536 }
1537
1538 list_addtail(&src->use_link, &src->ssa->uses);
1539 }
1540
1541 void
nir_instr_init_src(nir_instr *instr, nir_src *src, nir_def *def)
1543 {
1544 *src = nir_src_for_ssa(def);
1545 src_add_all_uses(src, instr, NULL);
1546 }
1547
1548 void
nir_instr_clear_src(nir_instr *instr, nir_src *src)
1550 {
1551 src_remove_all_uses(src);
1552 *src = NIR_SRC_INIT;
1553 }
1554
1555 void
nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
1557 {
1558 assert(!src_is_valid(dest) || nir_src_parent_instr(dest) == dest_instr);
1559
1560 src_remove_all_uses(dest);
1561 src_remove_all_uses(src);
1562 *dest = *src;
1563 *src = NIR_SRC_INIT;
1564 src_add_all_uses(dest, dest_instr, NULL);
1565 }
1566
1567 void
nir_def_init(nir_instr *instr, nir_def *def,
             unsigned num_components,
             unsigned bit_size)
1571 {
1572 def->parent_instr = instr;
1573 list_inithead(&def->uses);
1574 def->num_components = num_components;
1575 def->bit_size = bit_size;
1576 def->divergent = true; /* This is the safer default */
1577 def->loop_invariant = false;
1578
1579 if (instr->block) {
1580 nir_function_impl *impl =
1581 nir_cf_node_get_function(&instr->block->cf_node);
1582
1583 def->index = impl->ssa_alloc++;
1584
1585 impl->valid_metadata &= ~nir_metadata_live_defs;
1586 } else {
1587 def->index = UINT_MAX;
1588 }
1589 }
1590
1591 void
nir_def_rewrite_uses(nir_def *def, nir_def *new_ssa)
1593 {
1594 assert(def != new_ssa);
1595 nir_foreach_use_including_if_safe(use_src, def) {
1596 nir_src_rewrite(use_src, new_ssa);
1597 }
1598 }
1599
1600 void
nir_def_rewrite_uses_src(nir_def *def, nir_src new_src)
1602 {
1603 nir_def_rewrite_uses(def, new_src.ssa);
1604 }
1605
1606 static bool
is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
1608 {
1609 assert(start->block == end->block);
1610
1611 if (between->block != start->block)
1612 return false;
1613
1614 /* Search backwards looking for "between" */
1615 while (start != end) {
1616 if (between == end)
1617 return true;
1618
1619 end = nir_instr_prev(end);
1620 assert(end);
1621 }
1622
1623 return false;
1624 }
1625
1626 /* Replaces all uses of the given SSA def with the given source but only if
1627 * the use comes after the after_me instruction. This can be useful if you
1628 * are emitting code to fix up the result of some instruction: you can freely
1629 * use the result in that code and then call rewrite_uses_after and pass the
1630 * last fixup instruction as after_me and it will replace all of the uses you
1631 * want without touching the fixup code.
1632 *
1633 * This function assumes that after_me is in the same block as
1634 * def->parent_instr and that after_me comes after def->parent_instr.
1635 */
1636 void
nir_def_rewrite_uses_after(nir_def *def, nir_def *new_ssa,
                           nir_instr *after_me)
1639 {
1640 if (def == new_ssa)
1641 return;
1642
1643 nir_foreach_use_including_if_safe(use_src, def) {
1644 if (!nir_src_is_if(use_src)) {
1645 assert(nir_src_parent_instr(use_src) != def->parent_instr);
1646
1647 /* Since def already dominates all of its uses, the only way a use can
1648 * not be dominated by after_me is if it is between def and after_me in
1649 * the instruction list.
1650 */
1651 if (is_instr_between(def->parent_instr, after_me, nir_src_parent_instr(use_src)))
1652 continue;
1653 }
1654
1655 nir_src_rewrite(use_src, new_ssa);
1656 }
1657 }
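
/* Fixup sketch for the pattern described above (builder and instruction
 * names assumed): emit the fixup right after the instruction, then redirect
 * only the uses that come after it, leaving the fixup itself untouched:
 *
 *    b->cursor = nir_after_instr(&alu->instr);
 *    nir_def *sat = nir_fsat(b, &alu->def);
 *    nir_def_rewrite_uses_after(&alu->def, sat, sat->parent_instr);
 */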
1658
1659 static nir_def *
get_store_value(nir_intrinsic_instr *intrin)
1661 {
1662 assert(nir_intrinsic_has_write_mask(intrin));
1663 /* deref stores have the deref in src[0] and the store value in src[1] */
1664 if (intrin->intrinsic == nir_intrinsic_store_deref ||
1665 intrin->intrinsic == nir_intrinsic_store_deref_block_intel)
1666 return intrin->src[1].ssa;
1667
1668 /* all other stores have the store value in src[0] */
1669 return intrin->src[0].ssa;
1670 }
1671
1672 nir_component_mask_t
nir_src_components_read(const nir_src *src)
1674 {
1675 assert(nir_src_parent_instr(src));
1676
1677 if (nir_src_parent_instr(src)->type == nir_instr_type_alu) {
1678 nir_alu_instr *alu = nir_instr_as_alu(nir_src_parent_instr(src));
1679 nir_alu_src *alu_src = exec_node_data(nir_alu_src, src, src);
1680 int src_idx = alu_src - &alu->src[0];
1681 assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs);
1682 return nir_alu_instr_src_read_mask(alu, src_idx);
1683 } else if (nir_src_parent_instr(src)->type == nir_instr_type_intrinsic) {
1684 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(nir_src_parent_instr(src));
1685 if (nir_intrinsic_has_write_mask(intrin) && src->ssa == get_store_value(intrin))
1686 return nir_intrinsic_write_mask(intrin);
1687 else
1688 return (1 << src->ssa->num_components) - 1;
1689 } else {
1690 return (1 << src->ssa->num_components) - 1;
1691 }
1692 }
1693
1694 nir_component_mask_t
nir_def_components_read(const nir_def *def)
1696 {
1697 nir_component_mask_t read_mask = 0;
1698
1699 nir_foreach_use_including_if(use, def) {
1700 read_mask |= nir_src_is_if(use) ? 1 : nir_src_components_read(use);
1701
1702 if (read_mask == (1 << def->num_components) - 1)
1703 return read_mask;
1704 }
1705
1706 return read_mask;
1707 }
1708
1709 bool
nir_def_all_uses_are_fsat(const nir_def *def)
1711 {
1712 nir_foreach_use(src, def) {
1713 if (nir_src_is_if(src))
1714 return false;
1715
1716 nir_instr *use = nir_src_parent_instr(src);
1717 if (use->type != nir_instr_type_alu)
1718 return false;
1719
1720 nir_alu_instr *alu = nir_instr_as_alu(use);
1721 if (alu->op != nir_op_fsat)
1722 return false;
1723 }
1724
1725 return true;
1726 }
1727
1728 bool
1729 nir_def_all_uses_ignore_sign_bit(const nir_def *def)
1730 {
1731 nir_foreach_use(use, def) {
1732 if (nir_src_is_if(use))
1733 return false;
1734 nir_instr *instr = nir_src_parent_instr(use);
1735
1736 if (instr->type != nir_instr_type_alu)
1737 return false;
1738
1739 nir_alu_instr *alu = nir_instr_as_alu(instr);
1740 if (alu->op == nir_op_fabs) {
1741 continue;
1742 } else if (alu->op == nir_op_fmul || alu->op == nir_op_ffma) {
1743 nir_alu_src *alu_src = list_entry(use, nir_alu_src, src);
1744 unsigned src_index = alu_src - alu->src;
1745 /* a * a doesn't care about sign of a. */
1746 if (src_index < 2 && nir_alu_srcs_equal(alu, alu, 0, 1))
1747 continue;
1748 }
1749
1750 return false;
1751 }
1752 return true;
1753 }
1754
1755 nir_block *
1756 nir_block_unstructured_next(nir_block *block)
1757 {
1758 if (block == NULL) {
1759 /* nir_foreach_block_unstructured_safe() will call this function on a
1760 * NULL block after the last iteration, but it won't use the result so
1761 * just return NULL here.
1762 */
1763 return NULL;
1764 }
1765
1766 nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
1767 if (cf_next == NULL && block->cf_node.parent->type == nir_cf_node_function)
1768 return NULL;
1769
1770 if (cf_next && cf_next->type == nir_cf_node_block)
1771 return nir_cf_node_as_block(cf_next);
1772
1773 return nir_block_cf_tree_next(block);
1774 }
1775
1776 nir_block *
1777 nir_unstructured_start_block(nir_function_impl *impl)
1778 {
1779 return nir_start_block(impl);
1780 }
1781
1782 nir_block *
1783 nir_block_cf_tree_next(nir_block *block)
1784 {
1785 if (block == NULL) {
1786 /* nir_foreach_block_safe() will call this function on a NULL block
1787 * after the last iteration, but it won't use the result so just return
1788 * NULL here.
1789 */
1790 return NULL;
1791 }
1792
1793 assert(nir_cf_node_get_function(&block->cf_node)->structured);
1794
1795 nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
1796 if (cf_next)
1797 return nir_cf_node_cf_tree_first(cf_next);
1798
1799 nir_cf_node *parent = block->cf_node.parent;
1800 if (parent->type == nir_cf_node_function)
1801 return NULL;
1802
1803 /* Is this the last block of a cf_node? Return the following block */
1804 if (block == nir_cf_node_cf_tree_last(parent))
1805 return nir_cf_node_as_block(nir_cf_node_next(parent));
1806
1807 switch (parent->type) {
1808 case nir_cf_node_if: {
1809 /* We are at the end of the if. Go to the beginning of the else */
1810 nir_if *if_stmt = nir_cf_node_as_if(parent);
1811 assert(block == nir_if_last_then_block(if_stmt));
1812 return nir_if_first_else_block(if_stmt);
1813 }
1814
1815 case nir_cf_node_loop: {
1816 /* We are at the end of the body and there is a continue construct */
1817 nir_loop *loop = nir_cf_node_as_loop(parent);
1818 assert(block == nir_loop_last_block(loop) &&
1819 nir_loop_has_continue_construct(loop));
1820 return nir_loop_first_continue_block(loop);
1821 }
1822
1823 default:
1824 unreachable("unknown cf node type");
1825 }
1826 }
1827
1828 nir_block *
1829 nir_block_cf_tree_prev(nir_block *block)
1830 {
1831 if (block == NULL) {
1832 /* do this for consistency with nir_block_cf_tree_next() */
1833 return NULL;
1834 }
1835
1836 assert(nir_cf_node_get_function(&block->cf_node)->structured);
1837
1838 nir_cf_node *cf_prev = nir_cf_node_prev(&block->cf_node);
1839 if (cf_prev)
1840 return nir_cf_node_cf_tree_last(cf_prev);
1841
1842 nir_cf_node *parent = block->cf_node.parent;
1843 if (parent->type == nir_cf_node_function)
1844 return NULL;
1845
1846 /* Is this the first block of a cf_node? Return the previous block */
1847 if (block == nir_cf_node_cf_tree_first(parent))
1848 return nir_cf_node_as_block(nir_cf_node_prev(parent));
1849
1850 switch (parent->type) {
1851 case nir_cf_node_if: {
1852 /* We are at the beginning of the else. Go to the end of the if */
1853 nir_if *if_stmt = nir_cf_node_as_if(parent);
1854 assert(block == nir_if_first_else_block(if_stmt));
1855 return nir_if_last_then_block(if_stmt);
1856 }
1857 case nir_cf_node_loop: {
1858 /* We are at the beginning of the continue construct. */
1859 nir_loop *loop = nir_cf_node_as_loop(parent);
1860 assert(nir_loop_has_continue_construct(loop) &&
1861 block == nir_loop_first_continue_block(loop));
1862 return nir_loop_last_block(loop);
1863 }
1864
1865 default:
1866 unreachable("unknown cf node type");
1867 }
1868 }
1869
1870 nir_block *
1871 nir_cf_node_cf_tree_first(nir_cf_node *node)
1872 {
1873 switch (node->type) {
1874 case nir_cf_node_function: {
1875 nir_function_impl *impl = nir_cf_node_as_function(node);
1876 return nir_start_block(impl);
1877 }
1878
1879 case nir_cf_node_if: {
1880 nir_if *if_stmt = nir_cf_node_as_if(node);
1881 return nir_if_first_then_block(if_stmt);
1882 }
1883
1884 case nir_cf_node_loop: {
1885 nir_loop *loop = nir_cf_node_as_loop(node);
1886 return nir_loop_first_block(loop);
1887 }
1888
1889 case nir_cf_node_block: {
1890 return nir_cf_node_as_block(node);
1891 }
1892
1893 default:
1894 unreachable("unknown node type");
1895 }
1896 }
1897
1898 nir_block *
1899 nir_cf_node_cf_tree_last(nir_cf_node *node)
1900 {
1901 switch (node->type) {
1902 case nir_cf_node_function: {
1903 nir_function_impl *impl = nir_cf_node_as_function(node);
1904 return nir_impl_last_block(impl);
1905 }
1906
1907 case nir_cf_node_if: {
1908 nir_if *if_stmt = nir_cf_node_as_if(node);
1909 return nir_if_last_else_block(if_stmt);
1910 }
1911
1912 case nir_cf_node_loop: {
1913 nir_loop *loop = nir_cf_node_as_loop(node);
1914 if (nir_loop_has_continue_construct(loop))
1915 return nir_loop_last_continue_block(loop);
1916 else
1917 return nir_loop_last_block(loop);
1918 }
1919
1920 case nir_cf_node_block: {
1921 return nir_cf_node_as_block(node);
1922 }
1923
1924 default:
1925 unreachable("unknown node type");
1926 }
1927 }
1928
1929 nir_block *
1930 nir_cf_node_cf_tree_next(nir_cf_node *node)
1931 {
1932 if (node->type == nir_cf_node_block)
1933 return nir_block_cf_tree_next(nir_cf_node_as_block(node));
1934 else if (node->type == nir_cf_node_function)
1935 return NULL;
1936 else
1937 return nir_cf_node_as_block(nir_cf_node_next(node));
1938 }
1939
1940 nir_block *
1941 nir_cf_node_cf_tree_prev(nir_cf_node *node)
1942 {
1943 if (node->type == nir_cf_node_block)
1944 return nir_block_cf_tree_prev(nir_cf_node_as_block(node));
1945 else if (node->type == nir_cf_node_function)
1946 return NULL;
1947 else
1948 return nir_cf_node_as_block(nir_cf_node_prev(node));
1949 }
1950
1951 nir_if *
1952 nir_block_get_following_if(nir_block *block)
1953 {
1954 if (exec_node_is_tail_sentinel(&block->cf_node.node))
1955 return NULL;
1956
1957 if (nir_cf_node_is_last(&block->cf_node))
1958 return NULL;
1959
1960 nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
1961
1962 if (next_node->type != nir_cf_node_if)
1963 return NULL;
1964
1965 return nir_cf_node_as_if(next_node);
1966 }
1967
1968 nir_loop *
1969 nir_block_get_following_loop(nir_block *block)
1970 {
1971 if (exec_node_is_tail_sentinel(&block->cf_node.node))
1972 return NULL;
1973
1974 if (nir_cf_node_is_last(&block->cf_node))
1975 return NULL;
1976
1977 nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
1978
1979 if (next_node->type != nir_cf_node_loop)
1980 return NULL;
1981
1982 return nir_cf_node_as_loop(next_node);
1983 }
1984
1985 static int
1986 compare_block_index(const void *p1, const void *p2)
1987 {
1988 const nir_block *block1 = *((const nir_block **)p1);
1989 const nir_block *block2 = *((const nir_block **)p2);
1990
1991 return (int)block1->index - (int)block2->index;
1992 }
1993
1994 nir_block **
1995 nir_block_get_predecessors_sorted(const nir_block *block, void *mem_ctx)
1996 {
1997 nir_block **preds =
1998 ralloc_array(mem_ctx, nir_block *, block->predecessors->entries);
1999
2000 unsigned i = 0;
2001 set_foreach(block->predecessors, entry)
2002 preds[i++] = (nir_block *)entry->key;
2003 assert(i == block->predecessors->entries);
2004
2005 qsort(preds, block->predecessors->entries, sizeof(nir_block *),
2006 compare_block_index);
2007
2008 return preds;
2009 }
2010
2011 void
2012 nir_index_blocks(nir_function_impl *impl)
2013 {
2014 unsigned index = 0;
2015
2016 if (impl->valid_metadata & nir_metadata_block_index)
2017 return;
2018
2019 nir_foreach_block_unstructured(block, impl) {
2020 block->index = index++;
2021 }
2022
2023 /* The end_block isn't really part of the program, which is why its index
2024 * is >= num_blocks.
2025 */
2026 impl->num_blocks = impl->end_block->index = index;
2027 }
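
/* Illustrative usage note (not part of the original file): callers normally
 * do not invoke nir_index_blocks() directly but request the metadata, which
 * runs the indexing on demand:
 *
 *    nir_metadata_require(impl, nir_metadata_block_index);
 *    nir_block **preds = nir_block_get_predecessors_sorted(block, mem_ctx);
 *
 * nir_metadata_require() is the existing metadata entry point; the block and
 * ralloc memory context are assumed to already be in the caller's hands.
 */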
2028
2029 static bool
2030 index_ssa_def_cb(nir_def *def, void *state)
2031 {
2032 unsigned *index = (unsigned *)state;
2033 def->index = (*index)++;
2034
2035 return true;
2036 }
2037
2038 /**
2039 * The indices are applied top-to-bottom which has the very nice property
2040 * that, if A dominates B, then A->index <= B->index.
2041 */
2042 void
2043 nir_index_ssa_defs(nir_function_impl *impl)
2044 {
2045 unsigned index = 0;
2046
2047 impl->valid_metadata &= ~nir_metadata_live_defs;
2048
2049 nir_foreach_block_unstructured(block, impl) {
2050 nir_foreach_instr(instr, block)
2051 nir_foreach_def(instr, index_ssa_def_cb, &index);
2052 }
2053
2054 impl->ssa_alloc = index;
2055 }
2056
2057 /**
2058 * The indices are applied top-to-bottom which has the very nice property
2059 * that, if A dominates B, then A->index <= B->index.
2060 */
2061 unsigned
2062 nir_index_instrs(nir_function_impl *impl)
2063 {
2064 unsigned index = 0;
2065
2066 nir_foreach_block(block, impl) {
2067 block->start_ip = index++;
2068
2069 nir_foreach_instr(instr, block)
2070 instr->index = index++;
2071
2072 block->end_ip = index++;
2073 }
2074
2075 return index;
2076 }
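
/* Illustrative sketch (assumption): once nir_index_instrs() has run, relative
 * program order within the function can be tested with a plain comparison
 * instead of walking the instruction list:
 *
 *    nir_index_instrs(impl);
 *    bool a_before_b = instr_a->index < instr_b->index;
 *
 * This only stays valid until instructions are inserted or removed.
 */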
2077
2078 void
2079 nir_shader_clear_pass_flags(nir_shader *shader)
2080 {
2081 nir_foreach_function_impl(impl, shader) {
2082 nir_foreach_block(block, impl) {
2083 nir_foreach_instr(instr, block) {
2084 instr->pass_flags = 0;
2085 }
2086 }
2087 }
2088 }
2089
2090 unsigned
2091 nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes)
2092 {
2093 unsigned count = 0;
2094 nir_foreach_variable_with_modes(var, shader, modes)
2095 var->index = count++;
2096 return count;
2097 }
2098
2099 unsigned
2100 nir_function_impl_index_vars(nir_function_impl *impl)
2101 {
2102 unsigned count = 0;
2103 nir_foreach_function_temp_variable(var, impl)
2104 var->index = count++;
2105 return count;
2106 }
2107
2108 static nir_instr *
2109 cursor_next_instr(nir_cursor cursor)
2110 {
2111 switch (cursor.option) {
2112 case nir_cursor_before_block:
2113 for (nir_block *block = cursor.block; block;
2114 block = nir_block_cf_tree_next(block)) {
2115 nir_instr *instr = nir_block_first_instr(block);
2116 if (instr)
2117 return instr;
2118 }
2119 return NULL;
2120
2121 case nir_cursor_after_block:
2122 cursor.block = nir_block_cf_tree_next(cursor.block);
2123 if (cursor.block == NULL)
2124 return NULL;
2125
2126 cursor.option = nir_cursor_before_block;
2127 return cursor_next_instr(cursor);
2128
2129 case nir_cursor_before_instr:
2130 return cursor.instr;
2131
2132 case nir_cursor_after_instr:
2133 if (nir_instr_next(cursor.instr))
2134 return nir_instr_next(cursor.instr);
2135
2136 cursor.option = nir_cursor_after_block;
2137 cursor.block = cursor.instr->block;
2138 return cursor_next_instr(cursor);
2139 }
2140
2141 unreachable("Inavlid cursor option");
2142 }
2143
2144 bool
2145 nir_function_impl_lower_instructions(nir_function_impl *impl,
2146 nir_instr_filter_cb filter,
2147 nir_lower_instr_cb lower,
2148 void *cb_data)
2149 {
2150 nir_builder b = nir_builder_create(impl);
2151
2152 nir_metadata preserved = nir_metadata_control_flow;
2153
2154 bool progress = false;
2155 nir_cursor iter = nir_before_impl(impl);
2156 nir_instr *instr;
2157 while ((instr = cursor_next_instr(iter)) != NULL) {
2158 if (filter && !filter(instr, cb_data)) {
2159 iter = nir_after_instr(instr);
2160 continue;
2161 }
2162
2163 nir_def *old_def = nir_instr_def(instr);
2164 struct list_head old_uses;
2165 if (old_def != NULL) {
2166 /* We're about to ask the callback to generate a replacement for instr.
2167 * Save off the uses from instr's SSA def so we know what uses to
2168 * rewrite later. If we use nir_def_rewrite_uses, it fails in the
2169 * case where the generated replacement code uses the result of instr
2170 * itself. If we use nir_def_rewrite_uses_after (which is the
2171 * normal solution to this problem), it doesn't work well if control-
2172 * flow is inserted as part of the replacement, doesn't handle cases
2173 * where the replacement is something consumed by instr, and suffers
2174 * from performance issues. This is the only way to 100% guarantee
2175 * that we rewrite the correct set efficiently.
2176 */
2177
2178 list_replace(&old_def->uses, &old_uses);
2179 list_inithead(&old_def->uses);
2180 }
2181
2182 b.cursor = nir_after_instr(instr);
2183 nir_def *new_def = lower(&b, instr, cb_data);
2184 if (new_def && new_def != NIR_LOWER_INSTR_PROGRESS &&
2185 new_def != NIR_LOWER_INSTR_PROGRESS_REPLACE) {
2186 assert(old_def != NULL);
2187 if (new_def->parent_instr->block != instr->block)
2188 preserved = nir_metadata_none;
2189
2190 list_for_each_entry_safe(nir_src, use_src, &old_uses, use_link)
2191 nir_src_rewrite(use_src, new_def);
2192
2193 if (nir_def_is_unused(old_def)) {
2194 iter = nir_instr_free_and_dce(instr);
2195 } else {
2196 iter = nir_after_instr(instr);
2197 }
2198 progress = true;
2199 } else {
2200 /* We didn't end up lowering after all. Put the uses back */
2201 if (old_def)
2202 list_replace(&old_uses, &old_def->uses);
2203
2204 if (new_def == NIR_LOWER_INSTR_PROGRESS_REPLACE) {
2205 /* Only instructions without a return value can be removed like this */
2206 assert(!old_def);
2207 iter = nir_instr_free_and_dce(instr);
2208 progress = true;
2209 } else
2210 iter = nir_after_instr(instr);
2211
2212 if (new_def == NIR_LOWER_INSTR_PROGRESS)
2213 progress = true;
2214 }
2215 }
2216
2217 if (progress) {
2218 nir_metadata_preserve(impl, preserved);
2219 } else {
2220 nir_metadata_preserve(impl, nir_metadata_all);
2221 }
2222
2223 return progress;
2224 }
2225
2226 bool
2227 nir_shader_lower_instructions(nir_shader *shader,
2228 nir_instr_filter_cb filter,
2229 nir_lower_instr_cb lower,
2230 void *cb_data)
2231 {
2232 bool progress = false;
2233
2234 nir_foreach_function_impl(impl, shader) {
2235 if (nir_function_impl_lower_instructions(impl, filter, lower, cb_data))
2236 progress = true;
2237 }
2238
2239 return progress;
2240 }
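
/* Illustrative sketch (not an upstream pass): the filter/lower callback pair
 * expected by nir_shader_lower_instructions().  Lowering load_draw_id to a
 * constant zero is an arbitrary example chosen only to keep the sketch short.
 *
 *    static bool
 *    filter_draw_id(const nir_instr *instr, const void *data)
 *    {
 *       return instr->type == nir_instr_type_intrinsic &&
 *              nir_instr_as_intrinsic(instr)->intrinsic ==
 *                 nir_intrinsic_load_draw_id;
 *    }
 *
 *    static nir_def *
 *    lower_draw_id(nir_builder *b, nir_instr *instr, void *data)
 *    {
 *       return nir_imm_int(b, 0);
 *    }
 *
 *    bool progress =
 *       nir_shader_lower_instructions(shader, filter_draw_id,
 *                                     lower_draw_id, NULL);
 *
 * The builder cursor is already placed after the filtered instruction, so the
 * lower callback only has to return the replacement def (or one of the
 * NIR_LOWER_INSTR_PROGRESS values).
 */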
2241
2242 /**
2243 * Returns true if the shader supports quad-based implicit derivatives on
2244 * texture sampling.
2245 */
2246 bool
2247 nir_shader_supports_implicit_lod(nir_shader *shader)
2248 {
2249 return (shader->info.stage == MESA_SHADER_FRAGMENT ||
2250 (gl_shader_stage_uses_workgroup(shader->info.stage) &&
2251 shader->info.derivative_group != DERIVATIVE_GROUP_NONE));
2252 }
2253
2254 nir_intrinsic_op
2255 nir_intrinsic_from_system_value(gl_system_value val)
2256 {
2257 switch (val) {
2258 case SYSTEM_VALUE_VERTEX_ID:
2259 return nir_intrinsic_load_vertex_id;
2260 case SYSTEM_VALUE_INSTANCE_ID:
2261 return nir_intrinsic_load_instance_id;
2262 case SYSTEM_VALUE_DRAW_ID:
2263 return nir_intrinsic_load_draw_id;
2264 case SYSTEM_VALUE_BASE_INSTANCE:
2265 return nir_intrinsic_load_base_instance;
2266 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
2267 return nir_intrinsic_load_vertex_id_zero_base;
2268 case SYSTEM_VALUE_IS_INDEXED_DRAW:
2269 return nir_intrinsic_load_is_indexed_draw;
2270 case SYSTEM_VALUE_FIRST_VERTEX:
2271 return nir_intrinsic_load_first_vertex;
2272 case SYSTEM_VALUE_BASE_VERTEX:
2273 return nir_intrinsic_load_base_vertex;
2274 case SYSTEM_VALUE_INVOCATION_ID:
2275 return nir_intrinsic_load_invocation_id;
2276 case SYSTEM_VALUE_FRAG_COORD:
2277 return nir_intrinsic_load_frag_coord;
2278 case SYSTEM_VALUE_PIXEL_COORD:
2279 return nir_intrinsic_load_pixel_coord;
2280 case SYSTEM_VALUE_POINT_COORD:
2281 return nir_intrinsic_load_point_coord;
2282 case SYSTEM_VALUE_LINE_COORD:
2283 return nir_intrinsic_load_line_coord;
2284 case SYSTEM_VALUE_FRONT_FACE:
2285 return nir_intrinsic_load_front_face;
2286 case SYSTEM_VALUE_FRONT_FACE_FSIGN:
2287 return nir_intrinsic_load_front_face_fsign;
2288 case SYSTEM_VALUE_SAMPLE_ID:
2289 return nir_intrinsic_load_sample_id;
2290 case SYSTEM_VALUE_SAMPLE_POS:
2291 return nir_intrinsic_load_sample_pos;
2292 case SYSTEM_VALUE_SAMPLE_POS_OR_CENTER:
2293 return nir_intrinsic_load_sample_pos_or_center;
2294 case SYSTEM_VALUE_SAMPLE_MASK_IN:
2295 return nir_intrinsic_load_sample_mask_in;
2296 case SYSTEM_VALUE_LAYER_ID:
2297 return nir_intrinsic_load_layer_id;
2298 case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
2299 return nir_intrinsic_load_local_invocation_id;
2300 case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
2301 return nir_intrinsic_load_local_invocation_index;
2302 case SYSTEM_VALUE_WORKGROUP_ID:
2303 return nir_intrinsic_load_workgroup_id;
2304 case SYSTEM_VALUE_BASE_WORKGROUP_ID:
2305 return nir_intrinsic_load_base_workgroup_id;
2306 case SYSTEM_VALUE_WORKGROUP_INDEX:
2307 return nir_intrinsic_load_workgroup_index;
2308 case SYSTEM_VALUE_NUM_WORKGROUPS:
2309 return nir_intrinsic_load_num_workgroups;
2310 case SYSTEM_VALUE_PRIMITIVE_ID:
2311 return nir_intrinsic_load_primitive_id;
2312 case SYSTEM_VALUE_TESS_COORD:
2313 return nir_intrinsic_load_tess_coord;
2314 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
2315 return nir_intrinsic_load_tess_level_outer;
2316 case SYSTEM_VALUE_TESS_LEVEL_INNER:
2317 return nir_intrinsic_load_tess_level_inner;
2318 case SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT:
2319 return nir_intrinsic_load_tess_level_outer_default;
2320 case SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT:
2321 return nir_intrinsic_load_tess_level_inner_default;
2322 case SYSTEM_VALUE_VERTICES_IN:
2323 return nir_intrinsic_load_patch_vertices_in;
2324 case SYSTEM_VALUE_HELPER_INVOCATION:
2325 return nir_intrinsic_load_helper_invocation;
2326 case SYSTEM_VALUE_COLOR0:
2327 return nir_intrinsic_load_color0;
2328 case SYSTEM_VALUE_COLOR1:
2329 return nir_intrinsic_load_color1;
2330 case SYSTEM_VALUE_VIEW_INDEX:
2331 return nir_intrinsic_load_view_index;
2332 case SYSTEM_VALUE_SUBGROUP_SIZE:
2333 return nir_intrinsic_load_subgroup_size;
2334 case SYSTEM_VALUE_SUBGROUP_INVOCATION:
2335 return nir_intrinsic_load_subgroup_invocation;
2336 case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
2337 return nir_intrinsic_load_subgroup_eq_mask;
2338 case SYSTEM_VALUE_SUBGROUP_GE_MASK:
2339 return nir_intrinsic_load_subgroup_ge_mask;
2340 case SYSTEM_VALUE_SUBGROUP_GT_MASK:
2341 return nir_intrinsic_load_subgroup_gt_mask;
2342 case SYSTEM_VALUE_SUBGROUP_LE_MASK:
2343 return nir_intrinsic_load_subgroup_le_mask;
2344 case SYSTEM_VALUE_SUBGROUP_LT_MASK:
2345 return nir_intrinsic_load_subgroup_lt_mask;
2346 case SYSTEM_VALUE_NUM_SUBGROUPS:
2347 return nir_intrinsic_load_num_subgroups;
2348 case SYSTEM_VALUE_SUBGROUP_ID:
2349 return nir_intrinsic_load_subgroup_id;
2350 case SYSTEM_VALUE_WORKGROUP_SIZE:
2351 return nir_intrinsic_load_workgroup_size;
2352 case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
2353 return nir_intrinsic_load_global_invocation_id;
2354 case SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID:
2355 return nir_intrinsic_load_base_global_invocation_id;
2356 case SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX:
2357 return nir_intrinsic_load_global_invocation_index;
2358 case SYSTEM_VALUE_GLOBAL_GROUP_SIZE:
2359 return nir_intrinsic_load_global_size;
2360 case SYSTEM_VALUE_WORK_DIM:
2361 return nir_intrinsic_load_work_dim;
2362 case SYSTEM_VALUE_USER_DATA_AMD:
2363 return nir_intrinsic_load_user_data_amd;
2364 case SYSTEM_VALUE_RAY_LAUNCH_ID:
2365 return nir_intrinsic_load_ray_launch_id;
2366 case SYSTEM_VALUE_RAY_LAUNCH_SIZE:
2367 return nir_intrinsic_load_ray_launch_size;
2368 case SYSTEM_VALUE_RAY_WORLD_ORIGIN:
2369 return nir_intrinsic_load_ray_world_origin;
2370 case SYSTEM_VALUE_RAY_WORLD_DIRECTION:
2371 return nir_intrinsic_load_ray_world_direction;
2372 case SYSTEM_VALUE_RAY_OBJECT_ORIGIN:
2373 return nir_intrinsic_load_ray_object_origin;
2374 case SYSTEM_VALUE_RAY_OBJECT_DIRECTION:
2375 return nir_intrinsic_load_ray_object_direction;
2376 case SYSTEM_VALUE_RAY_T_MIN:
2377 return nir_intrinsic_load_ray_t_min;
2378 case SYSTEM_VALUE_RAY_T_MAX:
2379 return nir_intrinsic_load_ray_t_max;
2380 case SYSTEM_VALUE_RAY_OBJECT_TO_WORLD:
2381 return nir_intrinsic_load_ray_object_to_world;
2382 case SYSTEM_VALUE_RAY_WORLD_TO_OBJECT:
2383 return nir_intrinsic_load_ray_world_to_object;
2384 case SYSTEM_VALUE_RAY_HIT_KIND:
2385 return nir_intrinsic_load_ray_hit_kind;
2386 case SYSTEM_VALUE_RAY_FLAGS:
2387 return nir_intrinsic_load_ray_flags;
2388 case SYSTEM_VALUE_RAY_GEOMETRY_INDEX:
2389 return nir_intrinsic_load_ray_geometry_index;
2390 case SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX:
2391 return nir_intrinsic_load_ray_instance_custom_index;
2392 case SYSTEM_VALUE_CULL_MASK:
2393 return nir_intrinsic_load_cull_mask;
2394 case SYSTEM_VALUE_RAY_TRIANGLE_VERTEX_POSITIONS:
2395 return nir_intrinsic_load_ray_triangle_vertex_positions;
2396 case SYSTEM_VALUE_MESH_VIEW_COUNT:
2397 return nir_intrinsic_load_mesh_view_count;
2398 case SYSTEM_VALUE_FRAG_SHADING_RATE:
2399 return nir_intrinsic_load_frag_shading_rate;
2400 case SYSTEM_VALUE_FULLY_COVERED:
2401 return nir_intrinsic_load_fully_covered;
2402 case SYSTEM_VALUE_FRAG_SIZE:
2403 return nir_intrinsic_load_frag_size;
2404 case SYSTEM_VALUE_FRAG_INVOCATION_COUNT:
2405 return nir_intrinsic_load_frag_invocation_count;
2406 case SYSTEM_VALUE_SHADER_INDEX:
2407 return nir_intrinsic_load_shader_index;
2408 case SYSTEM_VALUE_COALESCED_INPUT_COUNT:
2409 return nir_intrinsic_load_coalesced_input_count;
2410 case SYSTEM_VALUE_WARPS_PER_SM_NV:
2411 return nir_intrinsic_load_warps_per_sm_nv;
2412 case SYSTEM_VALUE_SM_COUNT_NV:
2413 return nir_intrinsic_load_sm_count_nv;
2414 case SYSTEM_VALUE_WARP_ID_NV:
2415 return nir_intrinsic_load_warp_id_nv;
2416 case SYSTEM_VALUE_SM_ID_NV:
2417 return nir_intrinsic_load_sm_id_nv;
2418 default:
2419 return nir_num_intrinsics;
2420 }
2421 }
2422
2423 gl_system_value
2424 nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
2425 {
2426 switch (intrin) {
2427 case nir_intrinsic_load_vertex_id:
2428 return SYSTEM_VALUE_VERTEX_ID;
2429 case nir_intrinsic_load_instance_id:
2430 return SYSTEM_VALUE_INSTANCE_ID;
2431 case nir_intrinsic_load_draw_id:
2432 return SYSTEM_VALUE_DRAW_ID;
2433 case nir_intrinsic_load_base_instance:
2434 return SYSTEM_VALUE_BASE_INSTANCE;
2435 case nir_intrinsic_load_vertex_id_zero_base:
2436 return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
2437 case nir_intrinsic_load_first_vertex:
2438 return SYSTEM_VALUE_FIRST_VERTEX;
2439 case nir_intrinsic_load_is_indexed_draw:
2440 return SYSTEM_VALUE_IS_INDEXED_DRAW;
2441 case nir_intrinsic_load_base_vertex:
2442 return SYSTEM_VALUE_BASE_VERTEX;
2443 case nir_intrinsic_load_invocation_id:
2444 return SYSTEM_VALUE_INVOCATION_ID;
2445 case nir_intrinsic_load_frag_coord:
2446 return SYSTEM_VALUE_FRAG_COORD;
2447 case nir_intrinsic_load_pixel_coord:
2448 return SYSTEM_VALUE_PIXEL_COORD;
2449 case nir_intrinsic_load_point_coord:
2450 return SYSTEM_VALUE_POINT_COORD;
2451 case nir_intrinsic_load_line_coord:
2452 return SYSTEM_VALUE_LINE_COORD;
2453 case nir_intrinsic_load_front_face:
2454 return SYSTEM_VALUE_FRONT_FACE;
2455 case nir_intrinsic_load_front_face_fsign:
2456 return SYSTEM_VALUE_FRONT_FACE_FSIGN;
2457 case nir_intrinsic_load_sample_id:
2458 return SYSTEM_VALUE_SAMPLE_ID;
2459 case nir_intrinsic_load_sample_pos:
2460 return SYSTEM_VALUE_SAMPLE_POS;
2461 case nir_intrinsic_load_sample_pos_or_center:
2462 return SYSTEM_VALUE_SAMPLE_POS_OR_CENTER;
2463 case nir_intrinsic_load_sample_mask_in:
2464 return SYSTEM_VALUE_SAMPLE_MASK_IN;
2465 case nir_intrinsic_load_layer_id:
2466 return SYSTEM_VALUE_LAYER_ID;
2467 case nir_intrinsic_load_local_invocation_id:
2468 return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
2469 case nir_intrinsic_load_local_invocation_index:
2470 return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
2471 case nir_intrinsic_load_num_workgroups:
2472 return SYSTEM_VALUE_NUM_WORKGROUPS;
2473 case nir_intrinsic_load_workgroup_id:
2474 return SYSTEM_VALUE_WORKGROUP_ID;
2475 case nir_intrinsic_load_base_workgroup_id:
2476 return SYSTEM_VALUE_BASE_WORKGROUP_ID;
2477 case nir_intrinsic_load_workgroup_index:
2478 return SYSTEM_VALUE_WORKGROUP_INDEX;
2479 case nir_intrinsic_load_primitive_id:
2480 return SYSTEM_VALUE_PRIMITIVE_ID;
2481 case nir_intrinsic_load_tess_coord:
2482 case nir_intrinsic_load_tess_coord_xy:
2483 return SYSTEM_VALUE_TESS_COORD;
2484 case nir_intrinsic_load_tess_level_outer:
2485 return SYSTEM_VALUE_TESS_LEVEL_OUTER;
2486 case nir_intrinsic_load_tess_level_inner:
2487 return SYSTEM_VALUE_TESS_LEVEL_INNER;
2488 case nir_intrinsic_load_tess_level_outer_default:
2489 return SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT;
2490 case nir_intrinsic_load_tess_level_inner_default:
2491 return SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT;
2492 case nir_intrinsic_load_patch_vertices_in:
2493 return SYSTEM_VALUE_VERTICES_IN;
2494 case nir_intrinsic_load_helper_invocation:
2495 return SYSTEM_VALUE_HELPER_INVOCATION;
2496 case nir_intrinsic_load_color0:
2497 return SYSTEM_VALUE_COLOR0;
2498 case nir_intrinsic_load_color1:
2499 return SYSTEM_VALUE_COLOR1;
2500 case nir_intrinsic_load_view_index:
2501 return SYSTEM_VALUE_VIEW_INDEX;
2502 case nir_intrinsic_load_subgroup_size:
2503 return SYSTEM_VALUE_SUBGROUP_SIZE;
2504 case nir_intrinsic_load_subgroup_invocation:
2505 return SYSTEM_VALUE_SUBGROUP_INVOCATION;
2506 case nir_intrinsic_load_subgroup_eq_mask:
2507 return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
2508 case nir_intrinsic_load_subgroup_ge_mask:
2509 return SYSTEM_VALUE_SUBGROUP_GE_MASK;
2510 case nir_intrinsic_load_subgroup_gt_mask:
2511 return SYSTEM_VALUE_SUBGROUP_GT_MASK;
2512 case nir_intrinsic_load_subgroup_le_mask:
2513 return SYSTEM_VALUE_SUBGROUP_LE_MASK;
2514 case nir_intrinsic_load_subgroup_lt_mask:
2515 return SYSTEM_VALUE_SUBGROUP_LT_MASK;
2516 case nir_intrinsic_load_num_subgroups:
2517 return SYSTEM_VALUE_NUM_SUBGROUPS;
2518 case nir_intrinsic_load_subgroup_id:
2519 return SYSTEM_VALUE_SUBGROUP_ID;
2520 case nir_intrinsic_load_workgroup_size:
2521 return SYSTEM_VALUE_WORKGROUP_SIZE;
2522 case nir_intrinsic_load_global_invocation_id:
2523 return SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
2524 case nir_intrinsic_load_base_global_invocation_id:
2525 return SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID;
2526 case nir_intrinsic_load_global_invocation_index:
2527 return SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX;
2528 case nir_intrinsic_load_global_size:
2529 return SYSTEM_VALUE_GLOBAL_GROUP_SIZE;
2530 case nir_intrinsic_load_work_dim:
2531 return SYSTEM_VALUE_WORK_DIM;
2532 case nir_intrinsic_load_user_data_amd:
2533 return SYSTEM_VALUE_USER_DATA_AMD;
2534 case nir_intrinsic_load_barycentric_model:
2535 return SYSTEM_VALUE_BARYCENTRIC_PULL_MODEL;
2536 case nir_intrinsic_load_gs_header_ir3:
2537 return SYSTEM_VALUE_GS_HEADER_IR3;
2538 case nir_intrinsic_load_tcs_header_ir3:
2539 return SYSTEM_VALUE_TCS_HEADER_IR3;
2540 case nir_intrinsic_load_ray_launch_id:
2541 return SYSTEM_VALUE_RAY_LAUNCH_ID;
2542 case nir_intrinsic_load_ray_launch_size:
2543 return SYSTEM_VALUE_RAY_LAUNCH_SIZE;
2544 case nir_intrinsic_load_ray_world_origin:
2545 return SYSTEM_VALUE_RAY_WORLD_ORIGIN;
2546 case nir_intrinsic_load_ray_world_direction:
2547 return SYSTEM_VALUE_RAY_WORLD_DIRECTION;
2548 case nir_intrinsic_load_ray_object_origin:
2549 return SYSTEM_VALUE_RAY_OBJECT_ORIGIN;
2550 case nir_intrinsic_load_ray_object_direction:
2551 return SYSTEM_VALUE_RAY_OBJECT_DIRECTION;
2552 case nir_intrinsic_load_ray_t_min:
2553 return SYSTEM_VALUE_RAY_T_MIN;
2554 case nir_intrinsic_load_ray_t_max:
2555 return SYSTEM_VALUE_RAY_T_MAX;
2556 case nir_intrinsic_load_ray_object_to_world:
2557 return SYSTEM_VALUE_RAY_OBJECT_TO_WORLD;
2558 case nir_intrinsic_load_ray_world_to_object:
2559 return SYSTEM_VALUE_RAY_WORLD_TO_OBJECT;
2560 case nir_intrinsic_load_ray_hit_kind:
2561 return SYSTEM_VALUE_RAY_HIT_KIND;
2562 case nir_intrinsic_load_ray_flags:
2563 return SYSTEM_VALUE_RAY_FLAGS;
2564 case nir_intrinsic_load_ray_geometry_index:
2565 return SYSTEM_VALUE_RAY_GEOMETRY_INDEX;
2566 case nir_intrinsic_load_ray_instance_custom_index:
2567 return SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX;
2568 case nir_intrinsic_load_cull_mask:
2569 return SYSTEM_VALUE_CULL_MASK;
2570 case nir_intrinsic_load_ray_triangle_vertex_positions:
2571 return SYSTEM_VALUE_RAY_TRIANGLE_VERTEX_POSITIONS;
2572 case nir_intrinsic_load_frag_shading_rate:
2573 return SYSTEM_VALUE_FRAG_SHADING_RATE;
2574 case nir_intrinsic_load_mesh_view_count:
2575 return SYSTEM_VALUE_MESH_VIEW_COUNT;
2576 case nir_intrinsic_load_fully_covered:
2577 return SYSTEM_VALUE_FULLY_COVERED;
2578 case nir_intrinsic_load_frag_size:
2579 return SYSTEM_VALUE_FRAG_SIZE;
2580 case nir_intrinsic_load_frag_invocation_count:
2581 return SYSTEM_VALUE_FRAG_INVOCATION_COUNT;
2582 case nir_intrinsic_load_shader_index:
2583 return SYSTEM_VALUE_SHADER_INDEX;
2584 case nir_intrinsic_load_coalesced_input_count:
2585 return SYSTEM_VALUE_COALESCED_INPUT_COUNT;
2586 case nir_intrinsic_load_warps_per_sm_nv:
2587 return SYSTEM_VALUE_WARPS_PER_SM_NV;
2588 case nir_intrinsic_load_sm_count_nv:
2589 return SYSTEM_VALUE_SM_COUNT_NV;
2590 case nir_intrinsic_load_warp_id_nv:
2591 return SYSTEM_VALUE_WARP_ID_NV;
2592 case nir_intrinsic_load_sm_id_nv:
2593 return SYSTEM_VALUE_SM_ID_NV;
2594 default:
2595 unreachable("intrinsic doesn't produce a system value");
2596 }
2597 }
2598
2599 /* OpenGL utility method that remaps attribute locations when they are
2600 * doubles. Not needed for Vulkan due to differences in how input locations
2601 * are counted for doubles in Vulkan vs. OpenGL.
2602 *
2603 * The bitfield returned in dual_slot is one bit for each double input slot in
2604 * the original OpenGL single-slot input numbering. The mapping from old
2605 * locations to new locations is as follows:
2606 *
2607 * new_loc = loc + util_bitcount(dual_slot & BITFIELD64_MASK(loc))
2608 */
2609 void
2610 nir_remap_dual_slot_attributes(nir_shader *shader, uint64_t *dual_slot)
2611 {
2612 assert(shader->info.stage == MESA_SHADER_VERTEX);
2613
2614 *dual_slot = 0;
2615 nir_foreach_shader_in_variable(var, shader) {
2616 if (glsl_type_is_dual_slot(glsl_without_array(var->type))) {
2617 unsigned slots = glsl_count_attribute_slots(var->type, true);
2618 *dual_slot |= BITFIELD64_MASK(slots) << var->data.location;
2619 }
2620 }
2621
2622 nir_foreach_shader_in_variable(var, shader) {
2623 var->data.location +=
2624 util_bitcount64(*dual_slot & BITFIELD64_MASK(var->data.location));
2625 }
2626 }
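
/* Worked example of the mapping above (illustrative numbers only): with
 * inputs at locations 0 (vec4), 1 (dvec4) and 2 (vec4), the dvec4 sets bit 1
 * of dual_slot.  Location 0 keeps new_loc = 0 + bitcount(0) = 0, location 1
 * keeps new_loc = 1 + bitcount(0) = 1, and location 2 becomes
 * new_loc = 2 + bitcount(dual_slot & 0x3) = 3, leaving room for the second
 * slot of the double-precision attribute.
 */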
2627
2628 /* Returns an attribute mask that has been re-compacted using the given
2629 * dual_slot mask.
2630 */
2631 uint64_t
2632 nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot)
2633 {
2634 while (dual_slot) {
2635 unsigned loc = u_bit_scan64(&dual_slot);
2636 /* mask of all bits up to and including loc */
2637 uint64_t mask = BITFIELD64_MASK(loc + 1);
2638 attribs = (attribs & mask) | ((attribs & ~mask) >> 1);
2639 }
2640 return attribs;
2641 }
2642
2643 void
2644 nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_def *src,
2645 bool bindless)
2646 {
2647 enum gl_access_qualifier access = nir_intrinsic_access(intrin);
2648
2649 /* Image intrinsics only have one of these */
2650 assert(!nir_intrinsic_has_src_type(intrin) ||
2651 !nir_intrinsic_has_dest_type(intrin));
2652
2653 nir_alu_type data_type = nir_type_invalid;
2654 if (nir_intrinsic_has_src_type(intrin))
2655 data_type = nir_intrinsic_src_type(intrin);
2656 if (nir_intrinsic_has_dest_type(intrin))
2657 data_type = nir_intrinsic_dest_type(intrin);
2658
2659 nir_atomic_op atomic_op = 0;
2660 if (nir_intrinsic_has_atomic_op(intrin))
2661 atomic_op = nir_intrinsic_atomic_op(intrin);
2662
2663 switch (intrin->intrinsic) {
2664 #define CASE(op) \
2665 case nir_intrinsic_image_deref_##op: \
2666 intrin->intrinsic = bindless ? nir_intrinsic_bindless_image_##op \
2667 : nir_intrinsic_image_##op; \
2668 break;
2669 CASE(load)
2670 CASE(sparse_load)
2671 CASE(store)
2672 CASE(atomic)
2673 CASE(atomic_swap)
2674 CASE(size)
2675 CASE(samples)
2676 CASE(load_raw_intel)
2677 CASE(store_raw_intel)
2678 CASE(fragment_mask_load_amd)
2679 CASE(store_block_agx)
2680 #undef CASE
2681 default:
2682 unreachable("Unhanded image intrinsic");
2683 }
2684
2685 nir_variable *var = nir_intrinsic_get_var(intrin, 0);
2686
2687 /* Only update the format if the intrinsic doesn't have one set */
2688 if (nir_intrinsic_format(intrin) == PIPE_FORMAT_NONE)
2689 nir_intrinsic_set_format(intrin, var->data.image.format);
2690
2691 nir_intrinsic_set_access(intrin, access | var->data.access);
2692 if (nir_intrinsic_has_src_type(intrin))
2693 nir_intrinsic_set_src_type(intrin, data_type);
2694 if (nir_intrinsic_has_dest_type(intrin))
2695 nir_intrinsic_set_dest_type(intrin, data_type);
2696
2697 if (nir_intrinsic_has_atomic_op(intrin))
2698 nir_intrinsic_set_atomic_op(intrin, atomic_op);
2699
2700 nir_src_rewrite(&intrin->src[0], src);
2701 }
2702
2703 unsigned
2704 nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr)
2705 {
2706 enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
2707 int coords = glsl_get_sampler_dim_coordinate_components(dim);
2708 if (dim == GLSL_SAMPLER_DIM_CUBE)
2709 return coords;
2710 else
2711 return coords + nir_intrinsic_image_array(instr);
2712 }
2713
2714 bool
2715 nir_intrinsic_can_reorder(nir_intrinsic_instr *instr)
2716 {
2717 if (nir_intrinsic_has_access(instr)) {
2718 enum gl_access_qualifier access = nir_intrinsic_access(instr);
2719 if (access & ACCESS_VOLATILE)
2720 return false;
2721 if (access & ACCESS_CAN_REORDER)
2722 return true;
2723 }
2724
2725 const nir_intrinsic_info *info;
2726 if (instr->intrinsic == nir_intrinsic_load_deref) {
2727 nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
2728 if (nir_deref_mode_is_in_set(deref, nir_var_system_value)) {
2729 nir_variable *var = nir_deref_instr_get_variable(deref);
2730 if (!var)
2731 return false;
2732
2733 nir_intrinsic_op sysval_op =
2734 nir_intrinsic_from_system_value((gl_system_value)var->data.location);
2735 if (sysval_op == nir_num_intrinsics)
2736 return true;
2737
2738 info = &nir_intrinsic_infos[sysval_op];
2739 } else {
2740 return nir_deref_mode_is_in_set(deref, nir_var_read_only_modes);
2741 }
2742 } else {
2743 info = &nir_intrinsic_infos[instr->intrinsic];
2744 }
2745
2746 return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
2747 (info->flags & NIR_INTRINSIC_CAN_REORDER);
2748 }
2749
2750 nir_src *
2751 nir_get_shader_call_payload_src(nir_intrinsic_instr *call)
2752 {
2753 switch (call->intrinsic) {
2754 case nir_intrinsic_trace_ray:
2755 case nir_intrinsic_rt_trace_ray:
2756 return &call->src[10];
2757 case nir_intrinsic_execute_callable:
2758 case nir_intrinsic_rt_execute_callable:
2759 return &call->src[1];
2760 default:
2761 unreachable("Not a call intrinsic");
2762 return NULL;
2763 }
2764 }
2765
2766 nir_binding
2767 nir_chase_binding(nir_src rsrc)
2768 {
2769 nir_binding res = { 0 };
2770 if (rsrc.ssa->parent_instr->type == nir_instr_type_deref) {
2771 const struct glsl_type *type = glsl_without_array(nir_src_as_deref(rsrc)->type);
2772 bool is_image = glsl_type_is_image(type) || glsl_type_is_sampler(type);
2773 while (rsrc.ssa->parent_instr->type == nir_instr_type_deref) {
2774 nir_deref_instr *deref = nir_src_as_deref(rsrc);
2775
2776 if (deref->deref_type == nir_deref_type_var) {
2777 res.success = true;
2778 res.var = deref->var;
2779 res.desc_set = deref->var->data.descriptor_set;
2780 res.binding = deref->var->data.binding;
2781 return res;
2782 } else if (deref->deref_type == nir_deref_type_array && is_image) {
2783 if (res.num_indices == ARRAY_SIZE(res.indices))
2784 return (nir_binding){ 0 };
2785 res.indices[res.num_indices++] = deref->arr.index;
2786 }
2787
2788 rsrc = deref->parent;
2789 }
2790 }
2791
2792 /* Skip copies and trimming. Trimming can appear as nir_op_mov instructions
2793 * when removing the offset from addresses. We also consider
2794 * nir_op_is_vec_or_mov() instructions to skip trimming of
2795 * vec2_index_32bit_offset addresses after lowering ALU to scalar.
2796 */
2797 unsigned num_components = nir_src_num_components(rsrc);
2798 while (true) {
2799 nir_alu_instr *alu = nir_src_as_alu_instr(rsrc);
2800 nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc);
2801 if (alu && alu->op == nir_op_mov) {
2802 for (unsigned i = 0; i < num_components; i++) {
2803 if (alu->src[0].swizzle[i] != i)
2804 return (nir_binding){ 0 };
2805 }
2806 rsrc = alu->src[0].src;
2807 } else if (alu && nir_op_is_vec(alu->op)) {
2808 for (unsigned i = 0; i < num_components; i++) {
2809 if (alu->src[i].swizzle[0] != i || alu->src[i].src.ssa != alu->src[0].src.ssa)
2810 return (nir_binding){ 0 };
2811 }
2812 rsrc = alu->src[0].src;
2813 } else if (intrin && intrin->intrinsic == nir_intrinsic_read_first_invocation) {
2814 /* The caller might want to be aware if only the first invocation of
2815 * the indices is used.
2816 */
2817 res.read_first_invocation = true;
2818 rsrc = intrin->src[0];
2819 } else {
2820 break;
2821 }
2822 }
2823
2824 if (nir_src_is_const(rsrc)) {
2825 /* GL binding model after deref lowering */
2826 res.success = true;
2827 /* Can't use just nir_src_as_uint. Vulkan resource index produces a
2828 * vec2. Some drivers lower it to vec1 (to handle get_ssbo_size for
2829 * example) but others just keep it around as a vec2 (v3dv).
2830 */
2831 res.binding = nir_src_comp_as_uint(rsrc, 0);
2832 return res;
2833 }
2834
2835 /* otherwise, must be Vulkan binding model after deref lowering or GL bindless */
2836
2837 nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc);
2838 if (!intrin)
2839 return (nir_binding){ 0 };
2840
2841 /* Intel resource, similar to load_vulkan_descriptor after it has been
2842 * lowered.
2843 */
2844 if (intrin->intrinsic == nir_intrinsic_resource_intel) {
2845 res.success = true;
2846 res.desc_set = nir_intrinsic_desc_set(intrin);
2847 res.binding = nir_intrinsic_binding(intrin);
2848 /* nir_intrinsic_resource_intel has 3 sources, but src[2] is included in
2849 * src[1]; it is kept around for other purposes.
2850 */
2851 res.num_indices = 2;
2852 res.indices[0] = intrin->src[0];
2853 res.indices[1] = intrin->src[1];
2854 return res;
2855 }
2856
2857 /* skip load_vulkan_descriptor */
2858 if (intrin->intrinsic == nir_intrinsic_load_vulkan_descriptor) {
2859 intrin = nir_src_as_intrinsic(intrin->src[0]);
2860 if (!intrin)
2861 return (nir_binding){ 0 };
2862 }
2863
2864 if (intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
2865 return (nir_binding){ 0 };
2866
2867 assert(res.num_indices == 0);
2868 res.success = true;
2869 res.desc_set = nir_intrinsic_desc_set(intrin);
2870 res.binding = nir_intrinsic_binding(intrin);
2871 res.num_indices = 1;
2872 res.indices[0] = intrin->src[0];
2873 return res;
2874 }
2875
2876 nir_variable *
2877 nir_get_binding_variable(nir_shader *shader, nir_binding binding)
2878 {
2879 nir_variable *binding_var = NULL;
2880 unsigned count = 0;
2881
2882 if (!binding.success)
2883 return NULL;
2884
2885 if (binding.var)
2886 return binding.var;
2887
2888 nir_foreach_variable_with_modes(var, shader, nir_var_mem_ubo | nir_var_mem_ssbo) {
2889 if (var->data.descriptor_set == binding.desc_set && var->data.binding == binding.binding) {
2890 binding_var = var;
2891 count++;
2892 }
2893 }
2894
2895 /* Be conservative if another variable is using the same binding/desc_set
2896 * because the access mask might be different and we can't get it reliably.
2897 */
2898 if (count > 1)
2899 return NULL;
2900
2901 return binding_var;
2902 }
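
/* Illustrative sketch (assumption): resolving a buffer access back to its
 * declaration, e.g. to check whether the underlying resource was declared
 * read-only.  The intrinsic and shader are assumed to exist in the caller.
 *
 *    nir_binding binding = nir_chase_binding(intrin->src[0]);
 *    if (binding.success) {
 *       nir_variable *var = nir_get_binding_variable(shader, binding);
 *       if (var && (var->data.access & ACCESS_NON_WRITEABLE)) {
 *          ...
 *       }
 *    }
 */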
2903
2904 nir_scalar
2905 nir_scalar_chase_movs(nir_scalar s)
2906 {
2907 while (nir_scalar_is_alu(s)) {
2908 nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
2909 if (alu->op == nir_op_mov) {
2910 s.def = alu->src[0].src.ssa;
2911 s.comp = alu->src[0].swizzle[s.comp];
2912 } else if (nir_op_is_vec(alu->op)) {
2913 s.def = alu->src[s.comp].src.ssa;
2914 s.comp = alu->src[s.comp].swizzle[0];
2915 } else {
2916 break;
2917 }
2918 }
2919
2920 return s;
2921 }
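
/* Illustrative sketch (assumption): chasing one component through mov/vec
 * copies before checking whether it is a constant, using the nir_scalar
 * helpers from nir.h.
 *
 *    nir_scalar s = nir_scalar_chase_movs(nir_get_scalar(def, 0));
 *    if (nir_scalar_is_const(s)) {
 *       uint64_t value = nir_scalar_as_uint(s);
 *       ...
 *    }
 */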
2922
2923 nir_alu_type
2924 nir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type)
2925 {
2926 switch (base_type) {
2927 /* clang-format off */
2928 case GLSL_TYPE_BOOL: return nir_type_bool1;
2929 case GLSL_TYPE_UINT: return nir_type_uint32;
2930 case GLSL_TYPE_INT: return nir_type_int32;
2931 case GLSL_TYPE_UINT16: return nir_type_uint16;
2932 case GLSL_TYPE_INT16: return nir_type_int16;
2933 case GLSL_TYPE_UINT8: return nir_type_uint8;
2934 case GLSL_TYPE_INT8: return nir_type_int8;
2935 case GLSL_TYPE_UINT64: return nir_type_uint64;
2936 case GLSL_TYPE_INT64: return nir_type_int64;
2937 case GLSL_TYPE_FLOAT: return nir_type_float32;
2938 case GLSL_TYPE_FLOAT16: return nir_type_float16;
2939 case GLSL_TYPE_DOUBLE: return nir_type_float64;
2940 /* clang-format on */
2941
2942 case GLSL_TYPE_COOPERATIVE_MATRIX:
2943 case GLSL_TYPE_SAMPLER:
2944 case GLSL_TYPE_TEXTURE:
2945 case GLSL_TYPE_IMAGE:
2946 case GLSL_TYPE_ATOMIC_UINT:
2947 case GLSL_TYPE_STRUCT:
2948 case GLSL_TYPE_INTERFACE:
2949 case GLSL_TYPE_ARRAY:
2950 case GLSL_TYPE_VOID:
2951 case GLSL_TYPE_SUBROUTINE:
2952 case GLSL_TYPE_ERROR:
2953 return nir_type_invalid;
2954 }
2955
2956 unreachable("unknown type");
2957 }
2958
2959 enum glsl_base_type
2960 nir_get_glsl_base_type_for_nir_type(nir_alu_type base_type)
2961 {
2962 /* clang-format off */
2963 switch (base_type) {
2964 case nir_type_bool1: return GLSL_TYPE_BOOL;
2965 case nir_type_uint32: return GLSL_TYPE_UINT;
2966 case nir_type_int32: return GLSL_TYPE_INT;
2967 case nir_type_uint16: return GLSL_TYPE_UINT16;
2968 case nir_type_int16: return GLSL_TYPE_INT16;
2969 case nir_type_uint8: return GLSL_TYPE_UINT8;
2970 case nir_type_int8: return GLSL_TYPE_INT8;
2971 case nir_type_uint64: return GLSL_TYPE_UINT64;
2972 case nir_type_int64: return GLSL_TYPE_INT64;
2973 case nir_type_float32: return GLSL_TYPE_FLOAT;
2974 case nir_type_float16: return GLSL_TYPE_FLOAT16;
2975 case nir_type_float64: return GLSL_TYPE_DOUBLE;
2976 default: unreachable("Not a sized nir_alu_type");
2977 }
2978 /* clang-format on */
2979 }
2980
2981 nir_op
2982 nir_op_vec(unsigned num_components)
2983 {
2984 /* clang-format off */
2985 switch (num_components) {
2986 case 1: return nir_op_mov;
2987 case 2: return nir_op_vec2;
2988 case 3: return nir_op_vec3;
2989 case 4: return nir_op_vec4;
2990 case 5: return nir_op_vec5;
2991 case 8: return nir_op_vec8;
2992 case 16: return nir_op_vec16;
2993 default: unreachable("bad component count");
2994 }
2995 /* clang-format on */
2996 }
2997
2998 bool
2999 nir_op_is_vec(nir_op op)
3000 {
3001 switch (op) {
3002 case nir_op_vec2:
3003 case nir_op_vec3:
3004 case nir_op_vec4:
3005 case nir_op_vec5:
3006 case nir_op_vec8:
3007 case nir_op_vec16:
3008 return true;
3009 default:
3010 return false;
3011 }
3012 }
3013
3014 nir_component_mask_t
3015 nir_alu_instr_src_read_mask(const nir_alu_instr *instr, unsigned src)
3016 {
3017 nir_component_mask_t read_mask = 0;
3018 for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) {
3019 if (!nir_alu_instr_channel_used(instr, src, c))
3020 continue;
3021
3022 read_mask |= (1 << instr->src[src].swizzle[c]);
3023 }
3024 return read_mask;
3025 }
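
/* Worked example (illustrative): for a two-component fadd whose first source
 * is swizzled .zw, destination channels 0 and 1 are used, so the loop gathers
 * swizzle[0] = 2 and swizzle[1] = 3 and returns 0b1100 for src 0.
 */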
3026
3027 unsigned
3028 nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src)
3029 {
3030 if (nir_op_infos[instr->op].input_sizes[src] > 0)
3031 return nir_op_infos[instr->op].input_sizes[src];
3032
3033 return instr->def.num_components;
3034 }
3035
3036 #define CASE_ALL_SIZES(op) \
3037 case op: \
3038 case op##8: \
3039 case op##16: \
3040 case op##32:
3041
3042 bool
3043 nir_alu_instr_is_comparison(const nir_alu_instr *instr)
3044 {
3045 switch (instr->op) {
3046 CASE_ALL_SIZES(nir_op_flt)
3047 CASE_ALL_SIZES(nir_op_fge)
3048 CASE_ALL_SIZES(nir_op_fltu)
3049 CASE_ALL_SIZES(nir_op_fgeu)
3050 CASE_ALL_SIZES(nir_op_feq)
3051 CASE_ALL_SIZES(nir_op_fneu)
3052 CASE_ALL_SIZES(nir_op_fequ)
3053 CASE_ALL_SIZES(nir_op_fneo)
3054 CASE_ALL_SIZES(nir_op_funord)
3055 CASE_ALL_SIZES(nir_op_ford)
3056 CASE_ALL_SIZES(nir_op_ilt)
3057 CASE_ALL_SIZES(nir_op_ult)
3058 CASE_ALL_SIZES(nir_op_ige)
3059 CASE_ALL_SIZES(nir_op_uge)
3060 CASE_ALL_SIZES(nir_op_ieq)
3061 CASE_ALL_SIZES(nir_op_ine)
3062 CASE_ALL_SIZES(nir_op_bitz)
3063 CASE_ALL_SIZES(nir_op_bitnz)
3064 case nir_op_inot:
3065 return true;
3066 default:
3067 return false;
3068 }
3069 }
3070
3071 #undef CASE_ALL_SIZES
3072
3073 unsigned
3074 nir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn)
3075 {
3076 const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
3077 assert(srcn < info->num_srcs);
3078 if (info->src_components[srcn] > 0)
3079 return info->src_components[srcn];
3080 else if (info->src_components[srcn] == 0)
3081 return intr->num_components;
3082 else
3083 return nir_src_num_components(intr->src[srcn]);
3084 }
3085
3086 unsigned
3087 nir_intrinsic_dest_components(nir_intrinsic_instr *intr)
3088 {
3089 const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
3090 if (!info->has_dest)
3091 return 0;
3092 else if (info->dest_components)
3093 return info->dest_components;
3094 else
3095 return intr->num_components;
3096 }
3097
3098 nir_alu_type
3099 nir_intrinsic_instr_src_type(const nir_intrinsic_instr *intrin, unsigned src)
3100 {
3101 /* We could go nuts here, but we'll just handle a few simple
3102 * cases and let everything else be untyped.
3103 */
3104 switch (intrin->intrinsic) {
3105 case nir_intrinsic_store_deref: {
3106 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
3107 if (src == 1)
3108 return nir_get_nir_type_for_glsl_type(deref->type);
3109 break;
3110 }
3111
3112 case nir_intrinsic_store_output:
3113 if (src == 0)
3114 return nir_intrinsic_src_type(intrin);
3115 break;
3116
3117 default:
3118 break;
3119 }
3120
3121 /* For the most part, we leave other intrinsics alone. Most
3122 * of them don't matter in OpenGL ES 2.0 drivers anyway.
3123 * However, we should at least check if this is some sort of
3124 * IO intrinsic and flag its offset and index sources.
3125 */
3126 {
3127 int offset_src_idx = nir_get_io_offset_src_number(intrin);
3128 if (src == offset_src_idx) {
3129 const nir_src *offset_src = offset_src_idx >= 0 ? &intrin->src[offset_src_idx] : NULL;
3130 if (offset_src)
3131 return nir_type_int;
3132 }
3133 }
3134
3135 return nir_type_invalid;
3136 }
3137
3138 nir_alu_type
3139 nir_intrinsic_instr_dest_type(const nir_intrinsic_instr *intrin)
3140 {
3141 /* We could go nuts here, but we'll just handle a few simple
3142 * cases and let everything else be untyped.
3143 */
3144 switch (intrin->intrinsic) {
3145 case nir_intrinsic_load_deref: {
3146 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
3147 return nir_get_nir_type_for_glsl_type(deref->type);
3148 }
3149
3150 case nir_intrinsic_load_input:
3151 case nir_intrinsic_load_per_primitive_input:
3152 case nir_intrinsic_load_uniform:
3153 return nir_intrinsic_dest_type(intrin);
3154
3155 default:
3156 break;
3157 }
3158
3159 return nir_type_invalid;
3160 }
3161
3162 /**
3163 * Helper to copy const_index[] from src to dst, without assuming they
3164 * match in order.
3165 */
3166 void
3167 nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src)
3168 {
3169 if (src->intrinsic == dst->intrinsic) {
3170 memcpy(dst->const_index, src->const_index, sizeof(dst->const_index));
3171 return;
3172 }
3173
3174 const nir_intrinsic_info *src_info = &nir_intrinsic_infos[src->intrinsic];
3175 const nir_intrinsic_info *dst_info = &nir_intrinsic_infos[dst->intrinsic];
3176
3177 for (unsigned i = 0; i < NIR_INTRINSIC_NUM_INDEX_FLAGS; i++) {
3178 if (src_info->index_map[i] == 0)
3179 continue;
3180
3181 /* require that dst instruction also uses the same const_index[]: */
3182 assert(dst_info->index_map[i] > 0);
3183
3184 dst->const_index[dst_info->index_map[i] - 1] =
3185 src->const_index[src_info->index_map[i] - 1];
3186 }
3187 }
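
/* Illustrative sketch (assumption): typical use when rewriting one intrinsic
 * into a related one, so that indices such as BASE or WRITE_MASK carry over
 * even though their positions in const_index[] may differ.  The destination
 * intrinsic must support every index the source uses, per the assert above.
 *
 *    nir_intrinsic_instr *new_intrin =
 *       nir_intrinsic_instr_create(shader, nir_intrinsic_store_output);
 *    nir_intrinsic_copy_const_indices(new_intrin, old_intrin);
 */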
3188
3189 bool
3190 nir_tex_instr_need_sampler(const nir_tex_instr *instr)
3191 {
3192 switch (instr->op) {
3193 case nir_texop_txf:
3194 case nir_texop_txf_ms:
3195 case nir_texop_txs:
3196 case nir_texop_query_levels:
3197 case nir_texop_texture_samples:
3198 case nir_texop_samples_identical:
3199 case nir_texop_descriptor_amd:
3200 return false;
3201 default:
3202 return true;
3203 }
3204 }
3205
3206 unsigned
3207 nir_tex_instr_result_size(const nir_tex_instr *instr)
3208 {
3209 switch (instr->op) {
3210 case nir_texop_txs: {
3211 unsigned ret;
3212 switch (instr->sampler_dim) {
3213 case GLSL_SAMPLER_DIM_1D:
3214 case GLSL_SAMPLER_DIM_BUF:
3215 ret = 1;
3216 break;
3217 case GLSL_SAMPLER_DIM_2D:
3218 case GLSL_SAMPLER_DIM_CUBE:
3219 case GLSL_SAMPLER_DIM_MS:
3220 case GLSL_SAMPLER_DIM_RECT:
3221 case GLSL_SAMPLER_DIM_EXTERNAL:
3222 case GLSL_SAMPLER_DIM_SUBPASS:
3223 case GLSL_SAMPLER_DIM_SUBPASS_MS:
3224 ret = 2;
3225 break;
3226 case GLSL_SAMPLER_DIM_3D:
3227 ret = 3;
3228 break;
3229 default:
3230 unreachable("not reached");
3231 }
3232 if (instr->is_array)
3233 ret++;
3234 return ret;
3235 }
3236
3237 case nir_texop_lod:
3238 return 2;
3239
3240 case nir_texop_texture_samples:
3241 case nir_texop_query_levels:
3242 case nir_texop_samples_identical:
3243 case nir_texop_fragment_mask_fetch_amd:
3244 case nir_texop_lod_bias_agx:
3245 case nir_texop_has_custom_border_color_agx:
3246 return 1;
3247
3248 case nir_texop_descriptor_amd:
3249 return instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 4 : 8;
3250
3251 case nir_texop_sampler_descriptor_amd:
3252 return 4;
3253
3254 case nir_texop_hdr_dim_nv:
3255 case nir_texop_tex_type_nv:
3256 return 4;
3257
3258 case nir_texop_custom_border_color_agx:
3259 return 4;
3260
3261 default:
3262 if (instr->is_shadow && instr->is_new_style_shadow)
3263 return 1;
3264
3265 return 4;
3266 }
3267 }
3268
3269 bool
3270 nir_tex_instr_is_query(const nir_tex_instr *instr)
3271 {
3272 switch (instr->op) {
3273 case nir_texop_txs:
3274 case nir_texop_lod:
3275 case nir_texop_texture_samples:
3276 case nir_texop_query_levels:
3277 case nir_texop_descriptor_amd:
3278 case nir_texop_sampler_descriptor_amd:
3279 case nir_texop_lod_bias_agx:
3280 case nir_texop_custom_border_color_agx:
3281 case nir_texop_has_custom_border_color_agx:
3282 case nir_texop_hdr_dim_nv:
3283 case nir_texop_tex_type_nv:
3284 return true;
3285 case nir_texop_tex:
3286 case nir_texop_txb:
3287 case nir_texop_txl:
3288 case nir_texop_txd:
3289 case nir_texop_txf:
3290 case nir_texop_txf_ms:
3291 case nir_texop_txf_ms_fb:
3292 case nir_texop_txf_ms_mcs_intel:
3293 case nir_texop_tg4:
3294 case nir_texop_samples_identical:
3295 case nir_texop_fragment_mask_fetch_amd:
3296 case nir_texop_fragment_fetch_amd:
3297 return false;
3298 default:
3299 unreachable("Invalid texture opcode");
3300 }
3301 }
3302
3303 bool
3304 nir_tex_instr_has_implicit_derivative(const nir_tex_instr *instr)
3305 {
3306 switch (instr->op) {
3307 case nir_texop_tex:
3308 case nir_texop_txb:
3309 case nir_texop_lod:
3310 return true;
3311 case nir_texop_tg4:
3312 return instr->is_gather_implicit_lod;
3313 default:
3314 return false;
3315 }
3316 }
3317
3318 nir_alu_type
3319 nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src)
3320 {
3321 switch (instr->src[src].src_type) {
3322 case nir_tex_src_coord:
3323 switch (instr->op) {
3324 case nir_texop_txf:
3325 case nir_texop_txf_ms:
3326 case nir_texop_txf_ms_fb:
3327 case nir_texop_txf_ms_mcs_intel:
3328 case nir_texop_samples_identical:
3329 case nir_texop_fragment_fetch_amd:
3330 case nir_texop_fragment_mask_fetch_amd:
3331 return nir_type_int;
3332
3333 default:
3334 return nir_type_float;
3335 }
3336
3337 case nir_tex_src_lod:
3338 switch (instr->op) {
3339 case nir_texop_txs:
3340 case nir_texop_txf:
3341 case nir_texop_txf_ms:
3342 case nir_texop_fragment_fetch_amd:
3343 case nir_texop_fragment_mask_fetch_amd:
3344 return nir_type_int;
3345
3346 default:
3347 return nir_type_float;
3348 }
3349
3350 case nir_tex_src_projector:
3351 case nir_tex_src_comparator:
3352 case nir_tex_src_bias:
3353 case nir_tex_src_min_lod:
3354 case nir_tex_src_ddx:
3355 case nir_tex_src_ddy:
3356 case nir_tex_src_backend1:
3357 case nir_tex_src_backend2:
3358 return nir_type_float;
3359
3360 case nir_tex_src_offset:
3361 case nir_tex_src_ms_index:
3362 case nir_tex_src_plane:
3363 return nir_type_int;
3364
3365 case nir_tex_src_sampler_deref_intrinsic:
3366 case nir_tex_src_texture_deref_intrinsic:
3367 case nir_tex_src_ms_mcs_intel:
3368 case nir_tex_src_texture_deref:
3369 case nir_tex_src_sampler_deref:
3370 case nir_tex_src_texture_offset:
3371 case nir_tex_src_sampler_offset:
3372 case nir_tex_src_texture_handle:
3373 case nir_tex_src_sampler_handle:
3374 return nir_type_uint;
3375
3376 case nir_num_tex_src_types:
3377 unreachable("nir_num_tex_src_types is not a valid source type");
3378 }
3379
3380 unreachable("Invalid texture source type");
3381 }
3382
3383 unsigned
3384 nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src)
3385 {
3386 if (instr->src[src].src_type == nir_tex_src_coord)
3387 return instr->coord_components;
3388
3389 /* The MCS value is expected to be a vec4 returned by a txf_ms_mcs_intel */
3390 if (instr->src[src].src_type == nir_tex_src_ms_mcs_intel)
3391 return 4;
3392
3393 if (instr->src[src].src_type == nir_tex_src_ddx ||
3394 instr->src[src].src_type == nir_tex_src_ddy) {
3395
3396 if (instr->is_array && !instr->array_is_lowered_cube)
3397 return instr->coord_components - 1;
3398 else
3399 return instr->coord_components;
3400 }
3401
3402 if (instr->src[src].src_type == nir_tex_src_offset) {
3403 if (instr->is_array)
3404 return instr->coord_components - 1;
3405 else
3406 return instr->coord_components;
3407 }
3408
3409 if (instr->src[src].src_type == nir_tex_src_backend1 ||
3410 instr->src[src].src_type == nir_tex_src_backend2)
3411 return nir_src_num_components(instr->src[src].src);
3412
3413 /* For AMD, this can be a vec8/vec4 image/sampler descriptor. */
3414 if (instr->src[src].src_type == nir_tex_src_texture_handle ||
3415 instr->src[src].src_type == nir_tex_src_sampler_handle)
3416 return 0;
3417
3418 return 1;
3419 }
3420
3421 /**
3422 * Return which components are written into transform feedback buffers.
3423 * The result is relative to component 0 of the slot, not to the "component" index.
3424 */
3425 unsigned
3426 nir_instr_xfb_write_mask(nir_intrinsic_instr *instr)
3427 {
3428 unsigned mask = 0;
3429
3430 if (nir_intrinsic_has_io_xfb(instr)) {
3431 unsigned wr_mask = nir_intrinsic_write_mask(instr) << nir_intrinsic_component(instr);
3432 assert((wr_mask & ~0xf) == 0); /* only 4 components allowed */
3433
3434 unsigned iter_mask = wr_mask;
3435 while (iter_mask) {
3436 unsigned i = u_bit_scan(&iter_mask);
3437 nir_io_xfb xfb = i < 2 ? nir_intrinsic_io_xfb(instr) : nir_intrinsic_io_xfb2(instr);
3438 if (xfb.out[i % 2].num_components)
3439 mask |= BITFIELD_RANGE(i, xfb.out[i % 2].num_components) & wr_mask;
3440 }
3441 }
3442
3443 return mask;
3444 }
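/* Illustrative sketch (not exercised by any caller in this file): a pass
 * deciding whether an output store may be dropped has to consider transform
 * feedback as well.  "next_stage_reads_output" is a hypothetical flag
 * computed by the caller:
 *
 *    unsigned xfb_mask = nir_instr_xfb_write_mask(store);
 *    if (xfb_mask == 0 && !next_stage_reads_output) {
 *       // only now is it safe to delete the store outright
 *    }
 */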
3445
3446 /**
3447 * Whether an output slot is consumed by fixed-function logic.
3448 */
3449 bool
3450 nir_slot_is_sysval_output(gl_varying_slot slot, gl_shader_stage next_shader)
3451 {
3452 switch (next_shader) {
3453 case MESA_SHADER_FRAGMENT:
3454 return slot == VARYING_SLOT_POS ||
3455 slot == VARYING_SLOT_PSIZ ||
3456 slot == VARYING_SLOT_EDGE ||
3457 slot == VARYING_SLOT_CLIP_VERTEX ||
3458 slot == VARYING_SLOT_CLIP_DIST0 ||
3459 slot == VARYING_SLOT_CLIP_DIST1 ||
3460 slot == VARYING_SLOT_CULL_DIST0 ||
3461 slot == VARYING_SLOT_CULL_DIST1 ||
3462 slot == VARYING_SLOT_LAYER ||
3463 slot == VARYING_SLOT_VIEWPORT ||
3464 slot == VARYING_SLOT_VIEW_INDEX ||
3465 slot == VARYING_SLOT_VIEWPORT_MASK ||
3466 slot == VARYING_SLOT_PRIMITIVE_SHADING_RATE ||
3467 /* NV_mesh_shader only */
3468 slot == VARYING_SLOT_PRIMITIVE_COUNT ||
3469 slot == VARYING_SLOT_PRIMITIVE_INDICES;
3470
3471 case MESA_SHADER_TESS_EVAL:
3472 return slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
3473 slot == VARYING_SLOT_TESS_LEVEL_INNER ||
3474 slot == VARYING_SLOT_BOUNDING_BOX0 ||
3475 slot == VARYING_SLOT_BOUNDING_BOX1;
3476
3477 case MESA_SHADER_MESH:
3478 /* NV_mesh_shader only */
3479 return slot == VARYING_SLOT_TASK_COUNT;
3480
3481 case MESA_SHADER_NONE:
3482 /* NONE means unknown. Check all possibilities. */
3483 return nir_slot_is_sysval_output(slot, MESA_SHADER_FRAGMENT) ||
3484 nir_slot_is_sysval_output(slot, MESA_SHADER_TESS_EVAL) ||
3485 nir_slot_is_sysval_output(slot, MESA_SHADER_MESH);
3486
3487 default:
3488 /* No other shaders have preceding shaders with sysval outputs. */
3489 return false;
3490 }
3491 }
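/* For example, VARYING_SLOT_PSIZ written by the last pre-rasterization stage
 * is consumed by fixed-function point setup rather than by the fragment
 * shader, so nir_slot_is_sysval_output(VARYING_SLOT_PSIZ, MESA_SHADER_FRAGMENT)
 * is true while nir_slot_is_varying() below returns false for the same slot.
 */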
3492
3493 /**
3494 * Whether an input/output slot is consumed by the next shader stage,
3495 * or written by the previous shader stage.
3496 *
3497 * Pass MESA_SHADER_NONE if the next shader is unknown.
3498 */
3499 bool
3500 nir_slot_is_varying(gl_varying_slot slot, gl_shader_stage next_shader)
3501 {
3502 bool unknown = next_shader == MESA_SHADER_NONE;
3503 bool exactly_before_fs = next_shader == MESA_SHADER_FRAGMENT || unknown;
3504 bool at_most_before_gs = next_shader <= MESA_SHADER_GEOMETRY || unknown;
3505
3506 return slot >= VARYING_SLOT_VAR0 ||
3507 (slot == VARYING_SLOT_POS && at_most_before_gs) ||
3508 slot == VARYING_SLOT_COL0 ||
3509 slot == VARYING_SLOT_COL1 ||
3510 slot == VARYING_SLOT_BFC0 ||
3511 slot == VARYING_SLOT_BFC1 ||
3512 slot == VARYING_SLOT_FOGC ||
3513 (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) ||
3514 slot == VARYING_SLOT_PNTC ||
3515 (slot == VARYING_SLOT_CLIP_VERTEX && at_most_before_gs) ||
3516 slot == VARYING_SLOT_CLIP_DIST0 ||
3517 slot == VARYING_SLOT_CLIP_DIST1 ||
3518 slot == VARYING_SLOT_CULL_DIST0 ||
3519 slot == VARYING_SLOT_CULL_DIST1 ||
3520 slot == VARYING_SLOT_PRIMITIVE_ID ||
3521 slot == VARYING_SLOT_LAYER ||
3522 slot == VARYING_SLOT_VIEWPORT ||
3523 slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
3524 slot == VARYING_SLOT_TESS_LEVEL_INNER ||
3525 (slot == VARYING_SLOT_VIEW_INDEX && exactly_before_fs);
3526 }
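/* Some slots satisfy both predicates: VARYING_SLOT_LAYER, for instance,
 * selects the framebuffer layer (fixed-function) and can also be read as a
 * fragment shader input, which is exactly what
 * nir_slot_is_sysval_output_and_varying() below tests for.
 */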
3527
3528 bool
3529 nir_slot_is_sysval_output_and_varying(gl_varying_slot slot,
3530 gl_shader_stage next_shader)
3531 {
3532 return nir_slot_is_sysval_output(slot, next_shader) &&
3533 nir_slot_is_varying(slot, next_shader);
3534 }
3535
3536 /**
3537 * This marks the output store instruction as not feeding the next shader
3538 * stage. If the instruction has no other use, it's removed.
3539 */
3540 bool
3541 nir_remove_varying(nir_intrinsic_instr *intr, gl_shader_stage next_shader)
3542 {
3543 nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
3544
3545 if ((!sem.no_sysval_output &&
3546 nir_slot_is_sysval_output(sem.location, next_shader)) ||
3547 nir_instr_xfb_write_mask(intr)) {
3548 /* Demote the store instruction. */
3549 sem.no_varying = true;
3550 nir_intrinsic_set_io_semantics(intr, sem);
3551 return false;
3552 } else {
3553 nir_instr_remove(&intr->instr);
3554 return true;
3555 }
3556 }
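/* A minimal usage sketch, assuming a linker that has already proven the next
 * stage never reads this output (names other than nir_remove_varying are
 * hypothetical):
 *
 *    if (!next_stage_reads(sem.location))
 *       progress |= nir_remove_varying(store, next_shader);
 *
 * The return value distinguishes "store deleted" (true) from "store kept but
 * demoted to sysval/xfb-only" (false).
 */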
3557
3558 /**
3559 * This marks the output store instruction as not feeding fixed-function
3560 * logic. If the instruction has no other use, it's removed.
3561 */
3562 bool
3563 nir_remove_sysval_output(nir_intrinsic_instr *intr, gl_shader_stage next_shader)
3564 {
3565 nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
3566
3567 if ((!sem.no_varying && nir_slot_is_varying(sem.location, next_shader)) ||
3568 nir_instr_xfb_write_mask(intr)) {
3569 /* Demote the store instruction. */
3570 sem.no_sysval_output = true;
3571 nir_intrinsic_set_io_semantics(intr, sem);
3572 return false;
3573 } else {
3574 nir_instr_remove(&intr->instr);
3575 return true;
3576 }
3577 }
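/* The two helpers above are mirror images: a store is only deleted once no
 * consumer remains (no varying read, no fixed-function use, no transform
 * feedback); otherwise the corresponding no_varying/no_sysval_output flag is
 * set so later passes know which consumer is left.
 */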
3578
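/* Drop every function except the entrypoint, typically once all user
 * functions have been inlined.
 */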
3579 void
3580 nir_remove_non_entrypoints(nir_shader *nir)
3581 {
3582 nir_foreach_function_safe(func, nir) {
3583 if (!func->is_entrypoint)
3584 exec_node_remove(&func->node);
3585 }
3586 assert(exec_list_length(&nir->functions) == 1);
3587 }
3588
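/* Drop every function that isn't exported, e.g. to shrink a library shader
 * down to the functions other shaders may still link against.
 */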
3589 void
3590 nir_remove_non_exported(nir_shader *nir)
3591 {
3592 nir_foreach_function_safe(func, nir) {
3593 if (!func->is_exported)
3594 exec_node_remove(&func->node);
3595 }
3596 }
3597
3598 /*
3599 * After precompiling entrypoints from a kernel library, we want to garbage
3600 * collect the NIR entrypoints but leave the exported library functions. This
3601 * helper does that.
3602 */
3603 void
3604 nir_remove_entrypoints(nir_shader *nir)
3605 {
3606 nir_foreach_entrypoint_safe(func, nir) {
3607 exec_node_remove(&func->node);
3608 }
3609 }
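/* A sketch of the intended flow; precompile_kernel() is a hypothetical
 * driver-side helper:
 *
 *    nir_foreach_entrypoint_safe(entry, library_nir)
 *       precompile_kernel(device, library_nir, entry);
 *    nir_remove_entrypoints(library_nir);   // keep only exported functions
 */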
3610
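/* Total number of invocations in one workgroup when the workgroup size is
 * known at compile time, e.g. local_size 8x8x1 yields 64.  Not meaningful if
 * info.workgroup_size_variable is set.
 */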
3611 unsigned
3612 nir_static_workgroup_size(const nir_shader *s)
3613 {
3614 return s->info.workgroup_size[0] * s->info.workgroup_size[1] *
3615 s->info.workgroup_size[2];
3616 }
3617
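/* Whether the block contains anything that later passes can't trivially
 * clean up: any instruction other than phis and plain vec/mov copies counts
 * as work, as does any control flow that follows the block in its parent.
 */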
3618 bool
3619 nir_block_contains_work(nir_block *block)
3620 {
3621 if (!nir_cf_node_is_last(&block->cf_node))
3622 return true;
3623
3624 nir_foreach_instr(instr, block) {
3625 if (instr->type == nir_instr_type_phi)
3626 continue;
3627 if (instr->type != nir_instr_type_alu ||
3628 !nir_op_is_vec_or_mov(nir_instr_as_alu(instr)->op))
3629 return true;
3630 }
3631
3632 return false;
3633 }
3634
3635 nir_op
3636 nir_atomic_op_to_alu(nir_atomic_op op)
3637 {
3638 switch (op) {
3639 case nir_atomic_op_iadd:
3640 return nir_op_iadd;
3641 case nir_atomic_op_imin:
3642 return nir_op_imin;
3643 case nir_atomic_op_umin:
3644 return nir_op_umin;
3645 case nir_atomic_op_imax:
3646 return nir_op_imax;
3647 case nir_atomic_op_umax:
3648 return nir_op_umax;
3649 case nir_atomic_op_iand:
3650 return nir_op_iand;
3651 case nir_atomic_op_ior:
3652 return nir_op_ior;
3653 case nir_atomic_op_ixor:
3654 return nir_op_ixor;
3655 case nir_atomic_op_fadd:
3656 return nir_op_fadd;
3657 case nir_atomic_op_fmin:
3658 return nir_op_fmin;
3659 case nir_atomic_op_fmax:
3660 return nir_op_fmax;
3661
3662 /* We don't handle exchanges or wraps */
3663 case nir_atomic_op_xchg:
3664 case nir_atomic_op_cmpxchg:
3665 case nir_atomic_op_fcmpxchg:
3666 case nir_atomic_op_inc_wrap:
3667 case nir_atomic_op_dec_wrap:
3668 case nir_atomic_op_ordered_add_gfx12_amd:
3669 return nir_num_opcodes;
3670 }
3671
3672 unreachable("Invalid nir_atomic_op");
3673 }
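/* Illustrative sketch of a caller lowering an atomic to plain ALU code
 * ("b", "old_value" and "data" are assumed to come from the caller):
 *
 *    nir_op alu_op = nir_atomic_op_to_alu(nir_intrinsic_atomic_op(intr));
 *    if (alu_op != nir_num_opcodes) {
 *       nir_def *combined = nir_build_alu2(b, alu_op, old_value, data);
 *       // exchange/compare-swap/wrapping ops fall through: they have no
 *       // single ALU equivalent and must be handled separately.
 *    }
 */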
3674
3675