1 /*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Connor Abbott (cwabbott0@gmail.com)
25 *
26 */
27
28 #include "nir.h"
29 #include <assert.h>
30 #include <limits.h>
31 #include <math.h>
32 #include "util/half_float.h"
33 #include "util/macros.h"
34 #include "util/u_math.h"
35 #include "util/u_qsort.h"
36 #include "nir_builder.h"
37 #include "nir_control_flow_private.h"
38 #include "nir_worklist.h"
39
40 #ifndef NDEBUG
41 uint32_t nir_debug = 0;
42 bool nir_debug_print_shader[MESA_SHADER_KERNEL + 1] = { 0 };
43
44 static const struct debug_named_value nir_debug_control[] = {
45 { "clone", NIR_DEBUG_CLONE,
46 "Test cloning a shader at each successful lowering/optimization call" },
47 { "serialize", NIR_DEBUG_SERIALIZE,
48 "Test serialize and deserialize shader at each successful lowering/optimization call" },
49 { "novalidate", NIR_DEBUG_NOVALIDATE,
50 "Disable shader validation at each successful lowering/optimization call" },
51 { "validate_ssa_dominance", NIR_DEBUG_VALIDATE_SSA_DOMINANCE,
52 "Validate SSA dominance in shader at each successful lowering/optimization call" },
53 { "tgsi", NIR_DEBUG_TGSI,
54 "Dump NIR/TGSI shaders when doing a NIR<->TGSI translation" },
55 { "print", NIR_DEBUG_PRINT,
56 "Dump resulting shader after each successful lowering/optimization call" },
57 { "print_vs", NIR_DEBUG_PRINT_VS,
58 "Dump resulting vertex shader after each successful lowering/optimization call" },
59 { "print_tcs", NIR_DEBUG_PRINT_TCS,
60 "Dump resulting tessellation control shader after each successful lowering/optimization call" },
61 { "print_tes", NIR_DEBUG_PRINT_TES,
62 "Dump resulting tessellation evaluation shader after each successful lowering/optimization call" },
63 { "print_gs", NIR_DEBUG_PRINT_GS,
64 "Dump resulting geometry shader after each successful lowering/optimization call" },
65 { "print_fs", NIR_DEBUG_PRINT_FS,
66 "Dump resulting fragment shader after each successful lowering/optimization call" },
67 { "print_cs", NIR_DEBUG_PRINT_CS,
68 "Dump resulting compute shader after each successful lowering/optimization call" },
69 { "print_ts", NIR_DEBUG_PRINT_TS,
70 "Dump resulting task shader after each successful lowering/optimization call" },
71 { "print_ms", NIR_DEBUG_PRINT_MS,
72 "Dump resulting mesh shader after each successful lowering/optimization call" },
73 { "print_rgs", NIR_DEBUG_PRINT_RGS,
74 "Dump resulting raygen shader after each successful lowering/optimization call" },
75 { "print_ahs", NIR_DEBUG_PRINT_AHS,
76 "Dump resulting any-hit shader after each successful lowering/optimization call" },
77 { "print_chs", NIR_DEBUG_PRINT_CHS,
78 "Dump resulting closest-hit shader after each successful lowering/optimization call" },
79 { "print_mhs", NIR_DEBUG_PRINT_MHS,
80 "Dump resulting miss-hit shader after each successful lowering/optimization call" },
81 { "print_is", NIR_DEBUG_PRINT_IS,
82 "Dump resulting intersection shader after each successful lowering/optimization call" },
83 { "print_cbs", NIR_DEBUG_PRINT_CBS,
84 "Dump resulting callable shader after each successful lowering/optimization call" },
85 { "print_ks", NIR_DEBUG_PRINT_KS,
86 "Dump resulting kernel shader after each successful lowering/optimization call" },
87 { "print_no_inline_consts", NIR_DEBUG_PRINT_NO_INLINE_CONSTS,
88 "Do not print const value near each use of const SSA variable" },
89 { "print_internal", NIR_DEBUG_PRINT_INTERNAL,
90 "Print shaders even if they are marked as internal" },
91 { "print_pass_flags", NIR_DEBUG_PRINT_PASS_FLAGS,
92 "Print pass_flags for every instruction when pass_flags are non-zero" },
93 DEBUG_NAMED_VALUE_END
94 };
95
96 DEBUG_GET_ONCE_FLAGS_OPTION(nir_debug, "NIR_DEBUG", nir_debug_control, 0)
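/* NIR_DEBUG is parsed as a comma-separated list of the flag names above.
 * A hedged usage example (illustrative only):
 *
 *    NIR_DEBUG=print_fs,serialize,validate_ssa_dominance ./my_gl_app
 *
 * would dump resulting fragment shaders and exercise serialization and SSA
 * dominance validation after each successful lowering/optimization call.
 */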
97
98 static void
99 nir_process_debug_variable_once(void)
100 {
101 nir_debug = debug_get_option_nir_debug();
102
103 /* clang-format off */
104 nir_debug_print_shader[MESA_SHADER_VERTEX] = NIR_DEBUG(PRINT_VS);
105 nir_debug_print_shader[MESA_SHADER_TESS_CTRL] = NIR_DEBUG(PRINT_TCS);
106 nir_debug_print_shader[MESA_SHADER_TESS_EVAL] = NIR_DEBUG(PRINT_TES);
107 nir_debug_print_shader[MESA_SHADER_GEOMETRY] = NIR_DEBUG(PRINT_GS);
108 nir_debug_print_shader[MESA_SHADER_FRAGMENT] = NIR_DEBUG(PRINT_FS);
109 nir_debug_print_shader[MESA_SHADER_COMPUTE] = NIR_DEBUG(PRINT_CS);
110 nir_debug_print_shader[MESA_SHADER_TASK] = NIR_DEBUG(PRINT_TS);
111 nir_debug_print_shader[MESA_SHADER_MESH] = NIR_DEBUG(PRINT_MS);
112 nir_debug_print_shader[MESA_SHADER_RAYGEN] = NIR_DEBUG(PRINT_RGS);
113 nir_debug_print_shader[MESA_SHADER_ANY_HIT] = NIR_DEBUG(PRINT_AHS);
114 nir_debug_print_shader[MESA_SHADER_CLOSEST_HIT] = NIR_DEBUG(PRINT_CHS);
115 nir_debug_print_shader[MESA_SHADER_MISS] = NIR_DEBUG(PRINT_MHS);
116 nir_debug_print_shader[MESA_SHADER_INTERSECTION] = NIR_DEBUG(PRINT_IS);
117 nir_debug_print_shader[MESA_SHADER_CALLABLE] = NIR_DEBUG(PRINT_CBS);
118 nir_debug_print_shader[MESA_SHADER_KERNEL] = NIR_DEBUG(PRINT_KS);
119 /* clang-format on */
120 }
121
122 void
123 nir_process_debug_variable(void)
124 {
125 static once_flag flag = ONCE_FLAG_INIT;
126 call_once(&flag, nir_process_debug_variable_once);
127 }
128 #endif
129
130 /** Return true if the component mask "mask" with bit size "old_bit_size" can
131 * be re-interpreted to be used with "new_bit_size".
132 */
133 bool
134 nir_component_mask_can_reinterpret(nir_component_mask_t mask,
135 unsigned old_bit_size,
136 unsigned new_bit_size)
137 {
138 assert(util_is_power_of_two_nonzero(old_bit_size));
139 assert(util_is_power_of_two_nonzero(new_bit_size));
140
141 if (old_bit_size == new_bit_size)
142 return true;
143
144 if (old_bit_size == 1 || new_bit_size == 1)
145 return false;
146
147 if (old_bit_size > new_bit_size) {
148 unsigned ratio = old_bit_size / new_bit_size;
149 return util_last_bit(mask) * ratio <= NIR_MAX_VEC_COMPONENTS;
150 }
151
152 unsigned iter = mask;
153 while (iter) {
154 int start, count;
155 u_bit_scan_consecutive_range(&iter, &start, &count);
156 start *= old_bit_size;
157 count *= old_bit_size;
158 if (start % new_bit_size != 0)
159 return false;
160 if (count % new_bit_size != 0)
161 return false;
162 }
163 return true;
164 }
165
166 /** Re-interprets a component mask "mask" with bit size "old_bit_size" so that
167 * it can be used with "new_bit_size".
168 */
169 nir_component_mask_t
170 nir_component_mask_reinterpret(nir_component_mask_t mask,
171 unsigned old_bit_size,
172 unsigned new_bit_size)
173 {
174 assert(nir_component_mask_can_reinterpret(mask, old_bit_size, new_bit_size));
175
176 if (old_bit_size == new_bit_size)
177 return mask;
178
179 nir_component_mask_t new_mask = 0;
180 unsigned iter = mask;
181 while (iter) {
182 int start, count;
183 u_bit_scan_consecutive_range(&iter, &start, &count);
184 start = start * old_bit_size / new_bit_size;
185 count = count * old_bit_size / new_bit_size;
186 new_mask |= BITFIELD_RANGE(start, count);
187 }
188 return new_mask;
189 }
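/* A small worked example (for illustration): the 32-bit mask 0x3
 * (components 0-1) re-interpreted with a 16-bit size becomes 0xf, because
 * each 32-bit component spans two 16-bit components:
 *
 *    assert(nir_component_mask_can_reinterpret(0x3, 32, 16));
 *    assert(nir_component_mask_reinterpret(0x3, 32, 16) == 0xf);
 */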
190
191 nir_shader *
192 nir_shader_create(void *mem_ctx,
193 gl_shader_stage stage,
194 const nir_shader_compiler_options *options,
195 shader_info *si)
196 {
197 nir_shader *shader = rzalloc(mem_ctx, nir_shader);
198
199 shader->gctx = gc_context(shader);
200
201 #ifndef NDEBUG
202 nir_process_debug_variable();
203 #endif
204
205 exec_list_make_empty(&shader->variables);
206
207 shader->options = options;
208
209 if (si) {
210 assert(si->stage == stage);
211 shader->info = *si;
212 } else {
213 shader->info.stage = stage;
214 }
215
216 exec_list_make_empty(&shader->functions);
217
218 shader->num_inputs = 0;
219 shader->num_outputs = 0;
220 shader->num_uniforms = 0;
221
222 return shader;
223 }
224
225 void
226 nir_shader_add_variable(nir_shader *shader, nir_variable *var)
227 {
228 switch (var->data.mode) {
229 case nir_var_function_temp:
230 assert(!"nir_shader_add_variable cannot be used for local variables");
231 return;
232
233 case nir_var_shader_temp:
234 case nir_var_shader_in:
235 case nir_var_shader_out:
236 case nir_var_uniform:
237 case nir_var_mem_ubo:
238 case nir_var_mem_ssbo:
239 case nir_var_image:
240 case nir_var_mem_shared:
241 case nir_var_system_value:
242 case nir_var_mem_push_const:
243 case nir_var_mem_constant:
244 case nir_var_shader_call_data:
245 case nir_var_ray_hit_attrib:
246 case nir_var_mem_task_payload:
247 case nir_var_mem_node_payload:
248 case nir_var_mem_node_payload_in:
249 case nir_var_mem_global:
250 break;
251
252 default:
253 assert(!"invalid mode");
254 return;
255 }
256
257 exec_list_push_tail(&shader->variables, &var->node);
258 }
259
260 nir_variable *
261 nir_variable_create(nir_shader *shader, nir_variable_mode mode,
262 const struct glsl_type *type, const char *name)
263 {
264 nir_variable *var = rzalloc(shader, nir_variable);
265 var->name = ralloc_strdup(var, name);
266 var->type = type;
267 var->data.mode = mode;
268 var->data.how_declared = nir_var_declared_normally;
269
270 if ((mode == nir_var_shader_in &&
271 shader->info.stage != MESA_SHADER_VERTEX &&
272 shader->info.stage != MESA_SHADER_KERNEL) ||
273 (mode == nir_var_shader_out &&
274 shader->info.stage != MESA_SHADER_FRAGMENT))
275 var->data.interpolation = INTERP_MODE_SMOOTH;
276
277 if (mode == nir_var_shader_in || mode == nir_var_uniform)
278 var->data.read_only = true;
279
280 nir_shader_add_variable(shader, var);
281
282 return var;
283 }
284
285 nir_variable *
286 nir_local_variable_create(nir_function_impl *impl,
287 const struct glsl_type *type, const char *name)
288 {
289 nir_variable *var = rzalloc(impl->function->shader, nir_variable);
290 var->name = ralloc_strdup(var, name);
291 var->type = type;
292 var->data.mode = nir_var_function_temp;
293
294 nir_function_impl_add_variable(impl, var);
295
296 return var;
297 }
298
299 nir_variable *
300 nir_state_variable_create(nir_shader *shader,
301 const struct glsl_type *type,
302 const char *name,
303 const gl_state_index16 tokens[STATE_LENGTH])
304 {
305 nir_variable *var = nir_variable_create(shader, nir_var_uniform, type, name);
306 var->num_state_slots = 1;
307 var->state_slots = rzalloc_array(var, nir_state_slot, 1);
308 memcpy(var->state_slots[0].tokens, tokens,
309 sizeof(var->state_slots[0].tokens));
310 shader->num_uniforms++;
311 return var;
312 }
313
314 nir_variable *
315 nir_create_variable_with_location(nir_shader *shader, nir_variable_mode mode, int location,
316 const struct glsl_type *type)
317 {
318 /* Only supporting non-array, or arrayed-io types, because otherwise we don't
319 * know how much to increment num_inputs/outputs
320 */
321 assert(glsl_type_is_vector_or_scalar(type) || glsl_type_is_unsized_array(type));
322
323 const char *name;
324 switch (mode) {
325 case nir_var_shader_in:
326 if (shader->info.stage == MESA_SHADER_VERTEX)
327 name = gl_vert_attrib_name(location);
328 else
329 name = gl_varying_slot_name_for_stage(location, shader->info.stage);
330 break;
331
332 case nir_var_shader_out:
333 if (shader->info.stage == MESA_SHADER_FRAGMENT)
334 name = gl_frag_result_name(location);
335 else
336 name = gl_varying_slot_name_for_stage(location, shader->info.stage);
337 break;
338
339 case nir_var_system_value:
340 name = gl_system_value_name(location);
341 break;
342
343 default:
344 unreachable("Unsupported variable mode");
345 }
346
347 nir_variable *var = nir_variable_create(shader, mode, type, name);
348 var->data.location = location;
349
350 switch (mode) {
351 case nir_var_shader_in:
352 var->data.driver_location = shader->num_inputs++;
353 break;
354
355 case nir_var_shader_out:
356 var->data.driver_location = shader->num_outputs++;
357 break;
358
359 case nir_var_system_value:
360 break;
361
362 default:
363 unreachable("Unsupported variable mode");
364 }
365
366 return var;
367 }
368
369 nir_variable *
370 nir_get_variable_with_location(nir_shader *shader, nir_variable_mode mode, int location,
371 const struct glsl_type *type)
372 {
373 nir_variable *var = nir_find_variable_with_location(shader, mode, location);
374 if (var) {
375 /* If this shader has location_fracs, this builder function is not suitable. */
376 assert(var->data.location_frac == 0);
377
378 /* The variable for the slot should match what we expected. */
379 assert(type == var->type);
380 return var;
381 }
382
383 return nir_create_variable_with_location(shader, mode, location, type);
384 }
385
386 nir_variable *
387 nir_find_variable_with_location(nir_shader *shader,
388 nir_variable_mode mode,
389 unsigned location)
390 {
391 assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp);
392 nir_foreach_variable_with_modes(var, shader, mode) {
393 if (var->data.location == location)
394 return var;
395 }
396 return NULL;
397 }
398
399 nir_variable *
400 nir_find_variable_with_driver_location(nir_shader *shader,
401 nir_variable_mode mode,
402 unsigned location)
403 {
404 assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp);
405 nir_foreach_variable_with_modes(var, shader, mode) {
406 if (var->data.driver_location == location)
407 return var;
408 }
409 return NULL;
410 }
411
412 nir_variable *
413 nir_find_state_variable(nir_shader *s,
414 gl_state_index16 tokens[STATE_LENGTH])
415 {
416 nir_foreach_variable_with_modes(var, s, nir_var_uniform) {
417 if (var->num_state_slots == 1 &&
418 !memcmp(var->state_slots[0].tokens, tokens,
419 sizeof(var->state_slots[0].tokens)))
420 return var;
421 }
422 return NULL;
423 }
424
425 nir_variable *nir_find_sampler_variable_with_tex_index(nir_shader *shader,
426 unsigned texture_index)
427 {
428 nir_foreach_variable_with_modes(var, shader, nir_var_uniform) {
429 unsigned size =
430 glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
431 if ((glsl_type_is_texture(glsl_without_array(var->type)) ||
432 glsl_type_is_sampler(glsl_without_array(var->type))) &&
433 (var->data.binding == texture_index ||
434 (var->data.binding < texture_index &&
435 var->data.binding + size > texture_index)))
436 return var;
437 }
438 return NULL;
439 }
440
441 /* Annoyingly, qsort_r is not in the C standard library and, in particular, we
442 * can't count on it on MSVC and Android. So we stuff the CMP function into
443 * each array element. It's a bit messy and burns more memory but the list of
444 * variables should never be all that long.
445 */
446 struct var_cmp {
447 nir_variable *var;
448 int (*cmp)(const nir_variable *, const nir_variable *);
449 };
450
451 static int
452 var_sort_cmp(const void *_a, const void *_b, void *_cmp)
453 {
454 const struct var_cmp *a = _a;
455 const struct var_cmp *b = _b;
456 assert(a->cmp == b->cmp);
457 return a->cmp(a->var, b->var);
458 }
459
460 void
461 nir_sort_variables_with_modes(nir_shader *shader,
462 int (*cmp)(const nir_variable *,
463 const nir_variable *),
464 nir_variable_mode modes)
465 {
466 unsigned num_vars = 0;
467 nir_foreach_variable_with_modes(var, shader, modes) {
468 ++num_vars;
469 }
470 struct var_cmp *vars = ralloc_array(shader, struct var_cmp, num_vars);
471 unsigned i = 0;
472 nir_foreach_variable_with_modes_safe(var, shader, modes) {
473 exec_node_remove(&var->node);
474 vars[i++] = (struct var_cmp){
475 .var = var,
476 .cmp = cmp,
477 };
478 }
479 assert(i == num_vars);
480
481 util_qsort_r(vars, num_vars, sizeof(*vars), var_sort_cmp, cmp);
482
483 for (i = 0; i < num_vars; i++)
484 exec_list_push_tail(&shader->variables, &vars[i].var->node);
485
486 ralloc_free(vars);
487 }
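/* A minimal usage sketch (the callback name is hypothetical): sorting all
 * shader inputs by location could look like
 *
 *    static int
 *    cmp_by_location(const nir_variable *a, const nir_variable *b)
 *    {
 *       return a->data.location - b->data.location;
 *    }
 *    ...
 *    nir_sort_variables_with_modes(shader, cmp_by_location, nir_var_shader_in);
 */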
488
489 nir_function *
490 nir_function_create(nir_shader *shader, const char *name)
491 {
492 nir_function *func = ralloc(shader, nir_function);
493
494 exec_list_push_tail(&shader->functions, &func->node);
495
496 func->name = ralloc_strdup(func, name);
497 func->shader = shader;
498 func->num_params = 0;
499 func->params = NULL;
500 func->impl = NULL;
501 func->is_entrypoint = false;
502 func->is_preamble = false;
503 func->dont_inline = false;
504 func->should_inline = false;
505
506 /* Only meaningful for shader libraries, so don't export by default. */
507 func->is_exported = false;
508
509 return func;
510 }
511
512 void
513 nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src)
514 {
515 dest->src = nir_src_for_ssa(src->src.ssa);
516 for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
517 dest->swizzle[i] = src->swizzle[i];
518 }
519
520 bool
521 nir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, unsigned srcn)
522 {
523 static uint8_t trivial_swizzle[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
524 STATIC_ASSERT(ARRAY_SIZE(trivial_swizzle) == NIR_MAX_VEC_COMPONENTS);
525
526 const nir_alu_src *src = &alu->src[srcn];
527 unsigned num_components = nir_ssa_alu_instr_src_components(alu, srcn);
528
529 return (src->src.ssa->num_components == num_components) &&
530 (memcmp(src->swizzle, trivial_swizzle, num_components) == 0);
531 }
532
533 static void
534 cf_init(nir_cf_node *node, nir_cf_node_type type)
535 {
536 exec_node_init(&node->node);
537 node->parent = NULL;
538 node->type = type;
539 }
540
541 nir_function_impl *
542 nir_function_impl_create_bare(nir_shader *shader)
543 {
544 nir_function_impl *impl = ralloc(shader, nir_function_impl);
545
546 impl->function = NULL;
547 impl->preamble = NULL;
548
549 cf_init(&impl->cf_node, nir_cf_node_function);
550
551 exec_list_make_empty(&impl->body);
552 exec_list_make_empty(&impl->locals);
553 impl->ssa_alloc = 0;
554 impl->num_blocks = 0;
555 impl->valid_metadata = nir_metadata_none;
556 impl->structured = true;
557
558 /* create start & end blocks */
559 nir_block *start_block = nir_block_create(shader);
560 nir_block *end_block = nir_block_create(shader);
561 start_block->cf_node.parent = &impl->cf_node;
562 end_block->cf_node.parent = &impl->cf_node;
563 impl->end_block = end_block;
564
565 exec_list_push_tail(&impl->body, &start_block->cf_node.node);
566
567 start_block->successors[0] = end_block;
568 _mesa_set_add(end_block->predecessors, start_block);
569 return impl;
570 }
571
572 nir_function_impl *
573 nir_function_impl_create(nir_function *function)
574 {
575 assert(function->impl == NULL);
576
577 nir_function_impl *impl = nir_function_impl_create_bare(function->shader);
578 nir_function_set_impl(function, impl);
579 return impl;
580 }
581
582 nir_block *
583 nir_block_create(nir_shader *shader)
584 {
585 nir_block *block = rzalloc(shader, nir_block);
586
587 cf_init(&block->cf_node, nir_cf_node_block);
588
589 block->successors[0] = block->successors[1] = NULL;
590 block->predecessors = _mesa_pointer_set_create(block);
591 block->imm_dom = NULL;
592 /* XXX maybe it would be worth it to defer allocation? This
593 * way it doesn't get allocated for shader refs that never run
594 * nir_calc_dominance? For example, state-tracker creates an
595 * initial IR, clones that, runs appropriate lowering pass, passes
596 * to driver which does common lowering/opt, and then stores ref
597 * which is later used to do state specific lowering and further
598 * opt. Do any of the references not need dominance metadata?
599 */
600 block->dom_frontier = _mesa_pointer_set_create(block);
601
602 exec_list_make_empty(&block->instr_list);
603
604 return block;
605 }
606
607 static inline void
608 src_init(nir_src *src)
609 {
610 src->ssa = NULL;
611 }
612
613 nir_if *
614 nir_if_create(nir_shader *shader)
615 {
616 nir_if *if_stmt = ralloc(shader, nir_if);
617
618 if_stmt->control = nir_selection_control_none;
619
620 cf_init(&if_stmt->cf_node, nir_cf_node_if);
621 src_init(&if_stmt->condition);
622
623 nir_block *then = nir_block_create(shader);
624 exec_list_make_empty(&if_stmt->then_list);
625 exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
626 then->cf_node.parent = &if_stmt->cf_node;
627
628 nir_block *else_stmt = nir_block_create(shader);
629 exec_list_make_empty(&if_stmt->else_list);
630 exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
631 else_stmt->cf_node.parent = &if_stmt->cf_node;
632
633 return if_stmt;
634 }
635
636 nir_loop *
637 nir_loop_create(nir_shader *shader)
638 {
639 nir_loop *loop = rzalloc(shader, nir_loop);
640
641 cf_init(&loop->cf_node, nir_cf_node_loop);
642 /* Assume that loops are divergent until proven otherwise */
643 loop->divergent = true;
644
645 nir_block *body = nir_block_create(shader);
646 exec_list_make_empty(&loop->body);
647 exec_list_push_tail(&loop->body, &body->cf_node.node);
648 body->cf_node.parent = &loop->cf_node;
649
650 body->successors[0] = body;
651 _mesa_set_add(body->predecessors, body);
652
653 exec_list_make_empty(&loop->continue_list);
654
655 return loop;
656 }
657
658 static void
659 instr_init(nir_instr *instr, nir_instr_type type)
660 {
661 instr->type = type;
662 instr->block = NULL;
663 exec_node_init(&instr->node);
664 }
665
666 static void
667 alu_src_init(nir_alu_src *src)
668 {
669 src_init(&src->src);
670 for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
671 src->swizzle[i] = i;
672 }
673
674 nir_alu_instr *
675 nir_alu_instr_create(nir_shader *shader, nir_op op)
676 {
677 unsigned num_srcs = nir_op_infos[op].num_inputs;
678 nir_alu_instr *instr = gc_zalloc_zla(shader->gctx, nir_alu_instr, nir_alu_src, num_srcs);
679
680 instr_init(&instr->instr, nir_instr_type_alu);
681 instr->op = op;
682 for (unsigned i = 0; i < num_srcs; i++)
683 alu_src_init(&instr->src[i]);
684
685 return instr;
686 }
687
688 nir_deref_instr *
689 nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
690 {
691 nir_deref_instr *instr = gc_zalloc(shader->gctx, nir_deref_instr, 1);
692
693 instr_init(&instr->instr, nir_instr_type_deref);
694
695 instr->deref_type = deref_type;
696 if (deref_type != nir_deref_type_var)
697 src_init(&instr->parent);
698
699 if (deref_type == nir_deref_type_array ||
700 deref_type == nir_deref_type_ptr_as_array)
701 src_init(&instr->arr.index);
702
703 return instr;
704 }
705
706 nir_jump_instr *
707 nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
708 {
709 nir_jump_instr *instr = gc_alloc(shader->gctx, nir_jump_instr, 1);
710 instr_init(&instr->instr, nir_instr_type_jump);
711 src_init(&instr->condition);
712 instr->type = type;
713 instr->target = NULL;
714 instr->else_target = NULL;
715
716 return instr;
717 }
718
719 nir_load_const_instr *
720 nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
721 unsigned bit_size)
722 {
723 nir_load_const_instr *instr =
724 gc_zalloc_zla(shader->gctx, nir_load_const_instr, nir_const_value, num_components);
725 instr_init(&instr->instr, nir_instr_type_load_const);
726
727 nir_def_init(&instr->instr, &instr->def, num_components, bit_size);
728
729 return instr;
730 }
731
732 nir_intrinsic_instr *
733 nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
734 {
735 unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
736 nir_intrinsic_instr *instr =
737 gc_zalloc_zla(shader->gctx, nir_intrinsic_instr, nir_src, num_srcs);
738
739 instr_init(&instr->instr, nir_instr_type_intrinsic);
740 instr->intrinsic = op;
741
742 for (unsigned i = 0; i < num_srcs; i++)
743 src_init(&instr->src[i]);
744
745 return instr;
746 }
747
748 nir_call_instr *
749 nir_call_instr_create(nir_shader *shader, nir_function *callee)
750 {
751 const unsigned num_params = callee->num_params;
752 nir_call_instr *instr =
753 gc_zalloc_zla(shader->gctx, nir_call_instr, nir_src, num_params);
754
755 instr_init(&instr->instr, nir_instr_type_call);
756 instr->callee = callee;
757 instr->num_params = num_params;
758 for (unsigned i = 0; i < num_params; i++)
759 src_init(&instr->params[i]);
760
761 return instr;
762 }
763
764 static int8_t default_tg4_offsets[4][2] = {
765 { 0, 1 },
766 { 1, 1 },
767 { 1, 0 },
768 { 0, 0 },
769 };
770
771 nir_tex_instr *
772 nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
773 {
774 nir_tex_instr *instr = gc_zalloc(shader->gctx, nir_tex_instr, 1);
775 instr_init(&instr->instr, nir_instr_type_tex);
776
777 instr->num_srcs = num_srcs;
778 instr->src = gc_alloc(shader->gctx, nir_tex_src, num_srcs);
779 for (unsigned i = 0; i < num_srcs; i++)
780 src_init(&instr->src[i].src);
781
782 instr->texture_index = 0;
783 instr->sampler_index = 0;
784 memcpy(instr->tg4_offsets, default_tg4_offsets, sizeof(instr->tg4_offsets));
785
786 return instr;
787 }
788
789 void
790 nir_tex_instr_add_src(nir_tex_instr *tex,
791 nir_tex_src_type src_type,
792 nir_def *src)
793 {
794 nir_tex_src *new_srcs = gc_zalloc(gc_get_context(tex), nir_tex_src, tex->num_srcs + 1);
795
796 for (unsigned i = 0; i < tex->num_srcs; i++) {
797 new_srcs[i].src_type = tex->src[i].src_type;
798 nir_instr_move_src(&tex->instr, &new_srcs[i].src,
799 &tex->src[i].src);
800 }
801
802 gc_free(tex->src);
803 tex->src = new_srcs;
804
805 tex->src[tex->num_srcs].src_type = src_type;
806 nir_instr_init_src(&tex->instr, &tex->src[tex->num_srcs].src, src);
807 tex->num_srcs++;
808 }
809
810 void
811 nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx)
812 {
813 assert(src_idx < tex->num_srcs);
814
815 /* First rewrite the source to NIR_SRC_INIT */
816 nir_instr_clear_src(&tex->instr, &tex->src[src_idx].src);
817
818 /* Now, move all of the other sources down */
819 for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) {
820 tex->src[i - 1].src_type = tex->src[i].src_type;
821 nir_instr_move_src(&tex->instr, &tex->src[i - 1].src, &tex->src[i].src);
822 }
823 tex->num_srcs--;
824 }
825
826 bool
827 nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex)
828 {
829 if (tex->op != nir_texop_tg4)
830 return false;
831 return memcmp(tex->tg4_offsets, default_tg4_offsets,
832 sizeof(tex->tg4_offsets)) != 0;
833 }
834
835 nir_phi_instr *
836 nir_phi_instr_create(nir_shader *shader)
837 {
838 nir_phi_instr *instr = gc_alloc(shader->gctx, nir_phi_instr, 1);
839 instr_init(&instr->instr, nir_instr_type_phi);
840
841 exec_list_make_empty(&instr->srcs);
842
843 return instr;
844 }
845
846 /**
847 * Adds a new source to a NIR instruction.
848 *
849 * Note that this does not update the def/use relationship for src, assuming
850 * that the instr is not in the shader. If it is, you have to do:
851 *
852 * list_addtail(&phi_src->src.use_link, &src.ssa->uses);
853 */
854 nir_phi_src *
855 nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_def *src)
856 {
857 nir_phi_src *phi_src;
858
859 phi_src = gc_zalloc(gc_get_context(instr), nir_phi_src, 1);
860 phi_src->pred = pred;
861 phi_src->src = nir_src_for_ssa(src);
862 nir_src_set_parent_instr(&phi_src->src, &instr->instr);
863 exec_list_push_tail(&instr->srcs, &phi_src->node);
864
865 return phi_src;
866 }
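/* A hedged usage sketch of the note above, for a phi that is already in the
 * shader (names are illustrative):
 *
 *    nir_phi_src *phi_src = nir_phi_instr_add_src(phi, pred_block, value);
 *    list_addtail(&phi_src->src.use_link, &value->uses);
 */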
867
868 nir_parallel_copy_instr *
869 nir_parallel_copy_instr_create(nir_shader *shader)
870 {
871 nir_parallel_copy_instr *instr = gc_alloc(shader->gctx, nir_parallel_copy_instr, 1);
872 instr_init(&instr->instr, nir_instr_type_parallel_copy);
873
874 exec_list_make_empty(&instr->entries);
875
876 return instr;
877 }
878
879 nir_undef_instr *
880 nir_undef_instr_create(nir_shader *shader,
881 unsigned num_components,
882 unsigned bit_size)
883 {
884 nir_undef_instr *instr = gc_alloc(shader->gctx, nir_undef_instr, 1);
885 instr_init(&instr->instr, nir_instr_type_undef);
886
887 nir_def_init(&instr->instr, &instr->def, num_components, bit_size);
888
889 return instr;
890 }
891
892 static nir_const_value
893 const_value_float(double d, unsigned bit_size)
894 {
895 nir_const_value v;
896 memset(&v, 0, sizeof(v));
897
898 /* clang-format off */
899 switch (bit_size) {
900 case 16: v.u16 = _mesa_float_to_half(d); break;
901 case 32: v.f32 = d; break;
902 case 64: v.f64 = d; break;
903 default:
904 unreachable("Invalid bit size");
905 }
906 /* clang-format on */
907
908 return v;
909 }
910
911 static nir_const_value
912 const_value_int(int64_t i, unsigned bit_size)
913 {
914 nir_const_value v;
915 memset(&v, 0, sizeof(v));
916
917 /* clang-format off */
918 switch (bit_size) {
919 case 1: v.b = i & 1; break;
920 case 8: v.i8 = i; break;
921 case 16: v.i16 = i; break;
922 case 32: v.i32 = i; break;
923 case 64: v.i64 = i; break;
924 default:
925 unreachable("Invalid bit size");
926 }
927 /* clang-format on */
928
929 return v;
930 }
931
932 nir_const_value
933 nir_alu_binop_identity(nir_op binop, unsigned bit_size)
934 {
935 const int64_t max_int = (1ull << (bit_size - 1)) - 1;
936 const int64_t min_int = -max_int - 1;
937 switch (binop) {
938 case nir_op_iadd:
939 return const_value_int(0, bit_size);
940 case nir_op_fadd:
941 return const_value_float(0, bit_size);
942 case nir_op_imul:
943 return const_value_int(1, bit_size);
944 case nir_op_fmul:
945 return const_value_float(1, bit_size);
946 case nir_op_imin:
947 return const_value_int(max_int, bit_size);
948 case nir_op_umin:
949 return const_value_int(~0ull, bit_size);
950 case nir_op_fmin:
951 return const_value_float(INFINITY, bit_size);
952 case nir_op_imax:
953 return const_value_int(min_int, bit_size);
954 case nir_op_umax:
955 return const_value_int(0, bit_size);
956 case nir_op_fmax:
957 return const_value_float(-INFINITY, bit_size);
958 case nir_op_iand:
959 return const_value_int(~0ull, bit_size);
960 case nir_op_ior:
961 return const_value_int(0, bit_size);
962 case nir_op_ixor:
963 return const_value_int(0, bit_size);
964 default:
965 unreachable("Invalid reduction operation");
966 }
967 }
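/* For illustration, seeding a floating-point max reduction:
 *
 *    nir_const_value id = nir_alu_binop_identity(nir_op_fmax, 32);
 *    assert(id.f32 == -INFINITY);
 */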
968
969 nir_function_impl *
970 nir_cf_node_get_function(nir_cf_node *node)
971 {
972 while (node->type != nir_cf_node_function) {
973 node = node->parent;
974 }
975
976 return nir_cf_node_as_function(node);
977 }
978
979 /* Reduces a cursor by trying to convert everything to after and trying to
980 * go up to block granularity when possible.
981 */
982 static nir_cursor
983 reduce_cursor(nir_cursor cursor)
984 {
985 switch (cursor.option) {
986 case nir_cursor_before_block:
987 if (exec_list_is_empty(&cursor.block->instr_list)) {
988 /* Empty block. After is as good as before. */
989 cursor.option = nir_cursor_after_block;
990 }
991 return cursor;
992
993 case nir_cursor_after_block:
994 return cursor;
995
996 case nir_cursor_before_instr: {
997 nir_instr *prev_instr = nir_instr_prev(cursor.instr);
998 if (prev_instr) {
999 /* Before this instruction is after the previous */
1000 cursor.instr = prev_instr;
1001 cursor.option = nir_cursor_after_instr;
1002 } else {
1003 /* No previous instruction. Switch to before block */
1004 cursor.block = cursor.instr->block;
1005 cursor.option = nir_cursor_before_block;
1006 }
1007 return reduce_cursor(cursor);
1008 }
1009
1010 case nir_cursor_after_instr:
1011 if (nir_instr_next(cursor.instr) == NULL) {
1012 /* This is the last instruction, switch to after block */
1013 cursor.option = nir_cursor_after_block;
1014 cursor.block = cursor.instr->block;
1015 }
1016 return cursor;
1017
1018 default:
1019 unreachable("Inavlid cursor option");
1020 }
1021 }
1022
1023 bool
1024 nir_cursors_equal(nir_cursor a, nir_cursor b)
1025 {
1026 /* Reduced cursors should be unique */
1027 a = reduce_cursor(a);
1028 b = reduce_cursor(b);
1029
1030 return a.block == b.block && a.option == b.option;
1031 }
1032
1033 static bool
1034 add_use_cb(nir_src *src, void *state)
1035 {
1036 nir_instr *instr = state;
1037
1038 nir_src_set_parent_instr(src, instr);
1039 list_addtail(&src->use_link, &src->ssa->uses);
1040
1041 return true;
1042 }
1043
1044 static bool
1045 add_ssa_def_cb(nir_def *def, void *state)
1046 {
1047 nir_instr *instr = state;
1048
1049 if (instr->block && def->index == UINT_MAX) {
1050 nir_function_impl *impl =
1051 nir_cf_node_get_function(&instr->block->cf_node);
1052
1053 def->index = impl->ssa_alloc++;
1054
1055 impl->valid_metadata &= ~nir_metadata_live_defs;
1056 }
1057
1058 return true;
1059 }
1060
1061 static void
1062 add_defs_uses(nir_instr *instr)
1063 {
1064 nir_foreach_src(instr, add_use_cb, instr);
1065 nir_foreach_def(instr, add_ssa_def_cb, instr);
1066 }
1067
1068 void
1069 nir_instr_insert(nir_cursor cursor, nir_instr *instr)
1070 {
1071 switch (cursor.option) {
1072 case nir_cursor_before_block:
1073 /* Only allow inserting jumps into empty blocks. */
1074 if (instr->type == nir_instr_type_jump)
1075 assert(exec_list_is_empty(&cursor.block->instr_list));
1076
1077 instr->block = cursor.block;
1078 add_defs_uses(instr);
1079 exec_list_push_head(&cursor.block->instr_list, &instr->node);
1080 break;
1081 case nir_cursor_after_block: {
1082 /* Inserting instructions after a jump is illegal. */
1083 nir_instr *last = nir_block_last_instr(cursor.block);
1084 assert(last == NULL || last->type != nir_instr_type_jump);
1085 (void)last;
1086
1087 instr->block = cursor.block;
1088 add_defs_uses(instr);
1089 exec_list_push_tail(&cursor.block->instr_list, &instr->node);
1090 break;
1091 }
1092 case nir_cursor_before_instr:
1093 assert(instr->type != nir_instr_type_jump);
1094 instr->block = cursor.instr->block;
1095 add_defs_uses(instr);
1096 exec_node_insert_node_before(&cursor.instr->node, &instr->node);
1097 break;
1098 case nir_cursor_after_instr:
1099 /* Inserting instructions after a jump is illegal. */
1100 assert(cursor.instr->type != nir_instr_type_jump);
1101
1102 /* Only allow inserting jumps at the end of the block. */
1103 if (instr->type == nir_instr_type_jump)
1104 assert(cursor.instr == nir_block_last_instr(cursor.instr->block));
1105
1106 instr->block = cursor.instr->block;
1107 add_defs_uses(instr);
1108 exec_node_insert_after(&cursor.instr->node, &instr->node);
1109 break;
1110 }
1111
1112 if (instr->type == nir_instr_type_jump)
1113 nir_handle_add_jump(instr->block);
1114
1115 nir_function_impl *impl = nir_cf_node_get_function(&instr->block->cf_node);
1116 impl->valid_metadata &= ~nir_metadata_instr_index;
1117 }
1118
1119 bool
1120 nir_instr_move(nir_cursor cursor, nir_instr *instr)
1121 {
1122 /* If the cursor happens to refer to this instruction (either before or
1123 * after), don't do anything.
1124 */
1125 if ((cursor.option == nir_cursor_before_instr ||
1126 cursor.option == nir_cursor_after_instr) &&
1127 cursor.instr == instr)
1128 return false;
1129
1130 nir_instr_remove(instr);
1131 nir_instr_insert(cursor, instr);
1132 return true;
1133 }
1134
1135 static bool
1136 src_is_valid(const nir_src *src)
1137 {
1138 return (src->ssa != NULL);
1139 }
1140
1141 static bool
1142 remove_use_cb(nir_src *src, void *state)
1143 {
1144 (void)state;
1145
1146 if (src_is_valid(src))
1147 list_del(&src->use_link);
1148
1149 return true;
1150 }
1151
1152 static void
1153 remove_defs_uses(nir_instr *instr)
1154 {
1155 nir_foreach_src(instr, remove_use_cb, instr);
1156 }
1157
1158 void
1159 nir_instr_remove_v(nir_instr *instr)
1160 {
1161 remove_defs_uses(instr);
1162 exec_node_remove(&instr->node);
1163
1164 if (instr->type == nir_instr_type_jump) {
1165 nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
1166 nir_handle_remove_jump(instr->block, jump_instr->type);
1167 }
1168 }
1169
1170 void
1171 nir_instr_free(nir_instr *instr)
1172 {
1173 switch (instr->type) {
1174 case nir_instr_type_tex:
1175 gc_free(nir_instr_as_tex(instr)->src);
1176 break;
1177
1178 case nir_instr_type_phi: {
1179 nir_phi_instr *phi = nir_instr_as_phi(instr);
1180 nir_foreach_phi_src_safe(phi_src, phi)
1181 gc_free(phi_src);
1182 break;
1183 }
1184
1185 default:
1186 break;
1187 }
1188
1189 gc_free(instr);
1190 }
1191
1192 void
1193 nir_instr_free_list(struct exec_list *list)
1194 {
1195 struct exec_node *node;
1196 while ((node = exec_list_pop_head(list))) {
1197 nir_instr *removed_instr = exec_node_data(nir_instr, node, node);
1198 nir_instr_free(removed_instr);
1199 }
1200 }
1201
1202 static bool
1203 nir_instr_free_and_dce_live_cb(nir_def *def, void *state)
1204 {
1205 bool *live = state;
1206
1207 if (!nir_def_is_unused(def)) {
1208 *live = true;
1209 return false;
1210 } else {
1211 return true;
1212 }
1213 }
1214
1215 static bool
1216 nir_instr_free_and_dce_is_live(nir_instr *instr)
1217 {
1218 /* Note: don't have to worry about jumps because they don't have dests to
1219 * become unused.
1220 */
1221 if (instr->type == nir_instr_type_intrinsic) {
1222 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1223 const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
1224 if (!(info->flags & NIR_INTRINSIC_CAN_ELIMINATE))
1225 return true;
1226 }
1227
1228 bool live = false;
1229 nir_foreach_def(instr, nir_instr_free_and_dce_live_cb, &live);
1230 return live;
1231 }
1232
1233 static bool
1234 nir_instr_dce_add_dead_srcs_cb(nir_src *src, void *state)
1235 {
1236 nir_instr_worklist *wl = state;
1237
1238 list_del(&src->use_link);
1239 if (!nir_instr_free_and_dce_is_live(src->ssa->parent_instr))
1240 nir_instr_worklist_push_tail(wl, src->ssa->parent_instr);
1241
1242 /* Stop nir_instr_remove from trying to delete the link again. */
1243 src->ssa = NULL;
1244
1245 return true;
1246 }
1247
1248 static void
1249 nir_instr_dce_add_dead_ssa_srcs(nir_instr_worklist *wl, nir_instr *instr)
1250 {
1251 nir_foreach_src(instr, nir_instr_dce_add_dead_srcs_cb, wl);
1252 }
1253
1254 /**
1255 * Frees an instruction and any SSA defs that it used that are now dead,
1256 * returning a nir_cursor where the instruction previously was.
1257 */
1258 nir_cursor
1259 nir_instr_free_and_dce(nir_instr *instr)
1260 {
1261 nir_instr_worklist *worklist = nir_instr_worklist_create();
1262
1263 nir_instr_dce_add_dead_ssa_srcs(worklist, instr);
1264 nir_cursor c = nir_instr_remove(instr);
1265
1266 struct exec_list to_free;
1267 exec_list_make_empty(&to_free);
1268
1269 nir_instr *dce_instr;
1270 while ((dce_instr = nir_instr_worklist_pop_head(worklist))) {
1271 nir_instr_dce_add_dead_ssa_srcs(worklist, dce_instr);
1272
1273 /* If we're removing the instr where our cursor is, then we have to
1274 * point the cursor elsewhere.
1275 */
1276 if ((c.option == nir_cursor_before_instr ||
1277 c.option == nir_cursor_after_instr) &&
1278 c.instr == dce_instr)
1279 c = nir_instr_remove(dce_instr);
1280 else
1281 nir_instr_remove(dce_instr);
1282 exec_list_push_tail(&to_free, &dce_instr->node);
1283 }
1284
1285 nir_instr_free_list(&to_free);
1286
1287 nir_instr_worklist_destroy(worklist);
1288
1289 return c;
1290 }
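/* A hedged usage sketch (names are illustrative): a lowering pass that has
 * built a replacement value can rewrite the uses and then let this helper
 * clean up the now-dead instruction and any dead operands:
 *
 *    nir_def_rewrite_uses(&intrin->def, replacement);
 *    b->cursor = nir_instr_free_and_dce(&intrin->instr);
 */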
1291
1292 /*@}*/
1293
1294 nir_def *
1295 nir_instr_def(nir_instr *instr)
1296 {
1297 switch (instr->type) {
1298 case nir_instr_type_alu:
1299 return &nir_instr_as_alu(instr)->def;
1300
1301 case nir_instr_type_deref:
1302 return &nir_instr_as_deref(instr)->def;
1303
1304 case nir_instr_type_tex:
1305 return &nir_instr_as_tex(instr)->def;
1306
1307 case nir_instr_type_intrinsic: {
1308 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1309 if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
1310 return &intrin->def;
1311 } else {
1312 return NULL;
1313 }
1314 }
1315
1316 case nir_instr_type_phi:
1317 return &nir_instr_as_phi(instr)->def;
1318
1319 case nir_instr_type_parallel_copy:
1320 unreachable("Parallel copies are unsupported by this function");
1321
1322 case nir_instr_type_load_const:
1323 return &nir_instr_as_load_const(instr)->def;
1324
1325 case nir_instr_type_undef:
1326 return &nir_instr_as_undef(instr)->def;
1327
1328 case nir_instr_type_call:
1329 case nir_instr_type_jump:
1330 return NULL;
1331 }
1332
1333 unreachable("Invalid instruction type");
1334 }
1335
1336 bool
1337 nir_foreach_phi_src_leaving_block(nir_block *block,
1338 nir_foreach_src_cb cb,
1339 void *state)
1340 {
1341 for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) {
1342 if (block->successors[i] == NULL)
1343 continue;
1344
1345 nir_foreach_phi(phi, block->successors[i]) {
1346 nir_foreach_phi_src(phi_src, phi) {
1347 if (phi_src->pred == block) {
1348 if (!cb(&phi_src->src, state))
1349 return false;
1350 }
1351 }
1352 }
1353 }
1354
1355 return true;
1356 }
1357
1358 nir_const_value
1359 nir_const_value_for_float(double f, unsigned bit_size)
1360 {
1361 nir_const_value v;
1362 memset(&v, 0, sizeof(v));
1363
1364 /* clang-format off */
1365 switch (bit_size) {
1366 case 16: v.u16 = _mesa_float_to_half(f); break;
1367 case 32: v.f32 = f; break;
1368 case 64: v.f64 = f; break;
1369 default: unreachable("Invalid bit size");
1370 }
1371 /* clang-format on */
1372
1373 return v;
1374 }
1375
1376 double
1377 nir_const_value_as_float(nir_const_value value, unsigned bit_size)
1378 {
1379 /* clang-format off */
1380 switch (bit_size) {
1381 case 16: return _mesa_half_to_float(value.u16);
1382 case 32: return value.f32;
1383 case 64: return value.f64;
1384 default: unreachable("Invalid bit size");
1385 }
1386 /* clang-format on */
1387 }
1388
1389 nir_const_value *
1390 nir_src_as_const_value(nir_src src)
1391 {
1392 if (src.ssa->parent_instr->type != nir_instr_type_load_const)
1393 return NULL;
1394
1395 nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
1396
1397 return load->value;
1398 }
1399
1400 /**
1401 * Returns true if the source is known to be always uniform. Otherwise it
1402 * returns false which means it may or may not be uniform but it can't be
1403 * determined.
1404 *
1405 * For a more precise analysis of uniform values, use nir_divergence_analysis.
1406 */
1407 bool
1408 nir_src_is_always_uniform(nir_src src)
1409 {
1410 /* Constants are trivially uniform */
1411 if (src.ssa->parent_instr->type == nir_instr_type_load_const)
1412 return true;
1413
1414 if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
1415 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);
1416 /* As are uniform variables */
1417 if (intr->intrinsic == nir_intrinsic_load_uniform &&
1418 nir_src_is_always_uniform(intr->src[0]))
1419 return true;
1420 /* From the Vulkan specification 15.6.1. Push Constant Interface:
1421 * "Any member of a push constant block that is declared as an array must
1422 * only be accessed with dynamically uniform indices."
1423 */
1424 if (intr->intrinsic == nir_intrinsic_load_push_constant)
1425 return true;
1426 if (intr->intrinsic == nir_intrinsic_load_deref &&
1427 nir_deref_mode_is(nir_src_as_deref(intr->src[0]), nir_var_mem_push_const))
1428 return true;
1429 }
1430
1431 /* Operating together uniform expressions produces a uniform result */
1432 if (src.ssa->parent_instr->type == nir_instr_type_alu) {
1433 nir_alu_instr *alu = nir_instr_as_alu(src.ssa->parent_instr);
1434 for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
1435 if (!nir_src_is_always_uniform(alu->src[i].src))
1436 return false;
1437 }
1438
1439 return true;
1440 }
1441
1442 /* XXX: this could have many more tests, such as when a sampler function is
1443 * called with uniform arguments.
1444 */
1445 return false;
1446 }
1447
1448 static void
1449 src_remove_all_uses(nir_src *src)
1450 {
1451 if (src && src_is_valid(src))
1452 list_del(&src->use_link);
1453 }
1454
1455 static void
1456 src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
1457 {
1458 if (!src)
1459 return;
1460
1461 if (!src_is_valid(src))
1462 return;
1463
1464 if (parent_instr) {
1465 nir_src_set_parent_instr(src, parent_instr);
1466 } else {
1467 assert(parent_if);
1468 nir_src_set_parent_if(src, parent_if);
1469 }
1470
1471 list_addtail(&src->use_link, &src->ssa->uses);
1472 }
1473
1474 void
1475 nir_instr_init_src(nir_instr *instr, nir_src *src, nir_def *def)
1476 {
1477 *src = nir_src_for_ssa(def);
1478 src_add_all_uses(src, instr, NULL);
1479 }
1480
1481 void
1482 nir_instr_clear_src(nir_instr *instr, nir_src *src)
1483 {
1484 src_remove_all_uses(src);
1485 *src = NIR_SRC_INIT;
1486 }
1487
1488 void
1489 nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
1490 {
1491 assert(!src_is_valid(dest) || nir_src_parent_instr(dest) == dest_instr);
1492
1493 src_remove_all_uses(dest);
1494 src_remove_all_uses(src);
1495 *dest = *src;
1496 *src = NIR_SRC_INIT;
1497 src_add_all_uses(dest, dest_instr, NULL);
1498 }
1499
1500 void
1501 nir_def_init(nir_instr *instr, nir_def *def,
1502 unsigned num_components,
1503 unsigned bit_size)
1504 {
1505 def->parent_instr = instr;
1506 list_inithead(&def->uses);
1507 def->num_components = num_components;
1508 def->bit_size = bit_size;
1509 def->divergent = true; /* This is the safer default */
1510
1511 if (instr->block) {
1512 nir_function_impl *impl =
1513 nir_cf_node_get_function(&instr->block->cf_node);
1514
1515 def->index = impl->ssa_alloc++;
1516
1517 impl->valid_metadata &= ~nir_metadata_live_defs;
1518 } else {
1519 def->index = UINT_MAX;
1520 }
1521 }
1522
1523 void
1524 nir_def_rewrite_uses(nir_def *def, nir_def *new_ssa)
1525 {
1526 assert(def != new_ssa);
1527 nir_foreach_use_including_if_safe(use_src, def) {
1528 nir_src_rewrite(use_src, new_ssa);
1529 }
1530 }
1531
1532 void
1533 nir_def_rewrite_uses_src(nir_def *def, nir_src new_src)
1534 {
1535 nir_def_rewrite_uses(def, new_src.ssa);
1536 }
1537
1538 static bool
1539 is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
1540 {
1541 assert(start->block == end->block);
1542
1543 if (between->block != start->block)
1544 return false;
1545
1546 /* Search backwards looking for "between" */
1547 while (start != end) {
1548 if (between == end)
1549 return true;
1550
1551 end = nir_instr_prev(end);
1552 assert(end);
1553 }
1554
1555 return false;
1556 }
1557
1558 /* Replaces all uses of the given SSA def with the given source but only if
1559 * the use comes after the after_me instruction. This can be useful if you
1560 * are emitting code to fix up the result of some instruction: you can freely
1561 * use the result in that code and then call rewrite_uses_after and pass the
1562 * last fixup instruction as after_me and it will replace all of the uses you
1563 * want without touching the fixup code.
1564 *
1565 * This function assumes that after_me is in the same block as
1566 * def->parent_instr and that after_me comes after def->parent_instr.
1567 */
1568 void
1569 nir_def_rewrite_uses_after(nir_def *def, nir_def *new_ssa,
1570 nir_instr *after_me)
1571 {
1572 if (def == new_ssa)
1573 return;
1574
1575 nir_foreach_use_including_if_safe(use_src, def) {
1576 if (!nir_src_is_if(use_src)) {
1577 assert(nir_src_parent_instr(use_src) != def->parent_instr);
1578
1579 /* Since def already dominates all of its uses, the only way a use can
1580 * not be dominated by after_me is if it is between def and after_me in
1581 * the instruction list.
1582 */
1583 if (is_instr_between(def->parent_instr, after_me, nir_src_parent_instr(use_src)))
1584 continue;
1585 }
1586
1587 nir_src_rewrite(use_src, new_ssa);
1588 }
1589 }
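/* A sketch of the fixup pattern described above (names are illustrative):
 *
 *    b->cursor = nir_after_instr(def->parent_instr);
 *    nir_def *fixed = nir_fmul_imm(b, def, 0.5);
 *    nir_def_rewrite_uses_after(def, fixed, fixed->parent_instr);
 *
 * The fixup use (the fmul) is left untouched; every other use of def is
 * rewritten to the fixed-up value.
 */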
1590
1591 static nir_def *
1592 get_store_value(nir_intrinsic_instr *intrin)
1593 {
1594 assert(nir_intrinsic_has_write_mask(intrin));
1595 /* deref stores have the deref in src[0] and the store value in src[1] */
1596 if (intrin->intrinsic == nir_intrinsic_store_deref ||
1597 intrin->intrinsic == nir_intrinsic_store_deref_block_intel)
1598 return intrin->src[1].ssa;
1599
1600 /* all other stores have the store value in src[0] */
1601 return intrin->src[0].ssa;
1602 }
1603
1604 nir_component_mask_t
1605 nir_src_components_read(const nir_src *src)
1606 {
1607 assert(nir_src_parent_instr(src));
1608
1609 if (nir_src_parent_instr(src)->type == nir_instr_type_alu) {
1610 nir_alu_instr *alu = nir_instr_as_alu(nir_src_parent_instr(src));
1611 nir_alu_src *alu_src = exec_node_data(nir_alu_src, src, src);
1612 int src_idx = alu_src - &alu->src[0];
1613 assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs);
1614 return nir_alu_instr_src_read_mask(alu, src_idx);
1615 } else if (nir_src_parent_instr(src)->type == nir_instr_type_intrinsic) {
1616 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(nir_src_parent_instr(src));
1617 if (nir_intrinsic_has_write_mask(intrin) && src->ssa == get_store_value(intrin))
1618 return nir_intrinsic_write_mask(intrin);
1619 else
1620 return (1 << src->ssa->num_components) - 1;
1621 } else {
1622 return (1 << src->ssa->num_components) - 1;
1623 }
1624 }
1625
1626 nir_component_mask_t
1627 nir_def_components_read(const nir_def *def)
1628 {
1629 nir_component_mask_t read_mask = 0;
1630
1631 nir_foreach_use_including_if(use, def) {
1632 read_mask |= nir_src_is_if(use) ? 1 : nir_src_components_read(use);
1633
1634 if (read_mask == (1 << def->num_components) - 1)
1635 return read_mask;
1636 }
1637
1638 return read_mask;
1639 }
1640
1641 bool
1642 nir_def_all_uses_are_fsat(const nir_def *def)
1643 {
1644 nir_foreach_use(src, def) {
1645 if (nir_src_is_if(src))
1646 return false;
1647
1648 nir_instr *use = nir_src_parent_instr(src);
1649 if (use->type != nir_instr_type_alu)
1650 return false;
1651
1652 nir_alu_instr *alu = nir_instr_as_alu(use);
1653 if (alu->op != nir_op_fsat)
1654 return false;
1655 }
1656
1657 return true;
1658 }
1659
1660 nir_block *
1661 nir_block_unstructured_next(nir_block *block)
1662 {
1663 if (block == NULL) {
1664 /* nir_foreach_block_unstructured_safe() will call this function on a
1665 * NULL block after the last iteration, but it won't use the result so
1666 * just return NULL here.
1667 */
1668 return NULL;
1669 }
1670
1671 nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
1672 if (cf_next == NULL && block->cf_node.parent->type == nir_cf_node_function)
1673 return NULL;
1674
1675 if (cf_next && cf_next->type == nir_cf_node_block)
1676 return nir_cf_node_as_block(cf_next);
1677
1678 return nir_block_cf_tree_next(block);
1679 }
1680
1681 nir_block *
1682 nir_unstructured_start_block(nir_function_impl *impl)
1683 {
1684 return nir_start_block(impl);
1685 }
1686
1687 nir_block *
1688 nir_block_cf_tree_next(nir_block *block)
1689 {
1690 if (block == NULL) {
1691 /* nir_foreach_block_safe() will call this function on a NULL block
1692 * after the last iteration, but it won't use the result so just return
1693 * NULL here.
1694 */
1695 return NULL;
1696 }
1697
1698 assert(nir_cf_node_get_function(&block->cf_node)->structured);
1699
1700 nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
1701 if (cf_next)
1702 return nir_cf_node_cf_tree_first(cf_next);
1703
1704 nir_cf_node *parent = block->cf_node.parent;
1705 if (parent->type == nir_cf_node_function)
1706 return NULL;
1707
1708 /* Is this the last block of a cf_node? Return the following block */
1709 if (block == nir_cf_node_cf_tree_last(parent))
1710 return nir_cf_node_as_block(nir_cf_node_next(parent));
1711
1712 switch (parent->type) {
1713 case nir_cf_node_if: {
1714 /* We are at the end of the if. Go to the beginning of the else */
1715 nir_if *if_stmt = nir_cf_node_as_if(parent);
1716 assert(block == nir_if_last_then_block(if_stmt));
1717 return nir_if_first_else_block(if_stmt);
1718 }
1719
1720 case nir_cf_node_loop: {
1721 /* We are at the end of the body and there is a continue construct */
1722 nir_loop *loop = nir_cf_node_as_loop(parent);
1723 assert(block == nir_loop_last_block(loop) &&
1724 nir_loop_has_continue_construct(loop));
1725 return nir_loop_first_continue_block(loop);
1726 }
1727
1728 default:
1729 unreachable("unknown cf node type");
1730 }
1731 }
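
/* Usage sketch: this is roughly the walk that nir_foreach_block() performs,
 * visiting every block of a structured impl in source order.  "impl" is a
 * hypothetical nir_function_impl.
 *
 *    for (nir_block *b = nir_start_block(impl); b != NULL;
 *         b = nir_block_cf_tree_next(b)) {
 *       ... visit b ...
 *    }
 */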
1732
1733 nir_block *
1734 nir_block_cf_tree_prev(nir_block *block)
1735 {
1736 if (block == NULL) {
1737 /* do this for consistency with nir_block_cf_tree_next() */
1738 return NULL;
1739 }
1740
1741 assert(nir_cf_node_get_function(&block->cf_node)->structured);
1742
1743 nir_cf_node *cf_prev = nir_cf_node_prev(&block->cf_node);
1744 if (cf_prev)
1745 return nir_cf_node_cf_tree_last(cf_prev);
1746
1747 nir_cf_node *parent = block->cf_node.parent;
1748 if (parent->type == nir_cf_node_function)
1749 return NULL;
1750
1751 /* Is this the first block of a cf_node? Return the previous block */
1752 if (block == nir_cf_node_cf_tree_first(parent))
1753 return nir_cf_node_as_block(nir_cf_node_prev(parent));
1754
1755 switch (parent->type) {
1756 case nir_cf_node_if: {
1757 /* We are at the beginning of the else. Go to the end of the if */
1758 nir_if *if_stmt = nir_cf_node_as_if(parent);
1759 assert(block == nir_if_first_else_block(if_stmt));
1760 return nir_if_last_then_block(if_stmt);
1761 }
1762 case nir_cf_node_loop: {
1763 /* We are at the beginning of the continue construct. */
1764 nir_loop *loop = nir_cf_node_as_loop(parent);
1765 assert(nir_loop_has_continue_construct(loop) &&
1766 block == nir_loop_first_continue_block(loop));
1767 return nir_loop_last_block(loop);
1768 }
1769
1770 default:
1771 unreachable("unknown cf node type");
1772 }
1773 }
1774
1775 nir_block *
1776 nir_cf_node_cf_tree_first(nir_cf_node *node)
1777 {
1778 switch (node->type) {
1779 case nir_cf_node_function: {
1780 nir_function_impl *impl = nir_cf_node_as_function(node);
1781 return nir_start_block(impl);
1782 }
1783
1784 case nir_cf_node_if: {
1785 nir_if *if_stmt = nir_cf_node_as_if(node);
1786 return nir_if_first_then_block(if_stmt);
1787 }
1788
1789 case nir_cf_node_loop: {
1790 nir_loop *loop = nir_cf_node_as_loop(node);
1791 return nir_loop_first_block(loop);
1792 }
1793
1794 case nir_cf_node_block: {
1795 return nir_cf_node_as_block(node);
1796 }
1797
1798 default:
1799 unreachable("unknown node type");
1800 }
1801 }
1802
1803 nir_block *
1804 nir_cf_node_cf_tree_last(nir_cf_node *node)
1805 {
1806 switch (node->type) {
1807 case nir_cf_node_function: {
1808 nir_function_impl *impl = nir_cf_node_as_function(node);
1809 return nir_impl_last_block(impl);
1810 }
1811
1812 case nir_cf_node_if: {
1813 nir_if *if_stmt = nir_cf_node_as_if(node);
1814 return nir_if_last_else_block(if_stmt);
1815 }
1816
1817 case nir_cf_node_loop: {
1818 nir_loop *loop = nir_cf_node_as_loop(node);
1819 if (nir_loop_has_continue_construct(loop))
1820 return nir_loop_last_continue_block(loop);
1821 else
1822 return nir_loop_last_block(loop);
1823 }
1824
1825 case nir_cf_node_block: {
1826 return nir_cf_node_as_block(node);
1827 }
1828
1829 default:
1830 unreachable("unknown node type");
1831 }
1832 }
1833
1834 nir_block *
1835 nir_cf_node_cf_tree_next(nir_cf_node *node)
1836 {
1837 if (node->type == nir_cf_node_block)
1838 return nir_block_cf_tree_next(nir_cf_node_as_block(node));
1839 else if (node->type == nir_cf_node_function)
1840 return NULL;
1841 else
1842 return nir_cf_node_as_block(nir_cf_node_next(node));
1843 }
1844
1845 nir_block *
1846 nir_cf_node_cf_tree_prev(nir_cf_node *node)
1847 {
1848 if (node->type == nir_cf_node_block)
1849 return nir_block_cf_tree_prev(nir_cf_node_as_block(node));
1850 else if (node->type == nir_cf_node_function)
1851 return NULL;
1852 else
1853 return nir_cf_node_as_block(nir_cf_node_prev(node));
1854 }
1855
1856 nir_if *
1857 nir_block_get_following_if(nir_block *block)
1858 {
1859 if (exec_node_is_tail_sentinel(&block->cf_node.node))
1860 return NULL;
1861
1862 if (nir_cf_node_is_last(&block->cf_node))
1863 return NULL;
1864
1865 nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
1866
1867 if (next_node->type != nir_cf_node_if)
1868 return NULL;
1869
1870 return nir_cf_node_as_if(next_node);
1871 }
1872
1873 nir_loop *
1874 nir_block_get_following_loop(nir_block *block)
1875 {
1876 if (exec_node_is_tail_sentinel(&block->cf_node.node))
1877 return NULL;
1878
1879 if (nir_cf_node_is_last(&block->cf_node))
1880 return NULL;
1881
1882 nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
1883
1884 if (next_node->type != nir_cf_node_loop)
1885 return NULL;
1886
1887 return nir_cf_node_as_loop(next_node);
1888 }
1889
1890 static int
1891 compare_block_index(const void *p1, const void *p2)
1892 {
1893 const nir_block *block1 = *((const nir_block **)p1);
1894 const nir_block *block2 = *((const nir_block **)p2);
1895
1896 return (int)block1->index - (int)block2->index;
1897 }
1898
1899 nir_block **
1900 nir_block_get_predecessors_sorted(const nir_block *block, void *mem_ctx)
1901 {
1902 nir_block **preds =
1903 ralloc_array(mem_ctx, nir_block *, block->predecessors->entries);
1904
1905 unsigned i = 0;
1906 set_foreach(block->predecessors, entry)
1907 preds[i++] = (nir_block *)entry->key;
1908 assert(i == block->predecessors->entries);
1909
1910 qsort(preds, block->predecessors->entries, sizeof(nir_block *),
1911 compare_block_index);
1912
1913 return preds;
1914 }
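
/* Usage sketch (assumes block indices are valid, e.g. after
 * nir_index_blocks(impl)): iterating predecessors in a deterministic order,
 * which is useful when emitting phi sources reproducibly.
 *
 *    nir_block **preds = nir_block_get_predecessors_sorted(block, mem_ctx);
 *    for (unsigned i = 0; i < block->predecessors->entries; i++) {
 *       ... preds[i] is visited in increasing block->index order ...
 *    }
 */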
1915
1916 void
1917 nir_index_blocks(nir_function_impl *impl)
1918 {
1919 unsigned index = 0;
1920
1921 if (impl->valid_metadata & nir_metadata_block_index)
1922 return;
1923
1924 nir_foreach_block_unstructured(block, impl) {
1925 block->index = index++;
1926 }
1927
1928 /* The end_block isn't really part of the program, which is why its index
1929 * is >= num_blocks.
1930 */
1931 impl->num_blocks = impl->end_block->index = index;
1932 }
1933
1934 static bool
1935 index_ssa_def_cb(nir_def *def, void *state)
1936 {
1937 unsigned *index = (unsigned *)state;
1938 def->index = (*index)++;
1939
1940 return true;
1941 }
1942
1943 /**
1944 * The indices are applied top-to-bottom which has the very nice property
1945 * that, if A dominates B, then A->index <= B->index.
1946 */
1947 void
1948 nir_index_ssa_defs(nir_function_impl *impl)
1949 {
1950 unsigned index = 0;
1951
1952 impl->valid_metadata &= ~nir_metadata_live_defs;
1953
1954 nir_foreach_block_unstructured(block, impl) {
1955 nir_foreach_instr(instr, block)
1956 nir_foreach_def(instr, index_ssa_def_cb, &index);
1957 }
1958
1959 impl->ssa_alloc = index;
1960 }
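
/* Example of exploiting the ordering property above (a sketch):
 *
 *    nir_index_ssa_defs(impl);
 *    ...
 *    if (def_a->index > def_b->index) {
 *       ... def_a cannot dominate def_b, so def_b's instruction cannot
 *           legally consume def_a ...
 *    }
 */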
1961
1962 /**
1963 * The indices are applied top-to-bottom which has the very nice property
1964 * that, if A dominates B, then A->index <= B->index.
1965 */
1966 unsigned
1967 nir_index_instrs(nir_function_impl *impl)
1968 {
1969 unsigned index = 0;
1970
1971 nir_foreach_block(block, impl) {
1972 block->start_ip = index++;
1973
1974 nir_foreach_instr(instr, block)
1975 instr->index = index++;
1976
1977 block->end_ip = index++;
1978 }
1979
1980 return index;
1981 }
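
/* Sketch of how the resulting indices are typically consumed: after
 * nir_index_instrs(impl), every instruction "i" in a block "b" satisfies
 *
 *    b->start_ip < i->index && i->index < b->end_ip
 *
 * so [start_ip, end_ip] can serve as a per-block IP range, e.g. for live
 * range construction in a backend.
 */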
1982
1983 void
1984 nir_shader_clear_pass_flags(nir_shader *shader)
1985 {
1986 nir_foreach_function_impl(impl, shader) {
1987 nir_foreach_block(block, impl) {
1988 nir_foreach_instr(instr, block) {
1989 instr->pass_flags = 0;
1990 }
1991 }
1992 }
1993 }
1994
1995 unsigned
1996 nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes)
1997 {
1998 unsigned count = 0;
1999 nir_foreach_variable_with_modes(var, shader, modes)
2000 var->index = count++;
2001 return count;
2002 }
2003
2004 unsigned
2005 nir_function_impl_index_vars(nir_function_impl *impl)
2006 {
2007 unsigned count = 0;
2008 nir_foreach_function_temp_variable(var, impl)
2009 var->index = count++;
2010 return count;
2011 }
2012
2013 static nir_instr *
2014 cursor_next_instr(nir_cursor cursor)
2015 {
2016 switch (cursor.option) {
2017 case nir_cursor_before_block:
2018 for (nir_block *block = cursor.block; block;
2019 block = nir_block_cf_tree_next(block)) {
2020 nir_instr *instr = nir_block_first_instr(block);
2021 if (instr)
2022 return instr;
2023 }
2024 return NULL;
2025
2026 case nir_cursor_after_block:
2027 cursor.block = nir_block_cf_tree_next(cursor.block);
2028 if (cursor.block == NULL)
2029 return NULL;
2030
2031 cursor.option = nir_cursor_before_block;
2032 return cursor_next_instr(cursor);
2033
2034 case nir_cursor_before_instr:
2035 return cursor.instr;
2036
2037 case nir_cursor_after_instr:
2038 if (nir_instr_next(cursor.instr))
2039 return nir_instr_next(cursor.instr);
2040
2041 cursor.option = nir_cursor_after_block;
2042 cursor.block = cursor.instr->block;
2043 return cursor_next_instr(cursor);
2044 }
2045
2046 unreachable("Inavlid cursor option");
2047 }
2048
2049 bool
2050 nir_function_impl_lower_instructions(nir_function_impl *impl,
2051 nir_instr_filter_cb filter,
2052 nir_lower_instr_cb lower,
2053 void *cb_data)
2054 {
2055 nir_builder b = nir_builder_create(impl);
2056
2057 nir_metadata preserved = nir_metadata_block_index |
2058 nir_metadata_dominance;
2059
2060 bool progress = false;
2061 nir_cursor iter = nir_before_impl(impl);
2062 nir_instr *instr;
2063 while ((instr = cursor_next_instr(iter)) != NULL) {
2064 if (filter && !filter(instr, cb_data)) {
2065 iter = nir_after_instr(instr);
2066 continue;
2067 }
2068
2069 nir_def *old_def = nir_instr_def(instr);
2070 struct list_head old_uses;
2071 if (old_def != NULL) {
2072 /* We're about to ask the callback to generate a replacement for instr.
2073 * Save off the uses from instr's SSA def so we know what uses to
2074 * rewrite later. If we use nir_def_rewrite_uses, it fails in the
2075 * case where the generated replacement code uses the result of instr
2076 * itself. If we use nir_def_rewrite_uses_after (which is the
2077 * normal solution to this problem), it doesn't work well if control-
2078 * flow is inserted as part of the replacement, doesn't handle cases
2079 * where the replacement is something consumed by instr, and suffers
2080 * from performance issues. This is the only way to 100% guarantee
2081 * that we rewrite the correct set efficiently.
2082 */
2083
2084 list_replace(&old_def->uses, &old_uses);
2085 list_inithead(&old_def->uses);
2086 }
2087
2088 b.cursor = nir_after_instr(instr);
2089 nir_def *new_def = lower(&b, instr, cb_data);
2090 if (new_def && new_def != NIR_LOWER_INSTR_PROGRESS &&
2091 new_def != NIR_LOWER_INSTR_PROGRESS_REPLACE) {
2092 assert(old_def != NULL);
2093 if (new_def->parent_instr->block != instr->block)
2094 preserved = nir_metadata_none;
2095
2096 list_for_each_entry_safe(nir_src, use_src, &old_uses, use_link)
2097 nir_src_rewrite(use_src, new_def);
2098
2099 if (nir_def_is_unused(old_def)) {
2100 iter = nir_instr_free_and_dce(instr);
2101 } else {
2102 iter = nir_after_instr(instr);
2103 }
2104 progress = true;
2105 } else {
2106 /* We didn't end up lowering after all. Put the uses back */
2107 if (old_def)
2108 list_replace(&old_uses, &old_def->uses);
2109
2110 if (new_def == NIR_LOWER_INSTR_PROGRESS_REPLACE) {
2111 /* Only instructions without a return value can be removed like this */
2112 assert(!old_def);
2113 iter = nir_instr_free_and_dce(instr);
2114 progress = true;
2115 } else
2116 iter = nir_after_instr(instr);
2117
2118 if (new_def == NIR_LOWER_INSTR_PROGRESS)
2119 progress = true;
2120 }
2121 }
2122
2123 if (progress) {
2124 nir_metadata_preserve(impl, preserved);
2125 } else {
2126 nir_metadata_preserve(impl, nir_metadata_all);
2127 }
2128
2129 return progress;
2130 }
2131
2132 bool
2133 nir_shader_lower_instructions(nir_shader *shader,
2134 nir_instr_filter_cb filter,
2135 nir_lower_instr_cb lower,
2136 void *cb_data)
2137 {
2138 bool progress = false;
2139
2140 nir_foreach_function_impl(impl, shader) {
2141 if (nir_function_impl_lower_instructions(impl, filter, lower, cb_data))
2142 progress = true;
2143 }
2144
2145 return progress;
2146 }
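
/* Usage sketch: a pass built on nir_shader_lower_instructions() supplies a
 * filter and a lowering callback.  This is a hypothetical skeleton, not a
 * real pass; the callback bodies are elided.
 *
 *    static bool
 *    filter_cb(const nir_instr *instr, const void *data)
 *    {
 *       return instr->type == nir_instr_type_intrinsic;
 *    }
 *
 *    static nir_def *
 *    lower_cb(nir_builder *b, nir_instr *instr, void *data)
 *    {
 *       ... b->cursor is already nir_after_instr(instr); build and return
 *           the replacement value, or return NULL for "no change",
 *           NIR_LOWER_INSTR_PROGRESS if progress was made while keeping
 *           the instruction, or NIR_LOWER_INSTR_PROGRESS_REPLACE if the
 *           instruction was removed without producing a value ...
 *    }
 *
 *    nir_shader_lower_instructions(shader, filter_cb, lower_cb, NULL);
 */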
2147
2148 /**
2149 * Returns true if the shader supports quad-based implicit derivatives on
2150 * texture sampling.
2151 */
2152 bool
2153 nir_shader_supports_implicit_lod(nir_shader *shader)
2154 {
2155 return (shader->info.stage == MESA_SHADER_FRAGMENT ||
2156 (shader->info.stage == MESA_SHADER_COMPUTE &&
2157 shader->info.cs.derivative_group != DERIVATIVE_GROUP_NONE));
2158 }
2159
2160 nir_intrinsic_op
2161 nir_intrinsic_from_system_value(gl_system_value val)
2162 {
2163 switch (val) {
2164 case SYSTEM_VALUE_VERTEX_ID:
2165 return nir_intrinsic_load_vertex_id;
2166 case SYSTEM_VALUE_INSTANCE_ID:
2167 return nir_intrinsic_load_instance_id;
2168 case SYSTEM_VALUE_DRAW_ID:
2169 return nir_intrinsic_load_draw_id;
2170 case SYSTEM_VALUE_BASE_INSTANCE:
2171 return nir_intrinsic_load_base_instance;
2172 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
2173 return nir_intrinsic_load_vertex_id_zero_base;
2174 case SYSTEM_VALUE_IS_INDEXED_DRAW:
2175 return nir_intrinsic_load_is_indexed_draw;
2176 case SYSTEM_VALUE_FIRST_VERTEX:
2177 return nir_intrinsic_load_first_vertex;
2178 case SYSTEM_VALUE_BASE_VERTEX:
2179 return nir_intrinsic_load_base_vertex;
2180 case SYSTEM_VALUE_INVOCATION_ID:
2181 return nir_intrinsic_load_invocation_id;
2182 case SYSTEM_VALUE_FRAG_COORD:
2183 return nir_intrinsic_load_frag_coord;
2184 case SYSTEM_VALUE_POINT_COORD:
2185 return nir_intrinsic_load_point_coord;
2186 case SYSTEM_VALUE_LINE_COORD:
2187 return nir_intrinsic_load_line_coord;
2188 case SYSTEM_VALUE_FRONT_FACE:
2189 return nir_intrinsic_load_front_face;
2190 case SYSTEM_VALUE_SAMPLE_ID:
2191 return nir_intrinsic_load_sample_id;
2192 case SYSTEM_VALUE_SAMPLE_POS:
2193 return nir_intrinsic_load_sample_pos;
2194 case SYSTEM_VALUE_SAMPLE_POS_OR_CENTER:
2195 return nir_intrinsic_load_sample_pos_or_center;
2196 case SYSTEM_VALUE_SAMPLE_MASK_IN:
2197 return nir_intrinsic_load_sample_mask_in;
2198 case SYSTEM_VALUE_LAYER_ID:
2199 return nir_intrinsic_load_layer_id;
2200 case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
2201 return nir_intrinsic_load_local_invocation_id;
2202 case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
2203 return nir_intrinsic_load_local_invocation_index;
2204 case SYSTEM_VALUE_WORKGROUP_ID:
2205 return nir_intrinsic_load_workgroup_id;
2206 case SYSTEM_VALUE_WORKGROUP_INDEX:
2207 return nir_intrinsic_load_workgroup_index;
2208 case SYSTEM_VALUE_NUM_WORKGROUPS:
2209 return nir_intrinsic_load_num_workgroups;
2210 case SYSTEM_VALUE_PRIMITIVE_ID:
2211 return nir_intrinsic_load_primitive_id;
2212 case SYSTEM_VALUE_TESS_COORD:
2213 return nir_intrinsic_load_tess_coord;
2214 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
2215 return nir_intrinsic_load_tess_level_outer;
2216 case SYSTEM_VALUE_TESS_LEVEL_INNER:
2217 return nir_intrinsic_load_tess_level_inner;
2218 case SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT:
2219 return nir_intrinsic_load_tess_level_outer_default;
2220 case SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT:
2221 return nir_intrinsic_load_tess_level_inner_default;
2222 case SYSTEM_VALUE_VERTICES_IN:
2223 return nir_intrinsic_load_patch_vertices_in;
2224 case SYSTEM_VALUE_HELPER_INVOCATION:
2225 return nir_intrinsic_load_helper_invocation;
2226 case SYSTEM_VALUE_COLOR0:
2227 return nir_intrinsic_load_color0;
2228 case SYSTEM_VALUE_COLOR1:
2229 return nir_intrinsic_load_color1;
2230 case SYSTEM_VALUE_VIEW_INDEX:
2231 return nir_intrinsic_load_view_index;
2232 case SYSTEM_VALUE_SUBGROUP_SIZE:
2233 return nir_intrinsic_load_subgroup_size;
2234 case SYSTEM_VALUE_SUBGROUP_INVOCATION:
2235 return nir_intrinsic_load_subgroup_invocation;
2236 case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
2237 return nir_intrinsic_load_subgroup_eq_mask;
2238 case SYSTEM_VALUE_SUBGROUP_GE_MASK:
2239 return nir_intrinsic_load_subgroup_ge_mask;
2240 case SYSTEM_VALUE_SUBGROUP_GT_MASK:
2241 return nir_intrinsic_load_subgroup_gt_mask;
2242 case SYSTEM_VALUE_SUBGROUP_LE_MASK:
2243 return nir_intrinsic_load_subgroup_le_mask;
2244 case SYSTEM_VALUE_SUBGROUP_LT_MASK:
2245 return nir_intrinsic_load_subgroup_lt_mask;
2246 case SYSTEM_VALUE_NUM_SUBGROUPS:
2247 return nir_intrinsic_load_num_subgroups;
2248 case SYSTEM_VALUE_SUBGROUP_ID:
2249 return nir_intrinsic_load_subgroup_id;
2250 case SYSTEM_VALUE_WORKGROUP_SIZE:
2251 return nir_intrinsic_load_workgroup_size;
2252 case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
2253 return nir_intrinsic_load_global_invocation_id;
2254 case SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID:
2255 return nir_intrinsic_load_base_global_invocation_id;
2256 case SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX:
2257 return nir_intrinsic_load_global_invocation_index;
2258 case SYSTEM_VALUE_WORK_DIM:
2259 return nir_intrinsic_load_work_dim;
2260 case SYSTEM_VALUE_USER_DATA_AMD:
2261 return nir_intrinsic_load_user_data_amd;
2262 case SYSTEM_VALUE_RAY_LAUNCH_ID:
2263 return nir_intrinsic_load_ray_launch_id;
2264 case SYSTEM_VALUE_RAY_LAUNCH_SIZE:
2265 return nir_intrinsic_load_ray_launch_size;
2266 case SYSTEM_VALUE_RAY_WORLD_ORIGIN:
2267 return nir_intrinsic_load_ray_world_origin;
2268 case SYSTEM_VALUE_RAY_WORLD_DIRECTION:
2269 return nir_intrinsic_load_ray_world_direction;
2270 case SYSTEM_VALUE_RAY_OBJECT_ORIGIN:
2271 return nir_intrinsic_load_ray_object_origin;
2272 case SYSTEM_VALUE_RAY_OBJECT_DIRECTION:
2273 return nir_intrinsic_load_ray_object_direction;
2274 case SYSTEM_VALUE_RAY_T_MIN:
2275 return nir_intrinsic_load_ray_t_min;
2276 case SYSTEM_VALUE_RAY_T_MAX:
2277 return nir_intrinsic_load_ray_t_max;
2278 case SYSTEM_VALUE_RAY_OBJECT_TO_WORLD:
2279 return nir_intrinsic_load_ray_object_to_world;
2280 case SYSTEM_VALUE_RAY_WORLD_TO_OBJECT:
2281 return nir_intrinsic_load_ray_world_to_object;
2282 case SYSTEM_VALUE_RAY_HIT_KIND:
2283 return nir_intrinsic_load_ray_hit_kind;
2284 case SYSTEM_VALUE_RAY_FLAGS:
2285 return nir_intrinsic_load_ray_flags;
2286 case SYSTEM_VALUE_RAY_GEOMETRY_INDEX:
2287 return nir_intrinsic_load_ray_geometry_index;
2288 case SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX:
2289 return nir_intrinsic_load_ray_instance_custom_index;
2290 case SYSTEM_VALUE_CULL_MASK:
2291 return nir_intrinsic_load_cull_mask;
2292 case SYSTEM_VALUE_RAY_TRIANGLE_VERTEX_POSITIONS:
2293 return nir_intrinsic_load_ray_triangle_vertex_positions;
2294 case SYSTEM_VALUE_MESH_VIEW_COUNT:
2295 return nir_intrinsic_load_mesh_view_count;
2296 case SYSTEM_VALUE_FRAG_SHADING_RATE:
2297 return nir_intrinsic_load_frag_shading_rate;
2298 case SYSTEM_VALUE_FULLY_COVERED:
2299 return nir_intrinsic_load_fully_covered;
2300 case SYSTEM_VALUE_FRAG_SIZE:
2301 return nir_intrinsic_load_frag_size;
2302 case SYSTEM_VALUE_FRAG_INVOCATION_COUNT:
2303 return nir_intrinsic_load_frag_invocation_count;
2304 case SYSTEM_VALUE_SHADER_INDEX:
2305 return nir_intrinsic_load_shader_index;
2306 case SYSTEM_VALUE_COALESCED_INPUT_COUNT:
2307 return nir_intrinsic_load_coalesced_input_count;
2308 case SYSTEM_VALUE_WARPS_PER_SM_NV:
2309 return nir_intrinsic_load_warps_per_sm_nv;
2310 case SYSTEM_VALUE_SM_COUNT_NV:
2311 return nir_intrinsic_load_sm_count_nv;
2312 case SYSTEM_VALUE_WARP_ID_NV:
2313 return nir_intrinsic_load_warp_id_nv;
2314 case SYSTEM_VALUE_SM_ID_NV:
2315 return nir_intrinsic_load_sm_id_nv;
2316 default:
2317 unreachable("system value does not directly correspond to intrinsic");
2318 }
2319 }
2320
2321 gl_system_value
2322 nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
2323 {
2324 switch (intrin) {
2325 case nir_intrinsic_load_vertex_id:
2326 return SYSTEM_VALUE_VERTEX_ID;
2327 case nir_intrinsic_load_instance_id:
2328 return SYSTEM_VALUE_INSTANCE_ID;
2329 case nir_intrinsic_load_draw_id:
2330 return SYSTEM_VALUE_DRAW_ID;
2331 case nir_intrinsic_load_base_instance:
2332 return SYSTEM_VALUE_BASE_INSTANCE;
2333 case nir_intrinsic_load_vertex_id_zero_base:
2334 return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
2335 case nir_intrinsic_load_first_vertex:
2336 return SYSTEM_VALUE_FIRST_VERTEX;
2337 case nir_intrinsic_load_is_indexed_draw:
2338 return SYSTEM_VALUE_IS_INDEXED_DRAW;
2339 case nir_intrinsic_load_base_vertex:
2340 return SYSTEM_VALUE_BASE_VERTEX;
2341 case nir_intrinsic_load_invocation_id:
2342 return SYSTEM_VALUE_INVOCATION_ID;
2343 case nir_intrinsic_load_frag_coord:
2344 return SYSTEM_VALUE_FRAG_COORD;
2345 case nir_intrinsic_load_point_coord:
2346 return SYSTEM_VALUE_POINT_COORD;
2347 case nir_intrinsic_load_line_coord:
2348 return SYSTEM_VALUE_LINE_COORD;
2349 case nir_intrinsic_load_front_face:
2350 return SYSTEM_VALUE_FRONT_FACE;
2351 case nir_intrinsic_load_sample_id:
2352 return SYSTEM_VALUE_SAMPLE_ID;
2353 case nir_intrinsic_load_sample_pos:
2354 return SYSTEM_VALUE_SAMPLE_POS;
2355 case nir_intrinsic_load_sample_pos_or_center:
2356 return SYSTEM_VALUE_SAMPLE_POS_OR_CENTER;
2357 case nir_intrinsic_load_sample_mask_in:
2358 return SYSTEM_VALUE_SAMPLE_MASK_IN;
2359 case nir_intrinsic_load_layer_id:
2360 return SYSTEM_VALUE_LAYER_ID;
2361 case nir_intrinsic_load_local_invocation_id:
2362 return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
2363 case nir_intrinsic_load_local_invocation_index:
2364 return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
2365 case nir_intrinsic_load_num_workgroups:
2366 return SYSTEM_VALUE_NUM_WORKGROUPS;
2367 case nir_intrinsic_load_workgroup_id:
2368 return SYSTEM_VALUE_WORKGROUP_ID;
2369 case nir_intrinsic_load_workgroup_index:
2370 return SYSTEM_VALUE_WORKGROUP_INDEX;
2371 case nir_intrinsic_load_primitive_id:
2372 return SYSTEM_VALUE_PRIMITIVE_ID;
2373 case nir_intrinsic_load_tess_coord:
2374 case nir_intrinsic_load_tess_coord_xy:
2375 return SYSTEM_VALUE_TESS_COORD;
2376 case nir_intrinsic_load_tess_level_outer:
2377 return SYSTEM_VALUE_TESS_LEVEL_OUTER;
2378 case nir_intrinsic_load_tess_level_inner:
2379 return SYSTEM_VALUE_TESS_LEVEL_INNER;
2380 case nir_intrinsic_load_tess_level_outer_default:
2381 return SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT;
2382 case nir_intrinsic_load_tess_level_inner_default:
2383 return SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT;
2384 case nir_intrinsic_load_patch_vertices_in:
2385 return SYSTEM_VALUE_VERTICES_IN;
2386 case nir_intrinsic_load_helper_invocation:
2387 return SYSTEM_VALUE_HELPER_INVOCATION;
2388 case nir_intrinsic_load_color0:
2389 return SYSTEM_VALUE_COLOR0;
2390 case nir_intrinsic_load_color1:
2391 return SYSTEM_VALUE_COLOR1;
2392 case nir_intrinsic_load_view_index:
2393 return SYSTEM_VALUE_VIEW_INDEX;
2394 case nir_intrinsic_load_subgroup_size:
2395 return SYSTEM_VALUE_SUBGROUP_SIZE;
2396 case nir_intrinsic_load_subgroup_invocation:
2397 return SYSTEM_VALUE_SUBGROUP_INVOCATION;
2398 case nir_intrinsic_load_subgroup_eq_mask:
2399 return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
2400 case nir_intrinsic_load_subgroup_ge_mask:
2401 return SYSTEM_VALUE_SUBGROUP_GE_MASK;
2402 case nir_intrinsic_load_subgroup_gt_mask:
2403 return SYSTEM_VALUE_SUBGROUP_GT_MASK;
2404 case nir_intrinsic_load_subgroup_le_mask:
2405 return SYSTEM_VALUE_SUBGROUP_LE_MASK;
2406 case nir_intrinsic_load_subgroup_lt_mask:
2407 return SYSTEM_VALUE_SUBGROUP_LT_MASK;
2408 case nir_intrinsic_load_num_subgroups:
2409 return SYSTEM_VALUE_NUM_SUBGROUPS;
2410 case nir_intrinsic_load_subgroup_id:
2411 return SYSTEM_VALUE_SUBGROUP_ID;
2412 case nir_intrinsic_load_workgroup_size:
2413 return SYSTEM_VALUE_WORKGROUP_SIZE;
2414 case nir_intrinsic_load_global_invocation_id:
2415 return SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
2416 case nir_intrinsic_load_base_global_invocation_id:
2417 return SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID;
2418 case nir_intrinsic_load_global_invocation_index:
2419 return SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX;
2420 case nir_intrinsic_load_work_dim:
2421 return SYSTEM_VALUE_WORK_DIM;
2422 case nir_intrinsic_load_user_data_amd:
2423 return SYSTEM_VALUE_USER_DATA_AMD;
2424 case nir_intrinsic_load_barycentric_model:
2425 return SYSTEM_VALUE_BARYCENTRIC_PULL_MODEL;
2426 case nir_intrinsic_load_gs_header_ir3:
2427 return SYSTEM_VALUE_GS_HEADER_IR3;
2428 case nir_intrinsic_load_tcs_header_ir3:
2429 return SYSTEM_VALUE_TCS_HEADER_IR3;
2430 case nir_intrinsic_load_ray_launch_id:
2431 return SYSTEM_VALUE_RAY_LAUNCH_ID;
2432 case nir_intrinsic_load_ray_launch_size:
2433 return SYSTEM_VALUE_RAY_LAUNCH_SIZE;
2434 case nir_intrinsic_load_ray_world_origin:
2435 return SYSTEM_VALUE_RAY_WORLD_ORIGIN;
2436 case nir_intrinsic_load_ray_world_direction:
2437 return SYSTEM_VALUE_RAY_WORLD_DIRECTION;
2438 case nir_intrinsic_load_ray_object_origin:
2439 return SYSTEM_VALUE_RAY_OBJECT_ORIGIN;
2440 case nir_intrinsic_load_ray_object_direction:
2441 return SYSTEM_VALUE_RAY_OBJECT_DIRECTION;
2442 case nir_intrinsic_load_ray_t_min:
2443 return SYSTEM_VALUE_RAY_T_MIN;
2444 case nir_intrinsic_load_ray_t_max:
2445 return SYSTEM_VALUE_RAY_T_MAX;
2446 case nir_intrinsic_load_ray_object_to_world:
2447 return SYSTEM_VALUE_RAY_OBJECT_TO_WORLD;
2448 case nir_intrinsic_load_ray_world_to_object:
2449 return SYSTEM_VALUE_RAY_WORLD_TO_OBJECT;
2450 case nir_intrinsic_load_ray_hit_kind:
2451 return SYSTEM_VALUE_RAY_HIT_KIND;
2452 case nir_intrinsic_load_ray_flags:
2453 return SYSTEM_VALUE_RAY_FLAGS;
2454 case nir_intrinsic_load_ray_geometry_index:
2455 return SYSTEM_VALUE_RAY_GEOMETRY_INDEX;
2456 case nir_intrinsic_load_ray_instance_custom_index:
2457 return SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX;
2458 case nir_intrinsic_load_cull_mask:
2459 return SYSTEM_VALUE_CULL_MASK;
2460 case nir_intrinsic_load_ray_triangle_vertex_positions:
2461 return SYSTEM_VALUE_RAY_TRIANGLE_VERTEX_POSITIONS;
2462 case nir_intrinsic_load_frag_shading_rate:
2463 return SYSTEM_VALUE_FRAG_SHADING_RATE;
2464 case nir_intrinsic_load_mesh_view_count:
2465 return SYSTEM_VALUE_MESH_VIEW_COUNT;
2466 case nir_intrinsic_load_fully_covered:
2467 return SYSTEM_VALUE_FULLY_COVERED;
2468 case nir_intrinsic_load_frag_size:
2469 return SYSTEM_VALUE_FRAG_SIZE;
2470 case nir_intrinsic_load_frag_invocation_count:
2471 return SYSTEM_VALUE_FRAG_INVOCATION_COUNT;
2472 case nir_intrinsic_load_shader_index:
2473 return SYSTEM_VALUE_SHADER_INDEX;
2474 case nir_intrinsic_load_coalesced_input_count:
2475 return SYSTEM_VALUE_COALESCED_INPUT_COUNT;
2476 case nir_intrinsic_load_warps_per_sm_nv:
2477 return SYSTEM_VALUE_WARPS_PER_SM_NV;
2478 case nir_intrinsic_load_sm_count_nv:
2479 return SYSTEM_VALUE_SM_COUNT_NV;
2480 case nir_intrinsic_load_warp_id_nv:
2481 return SYSTEM_VALUE_WARP_ID_NV;
2482 case nir_intrinsic_load_sm_id_nv:
2483 return SYSTEM_VALUE_SM_ID_NV;
2484 default:
2485 unreachable("intrinsic doesn't produce a system value");
2486 }
2487 }
2488
2489 /* OpenGL utility method that remaps attribute locations when the attributes
2490 * are doubles. Not needed for Vulkan because of the difference in how double
2491 * inputs are counted against input locations in Vulkan vs. OpenGL.
2492 *
2493 * The bitfield returned in dual_slot is one bit for each double input slot in
2494 * the original OpenGL single-slot input numbering. The mapping from old
2495 * locations to new locations is as follows:
2496 *
2497 * new_loc = loc + util_bitcount(dual_slot & BITFIELD64_MASK(loc))
2498 */
2499 void
2500 nir_remap_dual_slot_attributes(nir_shader *shader, uint64_t *dual_slot)
2501 {
2502 assert(shader->info.stage == MESA_SHADER_VERTEX);
2503
2504 *dual_slot = 0;
2505 nir_foreach_shader_in_variable(var, shader) {
2506 if (glsl_type_is_dual_slot(glsl_without_array(var->type))) {
2507 unsigned slots = glsl_count_attribute_slots(var->type, true);
2508 *dual_slot |= BITFIELD64_MASK(slots) << var->data.location;
2509 }
2510 }
2511
2512 nir_foreach_shader_in_variable(var, shader) {
2513 var->data.location +=
2514 util_bitcount64(*dual_slot & BITFIELD64_MASK(var->data.location));
2515 }
2516 }
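
/* Worked example of the mapping above (illustrative numbers only): if
 * dual_slot has only bit 3 set (a double-based attribute at original
 * location 3), an attribute at original location 2 keeps location 2, while
 * one at original location 5 moves to
 * 5 + util_bitcount(dual_slot & BITFIELD64_MASK(5)) = 6.
 */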
2517
2518 /* Returns an attribute mask that has been re-compacted using the given
2519 * dual_slot mask.
2520 */
2521 uint64_t
2522 nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot)
2523 {
2524 while (dual_slot) {
2525 unsigned loc = u_bit_scan64(&dual_slot);
2526 /* mask of all bits up to and including loc */
2527 uint64_t mask = BITFIELD64_MASK(loc + 1);
2528 attribs = (attribs & mask) | ((attribs & ~mask) >> 1);
2529 }
2530 return attribs;
2531 }
2532
2533 void
2534 nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_def *src,
2535 bool bindless)
2536 {
2537 enum gl_access_qualifier access = nir_intrinsic_access(intrin);
2538
2539 /* Image intrinsics only have one of these */
2540 assert(!nir_intrinsic_has_src_type(intrin) ||
2541 !nir_intrinsic_has_dest_type(intrin));
2542
2543 nir_alu_type data_type = nir_type_invalid;
2544 if (nir_intrinsic_has_src_type(intrin))
2545 data_type = nir_intrinsic_src_type(intrin);
2546 if (nir_intrinsic_has_dest_type(intrin))
2547 data_type = nir_intrinsic_dest_type(intrin);
2548
2549 nir_atomic_op atomic_op = 0;
2550 if (nir_intrinsic_has_atomic_op(intrin))
2551 atomic_op = nir_intrinsic_atomic_op(intrin);
2552
2553 switch (intrin->intrinsic) {
2554 #define CASE(op) \
2555 case nir_intrinsic_image_deref_##op: \
2556 intrin->intrinsic = bindless ? nir_intrinsic_bindless_image_##op \
2557 : nir_intrinsic_image_##op; \
2558 break;
2559 CASE(load)
2560 CASE(sparse_load)
2561 CASE(store)
2562 CASE(atomic)
2563 CASE(atomic_swap)
2564 CASE(size)
2565 CASE(samples)
2566 CASE(load_raw_intel)
2567 CASE(store_raw_intel)
2568 CASE(fragment_mask_load_amd)
2569 #undef CASE
2570 default:
2571 unreachable("Unhanded image intrinsic");
2572 }
2573
2574 nir_variable *var = nir_intrinsic_get_var(intrin, 0);
2575
2576 /* Only update the format if the intrinsic doesn't have one set */
2577 if (nir_intrinsic_format(intrin) == PIPE_FORMAT_NONE)
2578 nir_intrinsic_set_format(intrin, var->data.image.format);
2579
2580 nir_intrinsic_set_access(intrin, access | var->data.access);
2581 if (nir_intrinsic_has_src_type(intrin))
2582 nir_intrinsic_set_src_type(intrin, data_type);
2583 if (nir_intrinsic_has_dest_type(intrin))
2584 nir_intrinsic_set_dest_type(intrin, data_type);
2585
2586 if (nir_intrinsic_has_atomic_op(intrin))
2587 nir_intrinsic_set_atomic_op(intrin, atomic_op);
2588
2589 nir_src_rewrite(&intrin->src[0], src);
2590 }
2591
2592 unsigned
2593 nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr)
2594 {
2595 enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
2596 int coords = glsl_get_sampler_dim_coordinate_components(dim);
2597 if (dim == GLSL_SAMPLER_DIM_CUBE)
2598 return coords;
2599 else
2600 return coords + nir_intrinsic_image_array(instr);
2601 }
2602
2603 nir_src *
2604 nir_get_shader_call_payload_src(nir_intrinsic_instr *call)
2605 {
2606 switch (call->intrinsic) {
2607 case nir_intrinsic_trace_ray:
2608 case nir_intrinsic_rt_trace_ray:
2609 return &call->src[10];
2610 case nir_intrinsic_execute_callable:
2611 case nir_intrinsic_rt_execute_callable:
2612 return &call->src[1];
2613 default:
2614 unreachable("Not a call intrinsic");
2615 return NULL;
2616 }
2617 }
2618
2619 nir_binding
2620 nir_chase_binding(nir_src rsrc)
2621 {
2622 nir_binding res = { 0 };
2623 if (rsrc.ssa->parent_instr->type == nir_instr_type_deref) {
2624 const struct glsl_type *type = glsl_without_array(nir_src_as_deref(rsrc)->type);
2625 bool is_image = glsl_type_is_image(type) || glsl_type_is_sampler(type);
2626 while (rsrc.ssa->parent_instr->type == nir_instr_type_deref) {
2627 nir_deref_instr *deref = nir_src_as_deref(rsrc);
2628
2629 if (deref->deref_type == nir_deref_type_var) {
2630 res.success = true;
2631 res.var = deref->var;
2632 res.desc_set = deref->var->data.descriptor_set;
2633 res.binding = deref->var->data.binding;
2634 return res;
2635 } else if (deref->deref_type == nir_deref_type_array && is_image) {
2636 if (res.num_indices == ARRAY_SIZE(res.indices))
2637 return (nir_binding){ 0 };
2638 res.indices[res.num_indices++] = deref->arr.index;
2639 }
2640
2641 rsrc = deref->parent;
2642 }
2643 }
2644
2645 /* Skip copies and trimming. Trimming can appear as nir_op_mov instructions
2646 * when removing the offset from addresses. We also consider
2647 * nir_op_is_vec_or_mov() instructions to skip trimming of
2648 * vec2_index_32bit_offset addresses after lowering ALU to scalar.
2649 */
2650 unsigned num_components = nir_src_num_components(rsrc);
2651 while (true) {
2652 nir_alu_instr *alu = nir_src_as_alu_instr(rsrc);
2653 nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc);
2654 if (alu && alu->op == nir_op_mov) {
2655 for (unsigned i = 0; i < num_components; i++) {
2656 if (alu->src[0].swizzle[i] != i)
2657 return (nir_binding){ 0 };
2658 }
2659 rsrc = alu->src[0].src;
2660 } else if (alu && nir_op_is_vec(alu->op)) {
2661 for (unsigned i = 0; i < num_components; i++) {
2662 if (alu->src[i].swizzle[0] != i || alu->src[i].src.ssa != alu->src[0].src.ssa)
2663 return (nir_binding){ 0 };
2664 }
2665 rsrc = alu->src[0].src;
2666 } else if (intrin && intrin->intrinsic == nir_intrinsic_read_first_invocation) {
2667 /* The caller might want to be aware that only the value of the indices
2668 * from the first invocation is used.
2669 */
2670 res.read_first_invocation = true;
2671 rsrc = intrin->src[0];
2672 } else {
2673 break;
2674 }
2675 }
2676
2677 if (nir_src_is_const(rsrc)) {
2678 /* GL binding model after deref lowering */
2679 res.success = true;
2680 /* Can't use just nir_src_as_uint. Vulkan resource index produces a
2681 * vec2. Some drivers lower it to vec1 (to handle get_ssbo_size for
2682 * example) but others just keep it around as a vec2 (v3dv).
2683 */
2684 res.binding = nir_src_comp_as_uint(rsrc, 0);
2685 return res;
2686 }
2687
2688 /* otherwise, must be Vulkan binding model after deref lowering or GL bindless */
2689
2690 nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc);
2691 if (!intrin)
2692 return (nir_binding){ 0 };
2693
2694 /* Intel resource, similar to load_vulkan_descriptor after it has been
2695 * lowered.
2696 */
2697 if (intrin->intrinsic == nir_intrinsic_resource_intel) {
2698 res.success = true;
2699 res.desc_set = nir_intrinsic_desc_set(intrin);
2700 res.binding = nir_intrinsic_binding(intrin);
2701 /* nir_intrinsic_resource_intel has 3 sources, but src[2] is included in
2702 * src[1]; it is kept around for other purposes.
2703 */
2704 res.num_indices = 2;
2705 res.indices[0] = intrin->src[0];
2706 res.indices[1] = intrin->src[1];
2707 return res;
2708 }
2709
2710 /* skip load_vulkan_descriptor */
2711 if (intrin->intrinsic == nir_intrinsic_load_vulkan_descriptor) {
2712 intrin = nir_src_as_intrinsic(intrin->src[0]);
2713 if (!intrin)
2714 return (nir_binding){ 0 };
2715 }
2716
2717 if (intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
2718 return (nir_binding){ 0 };
2719
2720 assert(res.num_indices == 0);
2721 res.success = true;
2722 res.desc_set = nir_intrinsic_desc_set(intrin);
2723 res.binding = nir_intrinsic_binding(intrin);
2724 res.num_indices = 1;
2725 res.indices[0] = intrin->src[0];
2726 return res;
2727 }
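
/* Usage sketch ("intrin" is a hypothetical UBO/SSBO access whose first
 * source is the resource):
 *
 *    nir_binding b = nir_chase_binding(intrin->src[0]);
 *    if (b.success) {
 *       ... b.desc_set/b.binding identify the resource and b.indices[]
 *           hold any remaining (possibly non-constant) array indices ...
 *       nir_variable *var = nir_get_binding_variable(shader, b);
 *    }
 */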
2728
2729 nir_variable *
2730 nir_get_binding_variable(nir_shader *shader, nir_binding binding)
2731 {
2732 nir_variable *binding_var = NULL;
2733 unsigned count = 0;
2734
2735 if (!binding.success)
2736 return NULL;
2737
2738 if (binding.var)
2739 return binding.var;
2740
2741 nir_foreach_variable_with_modes(var, shader, nir_var_mem_ubo | nir_var_mem_ssbo) {
2742 if (var->data.descriptor_set == binding.desc_set && var->data.binding == binding.binding) {
2743 binding_var = var;
2744 count++;
2745 }
2746 }
2747
2748 /* Be conservative if another variable is using the same binding/desc_set
2749 * because the access mask might be different and we can't get it reliably.
2750 */
2751 if (count > 1)
2752 return NULL;
2753
2754 return binding_var;
2755 }
2756
2757 nir_scalar
2758 nir_scalar_chase_movs(nir_scalar s)
2759 {
2760 while (nir_scalar_is_alu(s)) {
2761 nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
2762 if (alu->op == nir_op_mov) {
2763 s.def = alu->src[0].src.ssa;
2764 s.comp = alu->src[0].swizzle[s.comp];
2765 } else if (nir_op_is_vec(alu->op)) {
2766 s.def = alu->src[s.comp].src.ssa;
2767 s.comp = alu->src[s.comp].swizzle[0];
2768 } else {
2769 break;
2770 }
2771 }
2772
2773 return s;
2774 }
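
/* Usage sketch: chase a single component back through copies before
 * pattern-matching it.  "some_def" is a hypothetical nir_def.
 *
 *    nir_scalar s = { .def = some_def, .comp = 0 };
 *    s = nir_scalar_chase_movs(s);
 *    ... s now names the value that actually produces component 0, with
 *        any intervening mov/vecN copies skipped ...
 */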
2775
2776 nir_alu_type
2777 nir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type)
2778 {
2779 switch (base_type) {
2780 /* clang-format off */
2781 case GLSL_TYPE_BOOL: return nir_type_bool1;
2782 case GLSL_TYPE_UINT: return nir_type_uint32;
2783 case GLSL_TYPE_INT: return nir_type_int32;
2784 case GLSL_TYPE_UINT16: return nir_type_uint16;
2785 case GLSL_TYPE_INT16: return nir_type_int16;
2786 case GLSL_TYPE_UINT8: return nir_type_uint8;
2787 case GLSL_TYPE_INT8: return nir_type_int8;
2788 case GLSL_TYPE_UINT64: return nir_type_uint64;
2789 case GLSL_TYPE_INT64: return nir_type_int64;
2790 case GLSL_TYPE_FLOAT: return nir_type_float32;
2791 case GLSL_TYPE_FLOAT16: return nir_type_float16;
2792 case GLSL_TYPE_DOUBLE: return nir_type_float64;
2793 /* clang-format on */
2794
2795 case GLSL_TYPE_COOPERATIVE_MATRIX:
2796 case GLSL_TYPE_SAMPLER:
2797 case GLSL_TYPE_TEXTURE:
2798 case GLSL_TYPE_IMAGE:
2799 case GLSL_TYPE_ATOMIC_UINT:
2800 case GLSL_TYPE_STRUCT:
2801 case GLSL_TYPE_INTERFACE:
2802 case GLSL_TYPE_ARRAY:
2803 case GLSL_TYPE_VOID:
2804 case GLSL_TYPE_SUBROUTINE:
2805 case GLSL_TYPE_ERROR:
2806 return nir_type_invalid;
2807 }
2808
2809 unreachable("unknown type");
2810 }
2811
2812 enum glsl_base_type
2813 nir_get_glsl_base_type_for_nir_type(nir_alu_type base_type)
2814 {
2815 /* clang-format off */
2816 switch (base_type) {
2817 case nir_type_bool1: return GLSL_TYPE_BOOL;
2818 case nir_type_uint32: return GLSL_TYPE_UINT;
2819 case nir_type_int32: return GLSL_TYPE_INT;
2820 case nir_type_uint16: return GLSL_TYPE_UINT16;
2821 case nir_type_int16: return GLSL_TYPE_INT16;
2822 case nir_type_uint8: return GLSL_TYPE_UINT8;
2823 case nir_type_int8: return GLSL_TYPE_INT8;
2824 case nir_type_uint64: return GLSL_TYPE_UINT64;
2825 case nir_type_int64: return GLSL_TYPE_INT64;
2826 case nir_type_float32: return GLSL_TYPE_FLOAT;
2827 case nir_type_float16: return GLSL_TYPE_FLOAT16;
2828 case nir_type_float64: return GLSL_TYPE_DOUBLE;
2829 default: unreachable("Not a sized nir_alu_type");
2830 }
2831 /* clang-format on */
2832 }
2833
2834 nir_op
2835 nir_op_vec(unsigned num_components)
2836 {
2837 /* clang-format off */
2838 switch (num_components) {
2839 case 1: return nir_op_mov;
2840 case 2: return nir_op_vec2;
2841 case 3: return nir_op_vec3;
2842 case 4: return nir_op_vec4;
2843 case 5: return nir_op_vec5;
2844 case 8: return nir_op_vec8;
2845 case 16: return nir_op_vec16;
2846 default: unreachable("bad component count");
2847 }
2848 /* clang-format on */
2849 }
2850
2851 bool
2852 nir_op_is_vec(nir_op op)
2853 {
2854 switch (op) {
2855 case nir_op_vec2:
2856 case nir_op_vec3:
2857 case nir_op_vec4:
2858 case nir_op_vec5:
2859 case nir_op_vec8:
2860 case nir_op_vec16:
2861 return true;
2862 default:
2863 return false;
2864 }
2865 }
2866
2867 nir_component_mask_t
2868 nir_alu_instr_src_read_mask(const nir_alu_instr *instr, unsigned src)
2869 {
2870 nir_component_mask_t read_mask = 0;
2871 for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) {
2872 if (!nir_alu_instr_channel_used(instr, src, c))
2873 continue;
2874
2875 read_mask |= (1 << instr->src[src].swizzle[c]);
2876 }
2877 return read_mask;
2878 }
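
/* Worked example: for a per-component ALU op with a 3-component destination
 * whose source 0 has swizzle .yzx, channels 0..2 of the destination read
 * source components 1, 2 and 0, so the returned mask is 0b0111; component 3
 * (w) of that source is never read.
 */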
2879
2880 unsigned
2881 nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src)
2882 {
2883 if (nir_op_infos[instr->op].input_sizes[src] > 0)
2884 return nir_op_infos[instr->op].input_sizes[src];
2885
2886 return instr->def.num_components;
2887 }
2888
2889 #define CASE_ALL_SIZES(op) \
2890 case op: \
2891 case op##8: \
2892 case op##16: \
2893 case op##32:
2894
2895 bool
2896 nir_alu_instr_is_comparison(const nir_alu_instr *instr)
2897 {
2898 switch (instr->op) {
2899 CASE_ALL_SIZES(nir_op_flt)
2900 CASE_ALL_SIZES(nir_op_fge)
2901 CASE_ALL_SIZES(nir_op_feq)
2902 CASE_ALL_SIZES(nir_op_fneu)
2903 CASE_ALL_SIZES(nir_op_ilt)
2904 CASE_ALL_SIZES(nir_op_ult)
2905 CASE_ALL_SIZES(nir_op_ige)
2906 CASE_ALL_SIZES(nir_op_uge)
2907 CASE_ALL_SIZES(nir_op_ieq)
2908 CASE_ALL_SIZES(nir_op_ine)
2909 CASE_ALL_SIZES(nir_op_bitz)
2910 CASE_ALL_SIZES(nir_op_bitnz)
2911 case nir_op_inot:
2912 return true;
2913 default:
2914 return false;
2915 }
2916 }
2917
2918 #undef CASE_ALL_SIZES
2919
2920 unsigned
2921 nir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn)
2922 {
2923 const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
2924 assert(srcn < info->num_srcs);
2925 if (info->src_components[srcn] > 0)
2926 return info->src_components[srcn];
2927 else if (info->src_components[srcn] == 0)
2928 return intr->num_components;
2929 else
2930 return nir_src_num_components(intr->src[srcn]);
2931 }
2932
2933 unsigned
2934 nir_intrinsic_dest_components(nir_intrinsic_instr *intr)
2935 {
2936 const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
2937 if (!info->has_dest)
2938 return 0;
2939 else if (info->dest_components)
2940 return info->dest_components;
2941 else
2942 return intr->num_components;
2943 }
2944
2945 nir_alu_type
2946 nir_intrinsic_instr_src_type(const nir_intrinsic_instr *intrin, unsigned src)
2947 {
2948 /* We could go nuts here, but we'll just handle a few simple
2949 * cases and let everything else be untyped.
2950 */
2951 switch (intrin->intrinsic) {
2952 case nir_intrinsic_store_deref: {
2953 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2954 if (src == 1)
2955 return nir_get_nir_type_for_glsl_type(deref->type);
2956 break;
2957 }
2958
2959 case nir_intrinsic_store_output:
2960 if (src == 0)
2961 return nir_intrinsic_src_type(intrin);
2962 break;
2963
2964 default:
2965 break;
2966 }
2967
2968 /* For the most part, we leave other intrinsics alone. Most
2969 * of them don't matter in OpenGL ES 2.0 drivers anyway.
2970 * However, we should at least check if this is some sort of
2971 * IO intrinsic and flag its offset and index sources.
2972 */
2973 {
2974 int offset_src_idx = nir_get_io_offset_src_number(intrin);
2975 if (src == offset_src_idx) {
2976 const nir_src *offset_src = offset_src_idx >= 0 ? &intrin->src[offset_src_idx] : NULL;
2977 if (offset_src)
2978 return nir_type_int;
2979 }
2980 }
2981
2982 return nir_type_invalid;
2983 }
2984
2985 nir_alu_type
2986 nir_intrinsic_instr_dest_type(const nir_intrinsic_instr *intrin)
2987 {
2988 /* We could go nuts here, but we'll just handle a few simple
2989 * cases and let everything else be untyped.
2990 */
2991 switch (intrin->intrinsic) {
2992 case nir_intrinsic_load_deref: {
2993 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2994 return nir_get_nir_type_for_glsl_type(deref->type);
2995 }
2996
2997 case nir_intrinsic_load_input:
2998 case nir_intrinsic_load_uniform:
2999 return nir_intrinsic_dest_type(intrin);
3000
3001 default:
3002 break;
3003 }
3004
3005 return nir_type_invalid;
3006 }
3007
3008 /**
3009 * Helper to copy const_index[] from src to dst, without assuming they
3010 * match in order.
3011 */
3012 void
3013 nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src)
3014 {
3015 if (src->intrinsic == dst->intrinsic) {
3016 memcpy(dst->const_index, src->const_index, sizeof(dst->const_index));
3017 return;
3018 }
3019
3020 const nir_intrinsic_info *src_info = &nir_intrinsic_infos[src->intrinsic];
3021 const nir_intrinsic_info *dst_info = &nir_intrinsic_infos[dst->intrinsic];
3022
3023 for (unsigned i = 0; i < NIR_INTRINSIC_NUM_INDEX_FLAGS; i++) {
3024 if (src_info->index_map[i] == 0)
3025 continue;
3026
3027 /* require that dst instruction also uses the same const_index[]: */
3028 assert(dst_info->index_map[i] > 0);
3029
3030 dst->const_index[dst_info->index_map[i] - 1] =
3031 src->const_index[src_info->index_map[i] - 1];
3032 }
3033 }
3034
3035 bool
3036 nir_tex_instr_need_sampler(const nir_tex_instr *instr)
3037 {
3038 switch (instr->op) {
3039 case nir_texop_txf:
3040 case nir_texop_txf_ms:
3041 case nir_texop_txs:
3042 case nir_texop_query_levels:
3043 case nir_texop_texture_samples:
3044 case nir_texop_samples_identical:
3045 case nir_texop_descriptor_amd:
3046 return false;
3047 default:
3048 return true;
3049 }
3050 }
3051
3052 unsigned
3053 nir_tex_instr_result_size(const nir_tex_instr *instr)
3054 {
3055 switch (instr->op) {
3056 case nir_texop_txs: {
3057 unsigned ret;
3058 switch (instr->sampler_dim) {
3059 case GLSL_SAMPLER_DIM_1D:
3060 case GLSL_SAMPLER_DIM_BUF:
3061 ret = 1;
3062 break;
3063 case GLSL_SAMPLER_DIM_2D:
3064 case GLSL_SAMPLER_DIM_CUBE:
3065 case GLSL_SAMPLER_DIM_MS:
3066 case GLSL_SAMPLER_DIM_RECT:
3067 case GLSL_SAMPLER_DIM_EXTERNAL:
3068 case GLSL_SAMPLER_DIM_SUBPASS:
3069 ret = 2;
3070 break;
3071 case GLSL_SAMPLER_DIM_3D:
3072 ret = 3;
3073 break;
3074 default:
3075 unreachable("not reached");
3076 }
3077 if (instr->is_array)
3078 ret++;
3079 return ret;
3080 }
3081
3082 case nir_texop_lod:
3083 return 2;
3084
3085 case nir_texop_texture_samples:
3086 case nir_texop_query_levels:
3087 case nir_texop_samples_identical:
3088 case nir_texop_fragment_mask_fetch_amd:
3089 case nir_texop_lod_bias_agx:
3090 return 1;
3091
3092 case nir_texop_descriptor_amd:
3093 return instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 4 : 8;
3094
3095 case nir_texop_sampler_descriptor_amd:
3096 return 4;
3097
3098 case nir_texop_hdr_dim_nv:
3099 case nir_texop_tex_type_nv:
3100 return 4;
3101
3102 default:
3103 if (instr->is_shadow && instr->is_new_style_shadow)
3104 return 1;
3105
3106 return 4;
3107 }
3108 }
3109
3110 bool
3111 nir_tex_instr_is_query(const nir_tex_instr *instr)
3112 {
3113 switch (instr->op) {
3114 case nir_texop_txs:
3115 case nir_texop_lod:
3116 case nir_texop_texture_samples:
3117 case nir_texop_query_levels:
3118 case nir_texop_descriptor_amd:
3119 case nir_texop_sampler_descriptor_amd:
3120 case nir_texop_lod_bias_agx:
3121 return true;
3122 case nir_texop_tex:
3123 case nir_texop_txb:
3124 case nir_texop_txl:
3125 case nir_texop_txd:
3126 case nir_texop_txf:
3127 case nir_texop_txf_ms:
3128 case nir_texop_txf_ms_fb:
3129 case nir_texop_txf_ms_mcs_intel:
3130 case nir_texop_tg4:
3131 case nir_texop_samples_identical:
3132 case nir_texop_fragment_mask_fetch_amd:
3133 case nir_texop_fragment_fetch_amd:
3134 return false;
3135 default:
3136 unreachable("Invalid texture opcode");
3137 }
3138 }
3139
3140 bool
3141 nir_tex_instr_has_implicit_derivative(const nir_tex_instr *instr)
3142 {
3143 switch (instr->op) {
3144 case nir_texop_tex:
3145 case nir_texop_txb:
3146 case nir_texop_lod:
3147 return true;
3148 case nir_texop_tg4:
3149 return instr->is_gather_implicit_lod;
3150 default:
3151 return false;
3152 }
3153 }
3154
3155 nir_alu_type
3156 nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src)
3157 {
3158 switch (instr->src[src].src_type) {
3159 case nir_tex_src_coord:
3160 switch (instr->op) {
3161 case nir_texop_txf:
3162 case nir_texop_txf_ms:
3163 case nir_texop_txf_ms_fb:
3164 case nir_texop_txf_ms_mcs_intel:
3165 case nir_texop_samples_identical:
3166 case nir_texop_fragment_fetch_amd:
3167 case nir_texop_fragment_mask_fetch_amd:
3168 return nir_type_int;
3169
3170 default:
3171 return nir_type_float;
3172 }
3173
3174 case nir_tex_src_lod:
3175 switch (instr->op) {
3176 case nir_texop_txs:
3177 case nir_texop_txf:
3178 case nir_texop_txf_ms:
3179 case nir_texop_fragment_fetch_amd:
3180 case nir_texop_fragment_mask_fetch_amd:
3181 return nir_type_int;
3182
3183 default:
3184 return nir_type_float;
3185 }
3186
3187 case nir_tex_src_projector:
3188 case nir_tex_src_comparator:
3189 case nir_tex_src_bias:
3190 case nir_tex_src_min_lod:
3191 case nir_tex_src_ddx:
3192 case nir_tex_src_ddy:
3193 case nir_tex_src_backend1:
3194 case nir_tex_src_backend2:
3195 return nir_type_float;
3196
3197 case nir_tex_src_offset:
3198 case nir_tex_src_ms_index:
3199 case nir_tex_src_plane:
3200 return nir_type_int;
3201
3202 case nir_tex_src_ms_mcs_intel:
3203 case nir_tex_src_texture_deref:
3204 case nir_tex_src_sampler_deref:
3205 case nir_tex_src_texture_offset:
3206 case nir_tex_src_sampler_offset:
3207 case nir_tex_src_texture_handle:
3208 case nir_tex_src_sampler_handle:
3209 return nir_type_uint;
3210
3211 case nir_num_tex_src_types:
3212 unreachable("nir_num_tex_src_types is not a valid source type");
3213 }
3214
3215 unreachable("Invalid texture source type");
3216 }
3217
3218 unsigned
3219 nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src)
3220 {
3221 if (instr->src[src].src_type == nir_tex_src_coord)
3222 return instr->coord_components;
3223
3224 /* The MCS value is expected to be a vec4 returned by a txf_ms_mcs_intel */
3225 if (instr->src[src].src_type == nir_tex_src_ms_mcs_intel)
3226 return 4;
3227
3228 if (instr->src[src].src_type == nir_tex_src_ddx ||
3229 instr->src[src].src_type == nir_tex_src_ddy) {
3230
3231 if (instr->is_array && !instr->array_is_lowered_cube)
3232 return instr->coord_components - 1;
3233 else
3234 return instr->coord_components;
3235 }
3236
3237 if (instr->src[src].src_type == nir_tex_src_offset) {
3238 if (instr->is_array)
3239 return instr->coord_components - 1;
3240 else
3241 return instr->coord_components;
3242 }
3243
3244 if (instr->src[src].src_type == nir_tex_src_backend1 ||
3245 instr->src[src].src_type == nir_tex_src_backend2)
3246 return nir_src_num_components(instr->src[src].src);
3247
3248 /* For AMD, this can be a vec8/vec4 image/sampler descriptor. */
3249 if (instr->src[src].src_type == nir_tex_src_texture_handle ||
3250 instr->src[src].src_type == nir_tex_src_sampler_handle)
3251 return 0;
3252
3253 return 1;
3254 }
3255
3256 /**
3257 * Return which components are written into transform feedback buffers.
3258 * The result is relative to 0, not "component".
3259 */
3260 unsigned
3261 nir_instr_xfb_write_mask(nir_intrinsic_instr *instr)
3262 {
3263 unsigned mask = 0;
3264
3265 if (nir_intrinsic_has_io_xfb(instr)) {
3266 unsigned wr_mask = nir_intrinsic_write_mask(instr) << nir_intrinsic_component(instr);
3267 assert((wr_mask & ~0xf) == 0); /* only 4 components allowed */
3268
3269 unsigned iter_mask = wr_mask;
3270 while (iter_mask) {
3271 unsigned i = u_bit_scan(&iter_mask);
3272 nir_io_xfb xfb = i < 2 ? nir_intrinsic_io_xfb(instr) : nir_intrinsic_io_xfb2(instr);
3273 if (xfb.out[i % 2].num_components)
3274 mask |= BITFIELD_RANGE(i, xfb.out[i % 2].num_components) & wr_mask;
3275 }
3276 }
3277
3278 return mask;
3279 }
3280
3281 /**
3282 * Whether an output slot is consumed by fixed-function logic.
3283 */
3284 bool
3285 nir_slot_is_sysval_output(gl_varying_slot slot, gl_shader_stage next_shader)
3286 {
3287 switch (next_shader) {
3288 case MESA_SHADER_FRAGMENT:
3289 return slot == VARYING_SLOT_POS ||
3290 slot == VARYING_SLOT_PSIZ ||
3291 slot == VARYING_SLOT_EDGE ||
3292 slot == VARYING_SLOT_CLIP_VERTEX ||
3293 slot == VARYING_SLOT_CLIP_DIST0 ||
3294 slot == VARYING_SLOT_CLIP_DIST1 ||
3295 slot == VARYING_SLOT_CULL_DIST0 ||
3296 slot == VARYING_SLOT_CULL_DIST1 ||
3297 slot == VARYING_SLOT_LAYER ||
3298 slot == VARYING_SLOT_VIEWPORT ||
3299 slot == VARYING_SLOT_VIEW_INDEX ||
3300 slot == VARYING_SLOT_VIEWPORT_MASK ||
3301 slot == VARYING_SLOT_PRIMITIVE_SHADING_RATE ||
3302 /* NV_mesh_shader only */
3303 slot == VARYING_SLOT_PRIMITIVE_COUNT ||
3304 slot == VARYING_SLOT_PRIMITIVE_INDICES;
3305
3306 case MESA_SHADER_TESS_EVAL:
3307 return slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
3308 slot == VARYING_SLOT_TESS_LEVEL_INNER ||
3309 slot == VARYING_SLOT_BOUNDING_BOX0 ||
3310 slot == VARYING_SLOT_BOUNDING_BOX1;
3311
3312 case MESA_SHADER_MESH:
3313 /* NV_mesh_shader only */
3314 return slot == VARYING_SLOT_TASK_COUNT;
3315
3316 case MESA_SHADER_NONE:
3317 /* NONE means unknown. Check all possibilities. */
3318 return nir_slot_is_sysval_output(slot, MESA_SHADER_FRAGMENT) ||
3319 nir_slot_is_sysval_output(slot, MESA_SHADER_TESS_EVAL) ||
3320 nir_slot_is_sysval_output(slot, MESA_SHADER_MESH);
3321
3322 default:
3323 /* No other shaders have preceding shaders with sysval outputs. */
3324 return false;
3325 }
3326 }
3327
3328 /**
3329 * Whether an input/output slot is consumed by the next shader stage,
3330 * or written by the previous shader stage.
3331 */
3332 bool
3333 nir_slot_is_varying(gl_varying_slot slot)
3334 {
3335 return slot >= VARYING_SLOT_VAR0 ||
3336 slot == VARYING_SLOT_COL0 ||
3337 slot == VARYING_SLOT_COL1 ||
3338 slot == VARYING_SLOT_BFC0 ||
3339 slot == VARYING_SLOT_BFC1 ||
3340 slot == VARYING_SLOT_FOGC ||
3341 (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) ||
3342 slot == VARYING_SLOT_PNTC ||
3343 slot == VARYING_SLOT_CLIP_DIST0 ||
3344 slot == VARYING_SLOT_CLIP_DIST1 ||
3345 slot == VARYING_SLOT_CULL_DIST0 ||
3346 slot == VARYING_SLOT_CULL_DIST1 ||
3347 slot == VARYING_SLOT_PRIMITIVE_ID ||
3348 slot == VARYING_SLOT_LAYER ||
3349 slot == VARYING_SLOT_VIEWPORT ||
3350 slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
3351 slot == VARYING_SLOT_TESS_LEVEL_INNER;
3352 }
3353
3354 bool
3355 nir_slot_is_sysval_output_and_varying(gl_varying_slot slot,
3356 gl_shader_stage next_shader)
3357 {
3358 return nir_slot_is_sysval_output(slot, next_shader) &&
3359 nir_slot_is_varying(slot);
3360 }
3361
3362 /**
3363 * This marks the output store instruction as not feeding the next shader
3364 * stage. If the instruction has no other use, it's removed.
3365 */
3366 bool
3367 nir_remove_varying(nir_intrinsic_instr *intr, gl_shader_stage next_shader)
3368 {
3369 nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
3370
3371 if ((!sem.no_sysval_output &&
3372 nir_slot_is_sysval_output(sem.location, next_shader)) ||
3373 nir_instr_xfb_write_mask(intr)) {
3374 /* Demote the store instruction. */
3375 sem.no_varying = true;
3376 nir_intrinsic_set_io_semantics(intr, sem);
3377 return false;
3378 } else {
3379 nir_instr_remove(&intr->instr);
3380 return true;
3381 }
3382 }
3383
3384 /**
3385 * This marks the output store instruction as not feeding fixed-function
3386 * logic. If the instruction has no other use, it's removed.
3387 */
3388 bool
3389 nir_remove_sysval_output(nir_intrinsic_instr *intr)
3390 {
3391 nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
3392
3393 if ((!sem.no_varying && nir_slot_is_varying(sem.location)) ||
3394 nir_instr_xfb_write_mask(intr)) {
3395 /* Demote the store instruction. */
3396 sem.no_sysval_output = true;
3397 nir_intrinsic_set_io_semantics(intr, sem);
3398 return false;
3399 } else {
3400 nir_instr_remove(&intr->instr);
3401 return true;
3402 }
3403 }
3404
3405 void
3406 nir_remove_non_entrypoints(nir_shader *nir)
3407 {
3408 nir_foreach_function_safe(func, nir) {
3409 if (!func->is_entrypoint)
3410 exec_node_remove(&func->node);
3411 }
3412 assert(exec_list_length(&nir->functions) == 1);
3413 }
3414
3415 void
3416 nir_remove_non_exported(nir_shader *nir)
3417 {
3418 nir_foreach_function_safe(func, nir) {
3419 if (!func->is_exported)
3420 exec_node_remove(&func->node);
3421 }
3422 }
3423
3424 unsigned
3425 nir_static_workgroup_size(const nir_shader *s)
3426 {
3427 return s->info.workgroup_size[0] * s->info.workgroup_size[1] *
3428 s->info.workgroup_size[2];
3429 }
3430