1 /*
2 * Copyright © 2014 Connor Abbott
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Connor Abbott (cwabbott0@gmail.com)
25 *
26 */
27
28 #ifndef NIR_H
29 #define NIR_H
30
31 #include "util/hash_table.h"
32 #include "compiler/glsl/list.h"
33 #include "GL/gl.h" /* GLenum */
34 #include "util/list.h"
35 #include "util/log.h"
36 #include "util/ralloc.h"
37 #include "util/set.h"
38 #include "util/bitscan.h"
39 #include "util/bitset.h"
40 #include "util/compiler.h"
41 #include "util/enum_operators.h"
42 #include "util/macros.h"
43 #include "util/format/u_format.h"
44 #include "compiler/nir_types.h"
45 #include "compiler/shader_enums.h"
46 #include "compiler/shader_info.h"
47 #define XXH_INLINE_ALL
48 #include "util/xxhash.h"
49 #include <stdio.h>
50
51 #ifndef NDEBUG
52 #include "util/debug.h"
53 #endif /* NDEBUG */
54
55 #include "nir_opcodes.h"
56
57 #if defined(_WIN32) && !defined(snprintf)
58 #define snprintf _snprintf
59 #endif
60
61 #ifdef __cplusplus
62 extern "C" {
63 #endif
64
65 #define NIR_FALSE 0u
66 #define NIR_TRUE (~0u)
67 #define NIR_MAX_VEC_COMPONENTS 16
68 #define NIR_MAX_MATRIX_COLUMNS 4
69 #define NIR_STREAM_PACKED (1 << 8)
70 typedef uint16_t nir_component_mask_t;
71
72 static inline bool
73 nir_num_components_valid(unsigned num_components)
74 {
75 return (num_components >= 1 &&
76 num_components <= 5) ||
77 num_components == 8 ||
78 num_components == 16;
79 }
80
81 bool nir_component_mask_can_reinterpret(nir_component_mask_t mask,
82 unsigned old_bit_size,
83 unsigned new_bit_size);
84 nir_component_mask_t
85 nir_component_mask_reinterpret(nir_component_mask_t mask,
86 unsigned old_bit_size,
87 unsigned new_bit_size);
88
89 /** Defines a cast function
90 *
91 * This macro defines a cast function from in_type to out_type where
92 * out_type is some structure type that contains a field of type in_type.
93 *
94 * Note that you have to be a bit careful as the generated cast function
95 * destroys constness.
96 */
97 #define NIR_DEFINE_CAST(name, in_type, out_type, field, \
98 type_field, type_value) \
99 static inline out_type * \
100 name(const in_type *parent) \
101 { \
102 assert(parent && parent->type_field == type_value); \
103 return exec_node_data(out_type, parent, field); \
104 }
105
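/*
 * Illustrative sketch (not part of the original header): this is how the
 * per-instruction cast helpers further down in this header are generated.
 * An invocation such as
 *
 *    NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr,
 *                    type, nir_instr_type_alu)
 *
 * expands to roughly
 *
 *    static inline nir_alu_instr *
 *    nir_instr_as_alu(const nir_instr *parent)
 *    {
 *       assert(parent && parent->type == nir_instr_type_alu);
 *       return exec_node_data(nir_alu_instr, parent, instr);
 *    }
 *
 * i.e. it asserts on the discriminant field and then recovers the containing
 * struct from a pointer to its embedded nir_instr member.
 */
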
106 struct nir_function;
107 struct nir_shader;
108 struct nir_instr;
109 struct nir_builder;
110
111
112 /**
113 * Description of built-in state associated with a uniform
114 *
115 * \sa nir_variable::state_slots
116 */
117 typedef struct {
118 gl_state_index16 tokens[STATE_LENGTH];
119 uint16_t swizzle;
120 } nir_state_slot;
121
122 typedef enum {
123 nir_var_shader_in = (1 << 0),
124 nir_var_shader_out = (1 << 1),
125 nir_var_shader_temp = (1 << 2),
126 nir_var_function_temp = (1 << 3),
127 nir_var_uniform = (1 << 4),
128 nir_var_mem_ubo = (1 << 5),
129 nir_var_system_value = (1 << 6),
130 nir_var_mem_ssbo = (1 << 7),
131 nir_var_mem_shared = (1 << 8),
132 nir_var_mem_global = (1 << 9),
133 nir_var_mem_generic = (nir_var_shader_temp |
134 nir_var_function_temp |
135 nir_var_mem_shared |
136 nir_var_mem_global),
137 nir_var_mem_push_const = (1 << 10), /* not actually used for variables */
138 nir_var_mem_constant = (1 << 11),
139 /** Incoming call or ray payload data for ray-tracing shaders */
140 nir_var_shader_call_data = (1 << 12),
141 /** Ray hit attributes */
142 nir_var_ray_hit_attrib = (1 << 13),
143 nir_var_read_only_modes = nir_var_shader_in | nir_var_uniform |
144 nir_var_system_value | nir_var_mem_constant |
145 nir_var_mem_ubo,
146 /** Modes where vector derefs can be indexed as arrays */
147 nir_var_vec_indexable_modes = nir_var_mem_ubo | nir_var_mem_ssbo |
148 nir_var_mem_shared | nir_var_mem_global |
149 nir_var_mem_push_const,
150 nir_num_variable_modes = 14,
151 nir_var_all = (1 << nir_num_variable_modes) - 1,
152 } nir_variable_mode;
153 MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(nir_variable_mode)
154
155 /**
156 * Rounding modes.
157 */
158 typedef enum {
159 nir_rounding_mode_undef = 0,
160 nir_rounding_mode_rtne = 1, /* round to nearest even */
161 nir_rounding_mode_ru = 2, /* round up */
162 nir_rounding_mode_rd = 3, /* round down */
163 nir_rounding_mode_rtz = 4, /* round towards zero */
164 } nir_rounding_mode;
165
166 typedef union {
167 bool b;
168 float f32;
169 double f64;
170 int8_t i8;
171 uint8_t u8;
172 int16_t i16;
173 uint16_t u16;
174 int32_t i32;
175 uint32_t u32;
176 int64_t i64;
177 uint64_t u64;
178 } nir_const_value;
179
180 #define nir_const_value_to_array(arr, c, components, m) \
181 do { \
182 for (unsigned i = 0; i < components; ++i) \
183 arr[i] = c[i].m; \
184 } while (false)
185
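/*
 * Usage sketch (illustrative): the last argument names the nir_const_value
 * union member to read, so the same macro can extract any bit size or base
 * type.  Assuming a 3-component vector of 32-bit floats:
 *
 *    nir_const_value vals[3] = { ... };
 *    float f[3];
 *    nir_const_value_to_array(f, vals, 3, f32);
 */
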
186 static inline nir_const_value
187 nir_const_value_for_raw_uint(uint64_t x, unsigned bit_size)
188 {
189 nir_const_value v;
190 memset(&v, 0, sizeof(v));
191
192 switch (bit_size) {
193 case 1: v.b = x; break;
194 case 8: v.u8 = x; break;
195 case 16: v.u16 = x; break;
196 case 32: v.u32 = x; break;
197 case 64: v.u64 = x; break;
198 default:
199 unreachable("Invalid bit size");
200 }
201
202 return v;
203 }
204
205 static inline nir_const_value
206 nir_const_value_for_int(int64_t i, unsigned bit_size)
207 {
208 nir_const_value v;
209 memset(&v, 0, sizeof(v));
210
211 assert(bit_size <= 64);
212 if (bit_size < 64) {
213 assert(i >= (-(1ll << (bit_size - 1))));
214 assert(i < (1ll << (bit_size - 1)));
215 }
216
217 return nir_const_value_for_raw_uint(i, bit_size);
218 }
219
220 static inline nir_const_value
221 nir_const_value_for_uint(uint64_t u, unsigned bit_size)
222 {
223 nir_const_value v;
224 memset(&v, 0, sizeof(v));
225
226 assert(bit_size <= 64);
227 if (bit_size < 64)
228 assert(u < (1ull << bit_size));
229
230 return nir_const_value_for_raw_uint(u, bit_size);
231 }
232
233 static inline nir_const_value
234 nir_const_value_for_bool(bool b, unsigned bit_size)
235 {
236 /* Booleans use a 0/-1 convention */
237 return nir_const_value_for_int(-(int)b, bit_size);
238 }
239
240 /* This one isn't inline because it requires half-float conversion */
241 nir_const_value nir_const_value_for_float(double b, unsigned bit_size);
242
243 static inline int64_t
244 nir_const_value_as_int(nir_const_value value, unsigned bit_size)
245 {
246 switch (bit_size) {
247 /* int1_t uses 0/-1 convention */
248 case 1: return -(int)value.b;
249 case 8: return value.i8;
250 case 16: return value.i16;
251 case 32: return value.i32;
252 case 64: return value.i64;
253 default:
254 unreachable("Invalid bit size");
255 }
256 }
257
258 static inline uint64_t
259 nir_const_value_as_uint(nir_const_value value, unsigned bit_size)
260 {
261 switch (bit_size) {
262 case 1: return value.b;
263 case 8: return value.u8;
264 case 16: return value.u16;
265 case 32: return value.u32;
266 case 64: return value.u64;
267 default:
268 unreachable("Invalid bit size");
269 }
270 }
271
272 static inline bool
273 nir_const_value_as_bool(nir_const_value value, unsigned bit_size)
274 {
275 int64_t i = nir_const_value_as_int(value, bit_size);
276
277 /* Booleans of any size use 0/-1 convention */
278 assert(i == 0 || i == -1);
279
280 return i;
281 }
282
283 /* This one isn't inline because it requires half-float conversion */
284 double nir_const_value_as_float(nir_const_value value, unsigned bit_size);
285
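/*
 * Example (illustrative): building constants and reading them back.  The
 * nir_const_value_for_* helpers assert that the value fits in the requested
 * bit size, and booleans follow the 0/-1 convention noted above.
 *
 *    nir_const_value c = nir_const_value_for_int(-5, 16);
 *    assert(nir_const_value_as_int(c, 16) == -5);
 *
 *    nir_const_value b = nir_const_value_for_bool(true, 32);
 *    assert(nir_const_value_as_uint(b, 32) == 0xffffffffu);
 *    assert(nir_const_value_as_bool(b, 32));
 */
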
286 typedef struct nir_constant {
287 /**
288 * Value of the constant.
289 *
290 * The field used to back the values supplied by the constant is determined
291 * by the type associated with the \c nir_variable. Constants may be
292 * scalars, vectors, or matrices.
293 */
294 nir_const_value values[NIR_MAX_VEC_COMPONENTS];
295
296 /* We could get this from var->type, but it makes clone *much* easier if we
297 * don't have to care about the type here.
298 */
299 unsigned num_elements;
300
301 /* Array elements / Structure Fields */
302 struct nir_constant **elements;
303 } nir_constant;
304
305 /**
306 * \brief Layout qualifiers for gl_FragDepth.
307 *
308 * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared
309 * with a layout qualifier.
310 */
311 typedef enum {
312 nir_depth_layout_none, /**< No depth layout is specified. */
313 nir_depth_layout_any,
314 nir_depth_layout_greater,
315 nir_depth_layout_less,
316 nir_depth_layout_unchanged
317 } nir_depth_layout;
318
319 /**
320 * Enum keeping track of how a variable was declared.
321 */
322 typedef enum {
323 /**
324 * Normal declaration.
325 */
326 nir_var_declared_normally = 0,
327
328 /**
329 * Variable is implicitly generated by the compiler and should not be
330 * visible via the API.
331 */
332 nir_var_hidden,
333 } nir_var_declaration_type;
334
335 /**
336 * Either a uniform, global variable, shader input, or shader output. Based on
337 * ir_variable - it should be easy to translate between the two.
338 */
339
340 typedef struct nir_variable {
341 struct exec_node node;
342
343 /**
344 * Declared type of the variable
345 */
346 const struct glsl_type *type;
347
348 /**
349 * Declared name of the variable
350 */
351 char *name;
352
353 struct nir_variable_data {
354 /**
355 * Storage class of the variable.
356 *
357 * \sa nir_variable_mode
358 */
359 unsigned mode:14;
360
361 /**
362 * Is the variable read-only?
363 *
364 * This is set for variables declared as \c const, shader inputs,
365 * and uniforms.
366 */
367 unsigned read_only:1;
368 unsigned centroid:1;
369 unsigned sample:1;
370 unsigned patch:1;
371 unsigned invariant:1;
372
373 /**
374 * Precision qualifier.
375 *
376 * In desktop GLSL we do not care about precision qualifiers at all, in
377 * fact, the spec says that precision qualifiers are ignored.
378 *
379 * To make things easy, we make it so that this field is always
380 * GLSL_PRECISION_NONE on desktop shaders. This way all the variables
381 * have the same precision value and the checks we add in the compiler
382 * for this field will never break a desktop shader compile.
383 */
384 unsigned precision:2;
385
386 /**
387 * Can this variable be coalesced with another?
388 *
389 * This is set by nir_lower_io_to_temporaries to say that any
390 * copies involving this variable should stay put. Propagating it can
391 * duplicate the resulting load/store, which is not wanted, and may
392 * result in a load/store of the variable with an indirect offset which
393 * the backend may not be able to handle.
394 */
395 unsigned cannot_coalesce:1;
396
397 /**
398 * When separate shader programs are enabled, only input/outputs between
399 * the stages of a multi-stage separate program can be safely removed
400 * from the shader interface. Other inputs/outputs must remain active.
401 *
402 * This is also used to make sure xfb varyings that are unused by the
403 * fragment shader are not removed.
404 */
405 unsigned always_active_io:1;
406
407 /**
408 * Interpolation mode for shader inputs / outputs
409 *
410 * \sa glsl_interp_mode
411 */
412 unsigned interpolation:3;
413
414 /**
415 * If non-zero, then this variable may be packed along with other variables
416 * into a single varying slot, so this offset should be applied when
417 * accessing components. For example, an offset of 1 means that the x
418 * component of this variable is actually stored in component y of the
419 * location specified by \c location.
420 */
421 unsigned location_frac:2;
422
423 /**
424 * If true, this variable represents an array of scalars that should
425 * be tightly packed. In other words, consecutive array elements
426 * should be stored one component apart, rather than one slot apart.
427 */
428 unsigned compact:1;
429
430 /**
431 * Whether this is a fragment shader output implicitly initialized with
432 * the previous contents of the specified render target at the
433 * framebuffer location corresponding to this shader invocation.
434 */
435 unsigned fb_fetch_output:1;
436
437 /**
438 * Non-zero if this variable is considered bindless as defined by
439 * ARB_bindless_texture.
440 */
441 unsigned bindless:1;
442
443 /**
444 * Was an explicit binding set in the shader?
445 */
446 unsigned explicit_binding:1;
447
448 /**
449 * Was the location explicitly set in the shader?
450 *
451 * If the location is explicitly set in the shader, it \b cannot be changed
452 * by the linker or by the API (e.g., calls to \c glBindAttribLocation have
453 * no effect).
454 */
455 unsigned explicit_location:1;
456
457 /**
458 * Was a transform feedback buffer set in the shader?
459 */
460 unsigned explicit_xfb_buffer:1;
461
462 /**
463 * Was a transform feedback stride set in the shader?
464 */
465 unsigned explicit_xfb_stride:1;
466
467 /**
468 * Was an explicit offset set in the shader?
469 */
470 unsigned explicit_offset:1;
471
472 /**
473 * Layout of the matrix. Uses glsl_matrix_layout values.
474 */
475 unsigned matrix_layout:2;
476
477 /**
478 * Non-zero if this variable was created by lowering a named interface
479 * block.
480 */
481 unsigned from_named_ifc_block:1;
482
483 /**
484 * How the variable was declared. See nir_var_declaration_type.
485 *
486 * This is used to detect variables generated by the compiler, so should
487 * not be visible via the API.
488 */
489 unsigned how_declared:2;
490
491 /**
492 * Is this variable per-view? If so, we know it must be an array with
493 * size corresponding to the number of views.
494 */
495 unsigned per_view:1;
496
497 /**
498 * Whether the variable is per-primitive.
499 * Can be used by Mesh Shader outputs and corresponding Fragment Shader inputs.
500 */
501 unsigned per_primitive:1;
502
503 /**
504 * \brief Layout qualifier for gl_FragDepth. See nir_depth_layout.
505 *
506 * This is not equal to \c nir_depth_layout_none if and only if this
507 * variable is \c gl_FragDepth and a layout qualifier is specified.
508 */
509 unsigned depth_layout:3;
510
511 /**
512 * Vertex stream output identifier.
513 *
514 * For packed outputs, NIR_STREAM_PACKED is set and bits [2*i+1,2*i]
515 * indicate the stream of the i-th component.
516 */
517 unsigned stream:9;
518
519 /**
520 * See gl_access_qualifier.
521 *
522 * Access flags for memory variables (SSBO/global), image uniforms, and
523 * bindless images in uniforms/inputs/outputs.
524 */
525 unsigned access:8;
526
527 /**
528 * Descriptor set binding for sampler or UBO.
529 */
530 unsigned descriptor_set:5;
531
532 /**
533 * output index for dual source blending.
534 */
535 unsigned index;
536
537 /**
538 * Initial binding point for a sampler or UBO.
539 *
540 * For array types, this represents the binding point for the first element.
541 */
542 unsigned binding;
543
544 /**
545 * Storage location of the base of this variable
546 *
547 * The precise meaning of this field depends on the nature of the variable.
548 *
549 * - Vertex shader input: one of the values from \c gl_vert_attrib.
550 * - Vertex shader output: one of the values from \c gl_varying_slot.
551 * - Geometry shader input: one of the values from \c gl_varying_slot.
552 * - Geometry shader output: one of the values from \c gl_varying_slot.
553 * - Fragment shader input: one of the values from \c gl_varying_slot.
554 * - Fragment shader output: one of the values from \c gl_frag_result.
555 * - Task shader output: one of the values from \c gl_varying_slot.
556 * - Mesh shader input: one of the values from \c gl_varying_slot.
557 * - Mesh shader output: one of the values from \c gl_varying_slot.
558 * - Uniforms: Per-stage uniform slot number for default uniform block.
559 * - Uniforms: Index within the uniform block definition for UBO members.
560 * - Non-UBO Uniforms: uniform slot number.
561 * - Other: This field is not currently used.
562 *
563 * If the variable is a uniform, shader input, or shader output, and the
564 * slot has not been assigned, the value will be -1.
565 */
566 int location;
567
568 /**
569 * The actual location of the variable in the IR. Only valid for inputs,
570 * outputs, uniforms (including samplers and images), and for UBO and SSBO
571 * variables in GLSL.
572 */
573 unsigned driver_location;
574
575 /**
576 * Location an atomic counter or transform feedback is stored at.
577 */
578 unsigned offset;
579
580 union {
581 struct {
582 /** Image internal format if specified explicitly, otherwise PIPE_FORMAT_NONE. */
583 enum pipe_format format;
584 } image;
585
586 struct {
587 /**
588 * For OpenCL inline samplers. See cl_sampler_addressing_mode and cl_sampler_filter_mode
589 */
590 unsigned is_inline_sampler : 1;
591 unsigned addressing_mode : 3;
592 unsigned normalized_coordinates : 1;
593 unsigned filter_mode : 1;
594 } sampler;
595
596 struct {
597 /**
598 * Transform feedback buffer.
599 */
600 uint16_t buffer:2;
601
602 /**
603 * Transform feedback stride.
604 */
605 uint16_t stride;
606 } xfb;
607 };
608 } data;
609
610 /**
611 * Identifier for this variable generated by nir_index_vars() that is unique
612 * among other variables in the same exec_list.
613 */
614 unsigned index;
615
616 /* Number of nir_variable_data members */
617 uint16_t num_members;
618
619 /**
620 * Built-in state that backs this uniform
621 *
622 * Once set at variable creation, \c state_slots must remain invariant.
623 * This is because, ideally, this array would be shared by all clones of
624 * this variable in the IR tree. In other words, we'd really like for it
625 * to be a fly-weight.
626 *
627 * If the variable is not a uniform, \c num_state_slots will be zero and
628 * \c state_slots will be \c NULL.
629 */
630 /*@{*/
631 uint16_t num_state_slots; /**< Number of state slots used */
632 nir_state_slot *state_slots; /**< State descriptors. */
633 /*@}*/
634
635 /**
636 * Constant expression assigned in the initializer of the variable
637 *
638 * This field should only be used temporarily by creators of NIR shaders
639 * and then nir_lower_variable_initializers can be used to get rid of them.
640 * Most of the rest of NIR ignores this field or asserts that it's NULL.
641 */
642 nir_constant *constant_initializer;
643
644 /**
645 * Global variable assigned in the initializer of the variable
646 * This field should only be used temporarily by creators of NIR shaders
647 * and then nir_lower_variable_initializers can be used to get rid of them.
648 * Most of the rest of NIR ignores this field or asserts that it's NULL.
649 */
650 struct nir_variable *pointer_initializer;
651
652 /**
653 * For variables that are in an interface block or are an instance of an
654 * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block.
655 *
656 * \sa ir_variable::location
657 */
658 const struct glsl_type *interface_type;
659
660 /**
661 * Description of per-member data for per-member struct variables
662 *
663 * This is used for variables which are actually an amalgamation of
664 * multiple entities such as a struct of built-in values or a struct of
665 * inputs each with their own layout specifier. This is only allowed on
666 * variables with a struct or array of array of struct type.
667 */
668 struct nir_variable_data *members;
669 } nir_variable;
670
671 static inline bool
672 _nir_shader_variable_has_mode(nir_variable *var, unsigned modes)
673 {
674 /* This isn't a shader variable */
675 assert(!(modes & nir_var_function_temp));
676 return var->data.mode & modes;
677 }
678
679 #define nir_foreach_variable_in_list(var, var_list) \
680 foreach_list_typed(nir_variable, var, node, var_list)
681
682 #define nir_foreach_variable_in_list_safe(var, var_list) \
683 foreach_list_typed_safe(nir_variable, var, node, var_list)
684
685 #define nir_foreach_variable_in_shader(var, shader) \
686 nir_foreach_variable_in_list(var, &(shader)->variables)
687
688 #define nir_foreach_variable_in_shader_safe(var, shader) \
689 nir_foreach_variable_in_list_safe(var, &(shader)->variables)
690
691 #define nir_foreach_variable_with_modes(var, shader, modes) \
692 nir_foreach_variable_in_shader(var, shader) \
693 if (_nir_shader_variable_has_mode(var, modes))
694
695 #define nir_foreach_variable_with_modes_safe(var, shader, modes) \
696 nir_foreach_variable_in_shader_safe(var, shader) \
697 if (_nir_shader_variable_has_mode(var, modes))
698
699 #define nir_foreach_shader_in_variable(var, shader) \
700 nir_foreach_variable_with_modes(var, shader, nir_var_shader_in)
701
702 #define nir_foreach_shader_in_variable_safe(var, shader) \
703 nir_foreach_variable_with_modes_safe(var, shader, nir_var_shader_in)
704
705 #define nir_foreach_shader_out_variable(var, shader) \
706 nir_foreach_variable_with_modes(var, shader, nir_var_shader_out)
707
708 #define nir_foreach_shader_out_variable_safe(var, shader) \
709 nir_foreach_variable_with_modes_safe(var, shader, nir_var_shader_out)
710
711 #define nir_foreach_uniform_variable(var, shader) \
712 nir_foreach_variable_with_modes(var, shader, nir_var_uniform)
713
714 #define nir_foreach_uniform_variable_safe(var, shader) \
715 nir_foreach_variable_with_modes_safe(var, shader, nir_var_uniform)
716
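/*
 * Example (illustrative sketch): the mode-filtered iterators make simple
 * whole-shader queries cheap to write.  This hypothetical helper counts the
 * UBO-backed variables of a shader; use the _safe variants instead if
 * variables may be removed while iterating.
 *
 *    static inline unsigned
 *    count_ubo_variables(struct nir_shader *shader)
 *    {
 *       unsigned count = 0;
 *       nir_foreach_variable_with_modes(var, shader, nir_var_mem_ubo)
 *          count++;
 *       return count;
 *    }
 */
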
717 static inline bool
718 nir_variable_is_global(const nir_variable *var)
719 {
720 return var->data.mode != nir_var_function_temp;
721 }
722
723 typedef struct nir_register {
724 struct exec_node node;
725
726 unsigned num_components; /** < number of vector components */
727 unsigned num_array_elems; /** < size of array (0 for no array) */
728
729 /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */
730 uint8_t bit_size;
731
732 /**
733 * True if this register may have different values in different SIMD
734 * invocations of the shader.
735 */
736 bool divergent;
737
738 /** generic register index. */
739 unsigned index;
740
741 /** set of nir_srcs where this register is used (read from) */
742 struct list_head uses;
743
744 /** set of nir_dests where this register is defined (written to) */
745 struct list_head defs;
746
747 /** set of nir_ifs where this register is used as a condition */
748 struct list_head if_uses;
749 } nir_register;
750
751 #define nir_foreach_register(reg, reg_list) \
752 foreach_list_typed(nir_register, reg, node, reg_list)
753 #define nir_foreach_register_safe(reg, reg_list) \
754 foreach_list_typed_safe(nir_register, reg, node, reg_list)
755
756 typedef enum PACKED {
757 nir_instr_type_alu,
758 nir_instr_type_deref,
759 nir_instr_type_call,
760 nir_instr_type_tex,
761 nir_instr_type_intrinsic,
762 nir_instr_type_load_const,
763 nir_instr_type_jump,
764 nir_instr_type_ssa_undef,
765 nir_instr_type_phi,
766 nir_instr_type_parallel_copy,
767 } nir_instr_type;
768
769 typedef struct nir_instr {
770 struct exec_node node;
771 struct list_head gc_node;
772 struct nir_block *block;
773 nir_instr_type type;
774
775 /* A temporary for optimization and analysis passes to use for storing
776 * flags. For instance, DCE uses this to store the "dead/live" info.
777 */
778 uint8_t pass_flags;
779
780 /** generic instruction index. */
781 uint32_t index;
782 } nir_instr;
783
784 static inline nir_instr *
785 nir_instr_next(nir_instr *instr)
786 {
787 struct exec_node *next = exec_node_get_next(&instr->node);
788 if (exec_node_is_tail_sentinel(next))
789 return NULL;
790 else
791 return exec_node_data(nir_instr, next, node);
792 }
793
794 static inline nir_instr *
795 nir_instr_prev(nir_instr *instr)
796 {
797 struct exec_node *prev = exec_node_get_prev(&instr->node);
798 if (exec_node_is_head_sentinel(prev))
799 return NULL;
800 else
801 return exec_node_data(nir_instr, prev, node);
802 }
803
804 static inline bool
805 nir_instr_is_first(const nir_instr *instr)
806 {
807 return exec_node_is_head_sentinel(exec_node_get_prev_const(&instr->node));
808 }
809
810 static inline bool
811 nir_instr_is_last(const nir_instr *instr)
812 {
813 return exec_node_is_tail_sentinel(exec_node_get_next_const(&instr->node));
814 }
815
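/*
 * Example (illustrative): nir_instr_next()/nir_instr_prev() return NULL at
 * the list sentinels, so a forward walk from an arbitrary instruction to the
 * end of its block can be written as:
 *
 *    for (nir_instr *it = instr; it != NULL; it = nir_instr_next(it)) {
 *       // ... inspect "it" ...
 *    }
 */
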
816 typedef struct nir_ssa_def {
817 /** Instruction which produces this SSA value. */
818 nir_instr *parent_instr;
819
820 /** set of nir_srcs where this SSA value is used (read from) */
821 struct list_head uses;
822
823 /** set of nir_ifs where this SSA value is used as a condition */
824 struct list_head if_uses;
825
826 /** generic SSA definition index. */
827 unsigned index;
828
829 uint8_t num_components;
830
831 /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */
832 uint8_t bit_size;
833
834 /**
835 * True if this SSA value may have different values in different SIMD
836 * invocations of the shader. This is set by nir_divergence_analysis.
837 */
838 bool divergent;
839 } nir_ssa_def;
840
841 struct nir_src;
842
843 typedef struct {
844 nir_register *reg;
845 struct nir_src *indirect; /** < NULL for no indirect offset */
846 unsigned base_offset;
847
848 /* TODO use-def chain goes here */
849 } nir_reg_src;
850
851 typedef struct {
852 nir_instr *parent_instr;
853 struct list_head def_link;
854
855 nir_register *reg;
856 struct nir_src *indirect; /** < NULL for no indirect offset */
857 unsigned base_offset;
858
859 /* TODO def-use chain goes here */
860 } nir_reg_dest;
861
862 struct nir_if;
863
864 typedef struct nir_src {
865 union {
866 /** Instruction that consumes this value as a source. */
867 nir_instr *parent_instr;
868 struct nir_if *parent_if;
869 };
870
871 struct list_head use_link;
872
873 union {
874 nir_reg_src reg;
875 nir_ssa_def *ssa;
876 };
877
878 bool is_ssa;
879 } nir_src;
880
881 static inline nir_src
882 nir_src_init(void)
883 {
884 nir_src src = { { NULL } };
885 return src;
886 }
887
888 #define NIR_SRC_INIT nir_src_init()
889
890 #define nir_foreach_use(src, reg_or_ssa_def) \
891 list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link)
892
893 #define nir_foreach_use_safe(src, reg_or_ssa_def) \
894 list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->uses, use_link)
895
896 #define nir_foreach_if_use(src, reg_or_ssa_def) \
897 list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link)
898
899 #define nir_foreach_if_use_safe(src, reg_or_ssa_def) \
900 list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link)
901
902 typedef struct {
903 union {
904 nir_reg_dest reg;
905 nir_ssa_def ssa;
906 };
907
908 bool is_ssa;
909 } nir_dest;
910
911 static inline nir_dest
912 nir_dest_init(void)
913 {
914 nir_dest dest = { { { NULL } } };
915 return dest;
916 }
917
918 #define NIR_DEST_INIT nir_dest_init()
919
920 #define nir_foreach_def(dest, reg) \
921 list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link)
922
923 #define nir_foreach_def_safe(dest, reg) \
924 list_for_each_entry_safe(nir_dest, dest, &(reg)->defs, reg.def_link)
925
926 static inline nir_src
927 nir_src_for_ssa(nir_ssa_def *def)
928 {
929 nir_src src = NIR_SRC_INIT;
930
931 src.is_ssa = true;
932 src.ssa = def;
933
934 return src;
935 }
936
937 static inline nir_src
938 nir_src_for_reg(nir_register *reg)
939 {
940 nir_src src = NIR_SRC_INIT;
941
942 src.is_ssa = false;
943 src.reg.reg = reg;
944 src.reg.indirect = NULL;
945 src.reg.base_offset = 0;
946
947 return src;
948 }
949
950 static inline nir_dest
951 nir_dest_for_reg(nir_register *reg)
952 {
953 nir_dest dest = NIR_DEST_INIT;
954
955 dest.reg.reg = reg;
956
957 return dest;
958 }
959
960 static inline unsigned
961 nir_src_bit_size(nir_src src)
962 {
963 return src.is_ssa ? src.ssa->bit_size : src.reg.reg->bit_size;
964 }
965
966 static inline unsigned
967 nir_src_num_components(nir_src src)
968 {
969 return src.is_ssa ? src.ssa->num_components : src.reg.reg->num_components;
970 }
971
972 static inline bool
973 nir_src_is_const(nir_src src)
974 {
975 return src.is_ssa &&
976 src.ssa->parent_instr->type == nir_instr_type_load_const;
977 }
978
979 static inline bool
980 nir_src_is_undef(nir_src src)
981 {
982 return src.is_ssa &&
983 src.ssa->parent_instr->type == nir_instr_type_ssa_undef;
984 }
985
986 static inline bool
987 nir_src_is_divergent(nir_src src)
988 {
989 return src.is_ssa ? src.ssa->divergent : src.reg.reg->divergent;
990 }
991
992 static inline unsigned
993 nir_dest_bit_size(nir_dest dest)
994 {
995 return dest.is_ssa ? dest.ssa.bit_size : dest.reg.reg->bit_size;
996 }
997
998 static inline unsigned
999 nir_dest_num_components(nir_dest dest)
1000 {
1001 return dest.is_ssa ? dest.ssa.num_components : dest.reg.reg->num_components;
1002 }
1003
1004 static inline bool
1005 nir_dest_is_divergent(nir_dest dest)
1006 {
1007 return dest.is_ssa ? dest.ssa.divergent : dest.reg.reg->divergent;
1008 }
1009
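/*
 * Example (illustrative): wrapping an SSA value as a source and querying it.
 * The helpers above transparently handle both the SSA and register forms.
 *
 *    nir_ssa_def *def = ...;        // e.g. &some_alu->dest.dest.ssa
 *    nir_src src = nir_src_for_ssa(def);
 *    unsigned bits = nir_src_bit_size(src);        // == def->bit_size
 *    unsigned comps = nir_src_num_components(src); // == def->num_components
 *    bool constant = nir_src_is_const(src);        // true for load_const defs
 */
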
1010 /* Are all components the same, ie. .xxxx */
1011 static inline bool
1012 nir_is_same_comp_swizzle(uint8_t *swiz, unsigned nr_comp)
1013 {
1014 for (unsigned i = 1; i < nr_comp; i++)
1015 if (swiz[i] != swiz[0])
1016 return false;
1017 return true;
1018 }
1019
1020 /* Are all components sequential, ie. .yzw */
1021 static inline bool
1022 nir_is_sequential_comp_swizzle(uint8_t *swiz, unsigned nr_comp)
1023 {
1024 for (unsigned i = 1; i < nr_comp; i++)
1025 if (swiz[i] != (swiz[0] + i))
1026 return false;
1027 return true;
1028 }
1029
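/*
 * Example (illustrative): for a swizzle of .zzz the "same component" check
 * holds, while .yzw is sequential starting from component 1.
 *
 *    uint8_t broadcast[3] = { 2, 2, 2 };   // .zzz
 *    uint8_t shifted[3]   = { 1, 2, 3 };   // .yzw
 *    assert(nir_is_same_comp_swizzle(broadcast, 3));
 *    assert(nir_is_sequential_comp_swizzle(shifted, 3));
 */
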
1030 void nir_src_copy(nir_src *dest, const nir_src *src);
1031 void nir_dest_copy(nir_dest *dest, const nir_dest *src);
1032
1033 typedef struct {
1034 /** Base source */
1035 nir_src src;
1036
1037 /**
1038 * \name input modifiers
1039 */
1040 /*@{*/
1041 /**
1042 * For inputs interpreted as floating point, flips the sign bit. For
1043 * inputs interpreted as integers, performs the two's complement negation.
1044 */
1045 bool negate;
1046
1047 /**
1048 * Clears the sign bit for floating point values, and computes the integer
1049 * absolute value for integers. Note that the negate modifier acts after
1050 * the absolute value modifier, therefore if both are set then all inputs
1051 * will become negative.
1052 */
1053 bool abs;
1054 /*@}*/
1055
1056 /**
1057 * For each input component, says which component of the register it is
1058 * chosen from.
1059 *
1060 * Note that which elements of the swizzle are used and which are ignored
1061 * are based on the write mask for most opcodes - for example, a statement
1062 * like "foo.xzw = bar.zyx" would have a writemask of 1101b and a swizzle
1063 * of {2, x, 1, 0} where x means "don't care."
1064 */
1065 uint8_t swizzle[NIR_MAX_VEC_COMPONENTS];
1066 } nir_alu_src;
1067
1068 typedef struct {
1069 /** Base destination */
1070 nir_dest dest;
1071
1072 /**
1073 * Saturate output modifier
1074 *
1075 * Only valid for opcodes that output floating-point numbers. Clamps the
1076 * output to between 0.0 and 1.0 inclusive.
1077 */
1078 bool saturate;
1079
1080 /**
1081 * Write-mask
1082 *
1083 * Ignored if dest.is_ssa is true
1084 */
1085 unsigned write_mask : NIR_MAX_VEC_COMPONENTS;
1086 } nir_alu_dest;
1087
1088 /** NIR sized and unsized types
1089 *
1090 * The values in this enum are carefully chosen so that the sized type is
1091 * just the unsized type OR the number of bits.
1092 */
1093 typedef enum PACKED {
1094 nir_type_invalid = 0, /* Not a valid type */
1095 nir_type_int = 2,
1096 nir_type_uint = 4,
1097 nir_type_bool = 6,
1098 nir_type_float = 128,
1099 nir_type_bool1 = 1 | nir_type_bool,
1100 nir_type_bool8 = 8 | nir_type_bool,
1101 nir_type_bool16 = 16 | nir_type_bool,
1102 nir_type_bool32 = 32 | nir_type_bool,
1103 nir_type_int1 = 1 | nir_type_int,
1104 nir_type_int8 = 8 | nir_type_int,
1105 nir_type_int16 = 16 | nir_type_int,
1106 nir_type_int32 = 32 | nir_type_int,
1107 nir_type_int64 = 64 | nir_type_int,
1108 nir_type_uint1 = 1 | nir_type_uint,
1109 nir_type_uint8 = 8 | nir_type_uint,
1110 nir_type_uint16 = 16 | nir_type_uint,
1111 nir_type_uint32 = 32 | nir_type_uint,
1112 nir_type_uint64 = 64 | nir_type_uint,
1113 nir_type_float16 = 16 | nir_type_float,
1114 nir_type_float32 = 32 | nir_type_float,
1115 nir_type_float64 = 64 | nir_type_float,
1116 } nir_alu_type;
1117
1118 #define NIR_ALU_TYPE_SIZE_MASK 0x79
1119 #define NIR_ALU_TYPE_BASE_TYPE_MASK 0x86
1120
1121 static inline unsigned
1122 nir_alu_type_get_type_size(nir_alu_type type)
1123 {
1124 return type & NIR_ALU_TYPE_SIZE_MASK;
1125 }
1126
1127 static inline nir_alu_type
1128 nir_alu_type_get_base_type(nir_alu_type type)
1129 {
1130 return (nir_alu_type)(type & NIR_ALU_TYPE_BASE_TYPE_MASK);
1131 }
1132
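/*
 * Example (illustrative): the encoding above makes splitting a sized type
 * into its base type and bit size a pair of simple masks.
 *
 *    assert(nir_alu_type_get_type_size(nir_type_float32) == 32);
 *    assert(nir_alu_type_get_base_type(nir_type_float32) == nir_type_float);
 *    assert((nir_type_uint | 16) == nir_type_uint16);
 */
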
1133 static inline nir_alu_type
1134 nir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type)
1135 {
1136 switch (base_type) {
1137 case GLSL_TYPE_BOOL:
1138 return nir_type_bool1;
1139 break;
1140 case GLSL_TYPE_UINT:
1141 return nir_type_uint32;
1142 break;
1143 case GLSL_TYPE_INT:
1144 return nir_type_int32;
1145 break;
1146 case GLSL_TYPE_UINT16:
1147 return nir_type_uint16;
1148 break;
1149 case GLSL_TYPE_INT16:
1150 return nir_type_int16;
1151 break;
1152 case GLSL_TYPE_UINT8:
1153 return nir_type_uint8;
1154 case GLSL_TYPE_INT8:
1155 return nir_type_int8;
1156 case GLSL_TYPE_UINT64:
1157 return nir_type_uint64;
1158 break;
1159 case GLSL_TYPE_INT64:
1160 return nir_type_int64;
1161 break;
1162 case GLSL_TYPE_FLOAT:
1163 return nir_type_float32;
1164 break;
1165 case GLSL_TYPE_FLOAT16:
1166 return nir_type_float16;
1167 break;
1168 case GLSL_TYPE_DOUBLE:
1169 return nir_type_float64;
1170 break;
1171
1172 case GLSL_TYPE_SAMPLER:
1173 case GLSL_TYPE_IMAGE:
1174 case GLSL_TYPE_ATOMIC_UINT:
1175 case GLSL_TYPE_STRUCT:
1176 case GLSL_TYPE_INTERFACE:
1177 case GLSL_TYPE_ARRAY:
1178 case GLSL_TYPE_VOID:
1179 case GLSL_TYPE_SUBROUTINE:
1180 case GLSL_TYPE_FUNCTION:
1181 case GLSL_TYPE_ERROR:
1182 return nir_type_invalid;
1183 }
1184
1185 unreachable("unknown type");
1186 }
1187
1188 static inline nir_alu_type
1189 nir_get_nir_type_for_glsl_type(const struct glsl_type *type)
1190 {
1191 return nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(type));
1192 }
1193
1194 static inline enum glsl_base_type
1195 nir_get_glsl_base_type_for_nir_type(nir_alu_type base_type)
1196 {
1197 switch (base_type) {
1198 case nir_type_bool1:
1199 return GLSL_TYPE_BOOL;
1200 case nir_type_uint32:
1201 return GLSL_TYPE_UINT;
1202 case nir_type_int32:
1203 return GLSL_TYPE_INT;
1204 case nir_type_uint16:
1205 return GLSL_TYPE_UINT16;
1206 case nir_type_int16:
1207 return GLSL_TYPE_INT16;
1208 case nir_type_uint8:
1209 return GLSL_TYPE_UINT8;
1210 case nir_type_int8:
1211 return GLSL_TYPE_INT8;
1212 case nir_type_uint64:
1213 return GLSL_TYPE_UINT64;
1214 case nir_type_int64:
1215 return GLSL_TYPE_INT64;
1216 case nir_type_float32:
1217 return GLSL_TYPE_FLOAT;
1218 case nir_type_float16:
1219 return GLSL_TYPE_FLOAT16;
1220 case nir_type_float64:
1221 return GLSL_TYPE_DOUBLE;
1222
1223 default: unreachable("Not a sized nir_alu_type");
1224 }
1225 }
1226
1227 nir_op nir_type_conversion_op(nir_alu_type src, nir_alu_type dst,
1228 nir_rounding_mode rnd);
1229
1230 static inline nir_op
1231 nir_op_vec(unsigned components)
1232 {
1233 switch (components) {
1234 case 1: return nir_op_mov;
1235 case 2: return nir_op_vec2;
1236 case 3: return nir_op_vec3;
1237 case 4: return nir_op_vec4;
1238 case 5: return nir_op_vec5;
1239 case 8: return nir_op_vec8;
1240 case 16: return nir_op_vec16;
1241 default: unreachable("bad component count");
1242 }
1243 }
1244
1245 static inline bool
1246 nir_op_is_vec(nir_op op)
1247 {
1248 switch (op) {
1249 case nir_op_mov:
1250 case nir_op_vec2:
1251 case nir_op_vec3:
1252 case nir_op_vec4:
1253 case nir_op_vec5:
1254 case nir_op_vec8:
1255 case nir_op_vec16:
1256 return true;
1257 default:
1258 return false;
1259 }
1260 }
1261
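/*
 * Example (illustrative): passes that rebuild a value from individual
 * components typically pick the opcode with nir_op_vec() so that the
 * single-component case degenerates to a plain mov.
 *
 *    unsigned comps = nir_dest_num_components(alu->dest.dest);
 *    nir_op vec_op = nir_op_vec(comps);   // nir_op_mov when comps == 1
 */
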
1262 static inline bool
1263 nir_is_float_control_signed_zero_inf_nan_preserve(unsigned execution_mode, unsigned bit_size)
1264 {
1265 return (16 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16) ||
1266 (32 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32) ||
1267 (64 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64);
1268 }
1269
1270 static inline bool
1271 nir_is_denorm_flush_to_zero(unsigned execution_mode, unsigned bit_size)
1272 {
1273 return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16) ||
1274 (32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) ||
1275 (64 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64);
1276 }
1277
1278 static inline bool
1279 nir_is_denorm_preserve(unsigned execution_mode, unsigned bit_size)
1280 {
1281 return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP16) ||
1282 (32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP32) ||
1283 (64 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP64);
1284 }
1285
1286 static inline bool
1287 nir_is_rounding_mode_rtne(unsigned execution_mode, unsigned bit_size)
1288 {
1289 return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) ||
1290 (32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) ||
1291 (64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64);
1292 }
1293
1294 static inline bool
1295 nir_is_rounding_mode_rtz(unsigned execution_mode, unsigned bit_size)
1296 {
1297 return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) ||
1298 (32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) ||
1299 (64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64);
1300 }
1301
1302 static inline bool
1303 nir_has_any_rounding_mode_rtz(unsigned execution_mode)
1304 {
1305 return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) ||
1306 (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) ||
1307 (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64);
1308 }
1309
1310 static inline bool
1311 nir_has_any_rounding_mode_rtne(unsigned execution_mode)
1312 {
1313 return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) ||
1314 (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) ||
1315 (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64);
1316 }
1317
1318 static inline nir_rounding_mode
1319 nir_get_rounding_mode_from_float_controls(unsigned execution_mode,
1320 nir_alu_type type)
1321 {
1322 if (nir_alu_type_get_base_type(type) != nir_type_float)
1323 return nir_rounding_mode_undef;
1324
1325 unsigned bit_size = nir_alu_type_get_type_size(type);
1326
1327 if (nir_is_rounding_mode_rtz(execution_mode, bit_size))
1328 return nir_rounding_mode_rtz;
1329 if (nir_is_rounding_mode_rtne(execution_mode, bit_size))
1330 return nir_rounding_mode_rtne;
1331 return nir_rounding_mode_undef;
1332 }
1333
1334 static inline bool
1335 nir_has_any_rounding_mode_enabled(unsigned execution_mode)
1336 {
1337 bool result =
1338 nir_has_any_rounding_mode_rtne(execution_mode) ||
1339 nir_has_any_rounding_mode_rtz(execution_mode);
1340 return result;
1341 }
1342
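/*
 * Example (illustrative): a float-controls-aware lowering pass can pick the
 * conversion rounding mode from the shader's execution-mode bits.  Here
 * "execution_mode" is assumed to come from the shader_info float-controls
 * execution mode field.
 *
 *    nir_rounding_mode rnd =
 *       nir_get_rounding_mode_from_float_controls(execution_mode,
 *                                                  nir_type_float16);
 *    // rnd is rtz or rtne if the corresponding FP16 bit is set,
 *    // nir_rounding_mode_undef otherwise.
 */
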
1343 typedef enum {
1344 /**
1345 * Operation where the first two sources are commutative.
1346 *
1347 * For 2-source operations, this is just mathematical commutativity. Some
1348 * 3-source operations, like ffma, are only commutative in the first two
1349 * sources.
1350 */
1351 NIR_OP_IS_2SRC_COMMUTATIVE = (1 << 0),
1352
1353 /**
1354 * Operation is associative
1355 */
1356 NIR_OP_IS_ASSOCIATIVE = (1 << 1),
1357 } nir_op_algebraic_property;
1358
1359 /* vec16 is the widest ALU op in NIR, so the maximum number of inputs to an
1360 * ALU instruction is the same as NIR_MAX_VEC_COMPONENTS.
1361 */
1362 #define NIR_ALU_MAX_INPUTS NIR_MAX_VEC_COMPONENTS
1363
1364 typedef struct nir_op_info {
1365 /** Name of the NIR ALU opcode */
1366 const char *name;
1367
1368 /** Number of inputs (sources) */
1369 uint8_t num_inputs;
1370
1371 /**
1372 * The number of components in the output
1373 *
1374 * If non-zero, this is the size of the output and input sizes are
1375 * explicitly given; swizzle and writemask are still in effect, but if
1376 * the output component is masked out, then the input component may
1377 * still be in use.
1378 *
1379 * If zero, the opcode acts in the standard, per-component manner; the
1380 * operation is performed on each component (except the ones that are
1381 * masked out) with the input being taken from the input swizzle for
1382 * that component.
1383 *
1384 * The size of some of the inputs may be given (i.e. non-zero) even
1385 * though output_size is zero; in that case, the inputs with a zero
1386 * size act per-component, while the inputs with non-zero size don't.
1387 */
1388 uint8_t output_size;
1389
1390 /**
1391 * The type of vector that the instruction outputs. Note that the
1392 * saturate modifier is only allowed on outputs with the float type.
1393 */
1394 nir_alu_type output_type;
1395
1396 /**
1397 * The number of components in each input
1398 *
1399 * See nir_op_infos::output_size for more detail about the relationship
1400 * between input and output sizes.
1401 */
1402 uint8_t input_sizes[NIR_ALU_MAX_INPUTS];
1403
1404 /**
1405 * The type of vector that each input takes. Note that negate and
1406 * absolute value are only allowed on inputs with int or float type and
1407 * behave differently on the two.
1408 */
1409 nir_alu_type input_types[NIR_ALU_MAX_INPUTS];
1410
1411 /** Algebraic properties of this opcode */
1412 nir_op_algebraic_property algebraic_properties;
1413
1414 /** Whether this represents a numeric conversion opcode */
1415 bool is_conversion;
1416 } nir_op_info;
1417
1418 /** Metadata for each nir_op, indexed by opcode */
1419 extern const nir_op_info nir_op_infos[nir_num_opcodes];
1420
1421 typedef struct nir_alu_instr {
1422 /** Base instruction */
1423 nir_instr instr;
1424
1425 /** Opcode */
1426 nir_op op;
1427
1428 /** Indicates that this ALU instruction generates an exact value
1429 *
1430 * This is kind of a mixture of GLSL "precise" and "invariant" and not
1431 * really equivalent to either. This indicates that the value generated by
1432 * this operation is high-precision and any code transformations that touch
1433 * it must ensure that the resulting value is bit-for-bit identical to the
1434 * original.
1435 */
1436 bool exact:1;
1437
1438 /**
1439 * Indicates that this instruction does not cause signed integer wrapping
1440 * to occur, in the form of overflow or underflow.
1441 */
1442 bool no_signed_wrap:1;
1443
1444 /**
1445 * Indicates that this instruction does not cause unsigned integer wrapping
1446 * to occur, in the form of overflow or underflow.
1447 */
1448 bool no_unsigned_wrap:1;
1449
1450 /** Destination */
1451 nir_alu_dest dest;
1452
1453 /** Sources
1454 *
1455 * The size of the array is given by nir_op_info::num_inputs.
1456 */
1457 nir_alu_src src[];
1458 } nir_alu_instr;
1459
1460 void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src);
1461 void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src);
1462
1463 bool nir_alu_instr_is_copy(nir_alu_instr *instr);
1464
1465 /* is this source channel used? */
1466 static inline bool
1467 nir_alu_instr_channel_used(const nir_alu_instr *instr, unsigned src,
1468 unsigned channel)
1469 {
1470 if (nir_op_infos[instr->op].input_sizes[src] > 0)
1471 return channel < nir_op_infos[instr->op].input_sizes[src];
1472
1473 return (instr->dest.write_mask >> channel) & 1;
1474 }
1475
1476 static inline nir_component_mask_t
1477 nir_alu_instr_src_read_mask(const nir_alu_instr *instr, unsigned src)
1478 {
1479 nir_component_mask_t read_mask = 0;
1480 for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) {
1481 if (!nir_alu_instr_channel_used(instr, src, c))
1482 continue;
1483
1484 read_mask |= (1 << instr->src[src].swizzle[c]);
1485 }
1486 return read_mask;
1487 }
1488
1489 /**
1490 * Get the number of channels used for a source
1491 */
1492 static inline unsigned
1493 nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src)
1494 {
1495 if (nir_op_infos[instr->op].input_sizes[src] > 0)
1496 return nir_op_infos[instr->op].input_sizes[src];
1497
1498 return nir_dest_num_components(instr->dest.dest);
1499 }
1500
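/*
 * Worked example (illustrative): for a hypothetical per-component instruction
 * with writemask 0b0101 and a source swizzle of .yyxx, only channels 0 and 2
 * are used, so nir_alu_instr_src_read_mask() reports components x and y:
 *
 *    swizzle = { 1, 1, 0, 0 }               // .yyxx
 *    channels used (from writemask) = 0, 2
 *    read_mask = (1 << 1) | (1 << 0) = 0b0011
 */
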
1501 static inline bool
1502 nir_alu_instr_is_comparison(const nir_alu_instr *instr)
1503 {
1504 switch (instr->op) {
1505 case nir_op_flt:
1506 case nir_op_fge:
1507 case nir_op_feq:
1508 case nir_op_fneu:
1509 case nir_op_ilt:
1510 case nir_op_ult:
1511 case nir_op_ige:
1512 case nir_op_uge:
1513 case nir_op_ieq:
1514 case nir_op_ine:
1515 case nir_op_i2b1:
1516 case nir_op_f2b1:
1517 case nir_op_inot:
1518 return true;
1519 default:
1520 return false;
1521 }
1522 }
1523
1524 bool nir_const_value_negative_equal(nir_const_value c1, nir_const_value c2,
1525 nir_alu_type full_type);
1526
1527 bool nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2,
1528 unsigned src1, unsigned src2);
1529
1530 bool nir_alu_srcs_negative_equal(const nir_alu_instr *alu1,
1531 const nir_alu_instr *alu2,
1532 unsigned src1, unsigned src2);
1533
1534 bool nir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, unsigned srcn);
1535
1536 typedef enum {
1537 nir_deref_type_var,
1538 nir_deref_type_array,
1539 nir_deref_type_array_wildcard,
1540 nir_deref_type_ptr_as_array,
1541 nir_deref_type_struct,
1542 nir_deref_type_cast,
1543 } nir_deref_type;
1544
1545 typedef struct {
1546 nir_instr instr;
1547
1548 /** The type of this deref instruction */
1549 nir_deref_type deref_type;
1550
1551 /** Bitmask of the modes the underlying variable might have
1552 *
1553 * For OpenCL-style generic pointers, we may not know exactly what mode it
1554 * is at any given point in time in the compile process. This bitfield
1555 * contains the set of modes which it MAY be.
1556 *
1557 * Generally, this field should not be accessed directly. Use one of the
1558 * nir_deref_mode_ helpers instead.
1559 */
1560 nir_variable_mode modes;
1561
1562 /** The dereferenced type of the resulting pointer value */
1563 const struct glsl_type *type;
1564
1565 union {
1566 /** Variable being dereferenced if deref_type is a deref_var */
1567 nir_variable *var;
1568
1569 /** Parent deref if deref_type is not deref_var */
1570 nir_src parent;
1571 };
1572
1573 /** Additional deref parameters */
1574 union {
1575 struct {
1576 nir_src index;
1577 } arr;
1578
1579 struct {
1580 unsigned index;
1581 } strct;
1582
1583 struct {
1584 unsigned ptr_stride;
1585 unsigned align_mul;
1586 unsigned align_offset;
1587 } cast;
1588 };
1589
1590 /** Destination to store the resulting "pointer" */
1591 nir_dest dest;
1592 } nir_deref_instr;
1593
1594 /** Returns true if deref might have one of the given modes
1595 *
1596 * For multi-mode derefs, this returns true if any of the deref's possible
1597 * modes matches any of the specified modes. This function returning true
1598 * does NOT mean that the deref definitely has one of those modes. It simply
1599 * means that, with the best information we have at the time, it might.
1600 */
1601 static inline bool
1602 nir_deref_mode_may_be(const nir_deref_instr *deref, nir_variable_mode modes)
1603 {
1604 assert(!(modes & ~nir_var_all));
1605 assert(deref->modes != 0);
1606 return deref->modes & modes;
1607 }
1608
1609 /** Returns true if deref must have one of the given modes
1610 *
1611 * For multi-mode derefs, this returns true if NIR can prove that the given
1612 * deref has one of the specified modes. This function returning false does
1613 * NOT mean that the deref doesn't have one of the given modes. It very well may
1614 * have one of those modes, we just don't have enough information to prove
1615 * that it does for sure.
1616 */
1617 static inline bool
1618 nir_deref_mode_must_be(const nir_deref_instr *deref, nir_variable_mode modes)
1619 {
1620 assert(!(modes & ~nir_var_all));
1621 assert(deref->modes != 0);
1622 return !(deref->modes & ~modes);
1623 }
1624
1625 /** Returns true if deref has the given mode
1626 *
1627 * This returns true if the deref has exactly the mode specified. If the
1628 * deref may have that mode but may also have a different mode (i.e. modes has
1629 * multiple bits set), this will assert-fail.
1630 *
1631 * If you're confused about which nir_deref_mode_ helper to use, use this one
1632 * or nir_deref_mode_is_one_of below.
1633 */
1634 static inline bool
1635 nir_deref_mode_is(const nir_deref_instr *deref, nir_variable_mode mode)
1636 {
1637 assert(util_bitcount(mode) == 1 && (mode & nir_var_all));
1638 assert(deref->modes != 0);
1639
1640 /* This is only for "simple" cases, so if modes might interact with this
1641 * deref then the deref has to have a single mode.
1642 */
1643 if (nir_deref_mode_may_be(deref, mode)) {
1644 assert(util_bitcount(deref->modes) == 1);
1645 assert(deref->modes == mode);
1646 }
1647
1648 return deref->modes == mode;
1649 }
1650
1651 /** Returns true if deref has one of the given modes
1652 *
1653 * This returns true if the deref has exactly one possible mode and that mode
1654 * is one of the modes specified. If the deref may have one of those modes
1655 * but may also have a different mode (i.e. modes has multiple bits set), this
1656 * will assert-fail.
1657 */
1658 static inline bool
1659 nir_deref_mode_is_one_of(const nir_deref_instr *deref, nir_variable_mode modes)
1660 {
1661 /* This is only for "simple" cases, so if modes might interact with this
1662 * deref then the deref has to have a single mode.
1663 */
1664 if (nir_deref_mode_may_be(deref, modes)) {
1665 assert(util_bitcount(deref->modes) == 1);
1666 assert(nir_deref_mode_must_be(deref, modes));
1667 }
1668
1669 return nir_deref_mode_may_be(deref, modes);
1670 }
1671
1672 /** Returns true if deref's possible modes lie in the given set of modes
1673 *
1674 * This returns true if the deref's modes lie in the given set of modes. If
1675 * the deref's modes overlap with the specified modes but aren't entirely
1676 * contained in the specified set of modes, this will assert-fail. In
1677 * particular, if this is used in a generic pointers scenario, the specified
1678 * modes has to contain all or none of the possible generic pointer modes.
1679 *
1680 * This is intended mostly for mass-lowering of derefs which might have
1681 * generic pointers.
1682 */
1683 static inline bool
1684 nir_deref_mode_is_in_set(const nir_deref_instr *deref, nir_variable_mode modes)
1685 {
1686 if (nir_deref_mode_may_be(deref, modes))
1687 assert(nir_deref_mode_must_be(deref, modes));
1688
1689 return nir_deref_mode_may_be(deref, modes);
1690 }
1691
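/*
 * Example (illustrative): a pass that only understands SSBO access can use
 * nir_deref_mode_is() for the simple single-mode case, while a pass that must
 * also behave correctly for OpenCL-style generic pointers is usually
 * structured around the may_be/must_be pair:
 *
 *    if (!nir_deref_mode_may_be(deref, nir_var_mem_ssbo))
 *       return false;                      // definitely not SSBO, skip
 *    if (!nir_deref_mode_must_be(deref, nir_var_mem_ssbo))
 *       return false;                      // might be SSBO, but unproven
 *    // ... safe to treat the deref as SSBO memory here ...
 */
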
1692 static inline nir_deref_instr *nir_src_as_deref(nir_src src);
1693
1694 static inline nir_deref_instr *
1695 nir_deref_instr_parent(const nir_deref_instr *instr)
1696 {
1697 if (instr->deref_type == nir_deref_type_var)
1698 return NULL;
1699 else
1700 return nir_src_as_deref(instr->parent);
1701 }
1702
1703 static inline nir_variable *
1704 nir_deref_instr_get_variable(const nir_deref_instr *instr)
1705 {
1706 while (instr->deref_type != nir_deref_type_var) {
1707 if (instr->deref_type == nir_deref_type_cast)
1708 return NULL;
1709
1710 instr = nir_deref_instr_parent(instr);
1711 }
1712
1713 return instr->var;
1714 }
1715
1716 bool nir_deref_instr_has_indirect(nir_deref_instr *instr);
1717 bool nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr);
1718 bool nir_deref_instr_has_complex_use(nir_deref_instr *instr);
1719
1720 bool nir_deref_instr_remove_if_unused(nir_deref_instr *instr);
1721
1722 unsigned nir_deref_instr_array_stride(nir_deref_instr *instr);
1723
1724 typedef struct {
1725 nir_instr instr;
1726
1727 struct nir_function *callee;
1728
1729 unsigned num_params;
1730 nir_src params[];
1731 } nir_call_instr;
1732
1733 #include "nir_intrinsics.h"
1734
1735 #define NIR_INTRINSIC_MAX_CONST_INDEX 5
1736
1737 /** Represents an intrinsic
1738 *
1739 * An intrinsic is an instruction type for handling things that are
1740 * more-or-less regular operations but don't just consume and produce SSA
1741 * values like ALU operations do. Intrinsics are not for things that have
1742 * special semantic meaning such as phi nodes and parallel copies.
1743 * Examples of intrinsics include variable load/store operations, system
1744 * value loads, and the like. Even though texturing more-or-less falls
1745 * under this category, texturing is its own instruction type because
1746 * trying to represent texturing with intrinsics would lead to a
1747 * combinatorial explosion of intrinsic opcodes.
1748 *
1749 * By having a single instruction type for handling a lot of different
1750 * cases, optimization passes can look for intrinsics and, for the most
1751 * part, completely ignore them. Each intrinsic type also has a few
1752 * possible flags that govern whether or not they can be reordered or
1753 * eliminated. That way passes like dead code elimination can still work
1754 * on intrinsics without understanding the meaning of each.
1755 *
1756 * Each intrinsic has some number of constant indices, some number of
1757 * variables, and some number of sources. What these sources, variables,
1758 * and indices mean depends on the intrinsic and is documented with the
1759 * intrinsic declaration in nir_intrinsics.h. Intrinsics and texture
1760 * instructions are the only types of instruction that can operate on
1761 * variables.
1762 */
1763 typedef struct {
1764 nir_instr instr;
1765
1766 nir_intrinsic_op intrinsic;
1767
1768 nir_dest dest;
1769
1770 /** number of components if this is a vectorized intrinsic
1771 *
1772 * Similarly to ALU operations, some intrinsics are vectorized.
1773 * An intrinsic is vectorized if nir_intrinsic_infos.dest_components == 0.
1774 * For vectorized intrinsics, the num_components field specifies the
1775 * number of destination components and the number of source components
1776 * for all sources with nir_intrinsic_infos.src_components[i] == 0.
1777 */
1778 uint8_t num_components;
1779
1780 int const_index[NIR_INTRINSIC_MAX_CONST_INDEX];
1781
1782 nir_src src[];
1783 } nir_intrinsic_instr;
1784
1785 static inline nir_variable *
1786 nir_intrinsic_get_var(nir_intrinsic_instr *intrin, unsigned i)
1787 {
1788 return nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[i]));
1789 }
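
/* Example (illustrative sketch): for a load_deref intrinsic, source 0 is the
 * deref, so the variable being loaded (if it can be traced past any casts)
 * is simply:
 *
 *    nir_variable *var = nir_intrinsic_get_var(intrin, 0);
 *
 * This returns NULL when the deref chain ends in a cast rather than a
 * variable, so callers should be prepared for that.
 */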
1790
1791 typedef enum {
1792 /* Memory ordering. */
1793 NIR_MEMORY_ACQUIRE = 1 << 0,
1794 NIR_MEMORY_RELEASE = 1 << 1,
1795 NIR_MEMORY_ACQ_REL = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE,
1796
1797 /* Memory visibility operations. */
1798 NIR_MEMORY_MAKE_AVAILABLE = 1 << 2,
1799 NIR_MEMORY_MAKE_VISIBLE = 1 << 3,
1800 } nir_memory_semantics;
1801
1802 typedef enum {
1803 NIR_SCOPE_NONE,
1804 NIR_SCOPE_INVOCATION,
1805 NIR_SCOPE_SUBGROUP,
1806 NIR_SCOPE_SHADER_CALL,
1807 NIR_SCOPE_WORKGROUP,
1808 NIR_SCOPE_QUEUE_FAMILY,
1809 NIR_SCOPE_DEVICE,
1810 } nir_scope;
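
/* Example (illustrative): the semantics of a "release" barrier that also
 * makes prior writes available to the rest of the workgroup could be
 * described with these enums as:
 *
 *    nir_memory_semantics sem = NIR_MEMORY_RELEASE | NIR_MEMORY_MAKE_AVAILABLE;
 *    nir_scope scope = NIR_SCOPE_WORKGROUP;
 *
 * How these values are attached to a particular barrier intrinsic is up to
 * that intrinsic's constant indices (see nir_intrinsics.h).
 */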
1811
1812 /**
1813 * \name NIR intrinsics semantic flags
1814 *
1815 * Information about what the compiler can do with the intrinsics.
1816 *
1817 * \sa nir_intrinsic_info::flags
1818 */
1819 typedef enum {
1820 /**
1821 * Whether the intrinsic can be safely eliminated if none of its output
1822 * values are being used.
1823 */
1824 NIR_INTRINSIC_CAN_ELIMINATE = (1 << 0),
1825
1826 /**
1827 * Whether the intrinsic can be reordered with respect to any other
1828 * intrinsic, i.e. whether the only reordering dependencies of the
1829 * intrinsic are due to the register reads/writes.
1830 */
1831 NIR_INTRINSIC_CAN_REORDER = (1 << 1),
1832 } nir_intrinsic_semantic_flag;
1833
1834 /**
1835 * Maximum valid value for a nir align_mul value (in intrinsics or derefs).
1836 *
1837 * Offsets can be signed, so this is the largest power of two in int32_t.
1838 */
1839 #define NIR_ALIGN_MUL_MAX 0x40000000
1840
1841 typedef struct nir_io_semantics {
1842 unsigned location:7; /* gl_vert_attrib, gl_varying_slot, or gl_frag_result */
1843 unsigned num_slots:6; /* max 32, may be pessimistic with const indexing */
1844 unsigned dual_source_blend_index:1;
1845 unsigned fb_fetch_output:1; /* for GL_KHR_blend_equation_advanced */
1846 unsigned gs_streams:8; /* xxyyzzww: 2-bit stream index for each component */
1847 unsigned medium_precision:1; /* GLSL mediump qualifier */
1848 unsigned per_view:1;
1849 unsigned high_16bits:1; /* whether accessing low or high half of the slot */
1850 unsigned _pad:6;
1851 } nir_io_semantics;
1852
1853 #define NIR_INTRINSIC_MAX_INPUTS 11
1854
1855 typedef struct {
1856 const char *name;
1857
1858 uint8_t num_srcs; /** < number of register/SSA inputs */
1859
1860 /** number of components of each input register
1861 *
1862 * If this value is 0, the number of components is given by the
1863 * num_components field of nir_intrinsic_instr. If this value is -1, the
1864 * intrinsic consumes however many components are provided and it is not
1865 * validated at all.
1866 */
1867 int8_t src_components[NIR_INTRINSIC_MAX_INPUTS];
1868
1869 bool has_dest;
1870
1871 /** number of components of the output register
1872 *
1873 * If this value is 0, the number of components is given by the
1874 * num_components field of nir_intrinsic_instr.
1875 */
1876 uint8_t dest_components;
1877
1878 /** bitfield of legal bit sizes */
1879 uint8_t dest_bit_sizes;
1880
1881 /** source which the destination bit size must match
1882 *
1883 * Some intrinsics, such as subgroup intrinsics, are data-manipulation
1884 * intrinsics and have bit-size rules similar to ALU ops. This lets the
1885 * validator check a bit more and lets auto-generated builder code
1886 * determine destination bit sizes automatically.
1887 */
1888 int8_t bit_size_src;
1889
1890 /** the number of constant indices used by the intrinsic */
1891 uint8_t num_indices;
1892
1893 /** list of indices */
1894 uint8_t indices[NIR_INTRINSIC_MAX_CONST_INDEX];
1895
1896 /** indicates the usage of intr->const_index[n] */
1897 uint8_t index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS];
1898
1899 /** semantic flags for calls to this intrinsic */
1900 nir_intrinsic_semantic_flag flags;
1901 } nir_intrinsic_info;
1902
1903 extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics];
1904
1905 static inline unsigned
1906 nir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn)
1907 {
1908 const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
1909 assert(srcn < info->num_srcs);
1910 if (info->src_components[srcn] > 0)
1911 return info->src_components[srcn];
1912 else if (info->src_components[srcn] == 0)
1913 return intr->num_components;
1914 else
1915 return nir_src_num_components(intr->src[srcn]);
1916 }
1917
1918 static inline unsigned
1919 nir_intrinsic_dest_components(nir_intrinsic_instr *intr)
1920 {
1921 const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
1922 if (!info->has_dest)
1923 return 0;
1924 else if (info->dest_components)
1925 return info->dest_components;
1926 else
1927 return intr->num_components;
1928 }
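
/* Example (illustrative sketch): given a nir_intrinsic_instr *intrin, its
 * sources and destination can be walked generically with the info table and
 * the helpers above.
 *
 *    const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic];
 *    for (unsigned i = 0; i < info->num_srcs; i++) {
 *       unsigned ncomp = nir_intrinsic_src_components(intrin, i);
 *       // ... inspect intrin->src[i], which has ncomp components ...
 *    }
 *    unsigned dest_comps = nir_intrinsic_dest_components(intrin);
 */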
1929
1930 /**
1931 * Helper to copy const_index[] from src to dst, without assuming they
1932 * match in order.
1933 */
1934 static inline void
1935 nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src)
1936 {
1937 if (src->intrinsic == dst->intrinsic) {
1938 memcpy(dst->const_index, src->const_index, sizeof(dst->const_index));
1939 return;
1940 }
1941
1942 const nir_intrinsic_info *src_info = &nir_intrinsic_infos[src->intrinsic];
1943 const nir_intrinsic_info *dst_info = &nir_intrinsic_infos[dst->intrinsic];
1944
1945 for (unsigned i = 0; i < NIR_INTRINSIC_NUM_INDEX_FLAGS; i++) {
1946 if (src_info->index_map[i] == 0)
1947 continue;
1948
1949 /* require that dst instruction also uses the same const_index[]: */
1950 assert(dst_info->index_map[i] > 0);
1951
1952 dst->const_index[dst_info->index_map[i] - 1] =
1953 src->const_index[src_info->index_map[i] - 1];
1954 }
1955 }
1956
1957 #include "nir_intrinsics_indices.h"
1958
1959 static inline void
1960 nir_intrinsic_set_align(nir_intrinsic_instr *intrin,
1961 unsigned align_mul, unsigned align_offset)
1962 {
1963 assert(util_is_power_of_two_nonzero(align_mul));
1964 assert(align_offset < align_mul);
1965 nir_intrinsic_set_align_mul(intrin, align_mul);
1966 nir_intrinsic_set_align_offset(intrin, align_offset);
1967 }
1968
1969 /** Returns a simple alignment for a load/store intrinsic offset
1970 *
1971 * Instead of the full mul+offset alignment scheme provided by the ALIGN_MUL
1972 * and ALIGN_OFFSET parameters, this helper takes both into account and
1973 * provides a single simple alignment parameter. The offset X is guaranteed
1974 * to satisfy X % align == 0.
1975 */
1976 static inline unsigned
1977 nir_intrinsic_align(const nir_intrinsic_instr *intrin)
1978 {
1979 const unsigned align_mul = nir_intrinsic_align_mul(intrin);
1980 const unsigned align_offset = nir_intrinsic_align_offset(intrin);
1981 assert(align_offset < align_mul);
1982 return align_offset ? 1 << (ffs(align_offset) - 1) : align_mul;
1983 }
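
/* Worked example: with align_mul = 16 and align_offset = 0, every offset is
 * a multiple of 16, so this returns 16. With align_mul = 16 and
 * align_offset = 4, offsets have the form 16*k + 4, so the strongest simple
 * guarantee is a multiple of 4 and this returns 4 (the lowest set bit of
 * the offset).
 */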
1984
1985 static inline bool
1986 nir_intrinsic_has_align(const nir_intrinsic_instr *intrin)
1987 {
1988 return nir_intrinsic_has_align_mul(intrin) &&
1989 nir_intrinsic_has_align_offset(intrin);
1990 }
1991
1992 unsigned
1993 nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr);
1994
1995 /* Converts an image_deref_* intrinsic into an image_* one */
1996 void nir_rewrite_image_intrinsic(nir_intrinsic_instr *instr,
1997 nir_ssa_def *handle, bool bindless);
1998
1999 /* Determine if an intrinsic can be arbitrarily reordered and eliminated. */
2000 static inline bool
2001 nir_intrinsic_can_reorder(nir_intrinsic_instr *instr)
2002 {
2003 if (instr->intrinsic == nir_intrinsic_load_deref) {
2004 nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
2005 return nir_deref_mode_is_in_set(deref, nir_var_read_only_modes) ||
2006 (nir_intrinsic_access(instr) & ACCESS_CAN_REORDER);
2007 } else if (instr->intrinsic == nir_intrinsic_load_ssbo ||
2008 instr->intrinsic == nir_intrinsic_bindless_image_load ||
2009 instr->intrinsic == nir_intrinsic_image_deref_load ||
2010 instr->intrinsic == nir_intrinsic_image_load) {
2011 return nir_intrinsic_access(instr) & ACCESS_CAN_REORDER;
2012 } else {
2013 const nir_intrinsic_info *info =
2014 &nir_intrinsic_infos[instr->intrinsic];
2015 return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
2016 (info->flags & NIR_INTRINSIC_CAN_REORDER);
2017 }
2018 }
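
/* Usage sketch (hypothetical pass logic): a code-motion or CSE-style pass
 * can use this as its legality check before hoisting or deduplicating an
 * intrinsic:
 *
 *    if (instr->type == nir_instr_type_intrinsic &&
 *        !nir_intrinsic_can_reorder(nir_instr_as_intrinsic(instr)))
 *       return false; // must keep program order for this one
 */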
2019
2020 bool nir_intrinsic_writes_external_memory(const nir_intrinsic_instr *instr);
2021
2022 /** Texture instruction source type */
2023 typedef enum {
2024 /** Texture coordinate
2025 *
2026 * Must have nir_tex_instr::coord_components components.
2027 */
2028 nir_tex_src_coord,
2029
2030 /** Projector
2031 *
2032 * The texture coordinate (except for the array component, if any) is
2033 * divided by this value before LOD computation and sampling.
2034 *
2035 * Must be a float scalar.
2036 */
2037 nir_tex_src_projector,
2038
2039 /** Shadow comparator
2040 *
2041 * For shadow sampling, the fetched texel values are compared against the
2042 * shadow comparator using the compare op specified by the sampler object
2043 * and converted to 1.0 if the comparison succeeds and 0.0 if it fails.
2044 * Interpolation happens after this conversion so the actual result may be
2045 * anywhere in the range [0.0, 1.0].
2046 *
2047 * Only valid if nir_tex_instr::is_shadow and must be a float scalar.
2048 */
2049 nir_tex_src_comparator,
2050
2051 /** Coordinate offset
2052 *
2053 * An integer value that is added to the texel address before sampling.
2054 * This is only allowed with operations that take an explicit LOD as it is
2055 * applied in integer texel space after LOD selection rather than in
2056 * normalized coordinate space.
2057 */
2058 nir_tex_src_offset,
2059
2060 /** LOD bias
2061 *
2062 * This value is added to the computed LOD before mip-mapping.
2063 */
2064 nir_tex_src_bias,
2065
2066 /** Explicit LOD */
2067 nir_tex_src_lod,
2068
2069 /** Min LOD
2070 *
2071 * The computed LOD is clamped to be at least as large as min_lod before
2072 * mip-mapping.
2073 */
2074 nir_tex_src_min_lod,
2075
2076 /** MSAA sample index */
2077 nir_tex_src_ms_index,
2078
2079 /** Intel-specific MSAA compression data */
2080 nir_tex_src_ms_mcs_intel,
2081
2082 /** Explicit horizontal (X-major) coordinate derivative */
2083 nir_tex_src_ddx,
2084
2085 /** Explicit vertical (Y-major) coordinate derivative */
2086 nir_tex_src_ddy,
2087
2088 /** Texture variable dereference */
2089 nir_tex_src_texture_deref,
2090
2091 /** Sampler variable dereference */
2092 nir_tex_src_sampler_deref,
2093
2094 /** Texture index offset
2095 *
2096 * This is added to nir_tex_instr::texture_index. Unless
2097 * nir_tex_instr::texture_non_uniform is set, this is guaranteed to be
2098 * dynamically uniform.
2099 */
2100 nir_tex_src_texture_offset,
2101
2102 /** Dynamically uniform sampler index offset
2103 *
2104 * This is added to nir_tex_instr::sampler_index. Unless
2105 * nir_tex_instr::sampler_non_uniform is set, this is guaranteed to be
2106 * dynamically uniform.
2107 */
2108 nir_tex_src_sampler_offset,
2109
2110 /** Bindless texture handle
2111 *
2112 * This is, unfortunately, a bit overloaded at the moment. There are
2113 * generally two types of bindless handles:
2114 *
2115 * 1. For GL_ARB_bindless bindless handles. These are part of the
2116 * GL/Gallium-level API and are always a 64-bit integer.
2117 *
2118 * 2. HW-specific handles. GL_ARB_bindless handles may be lowered to
2119 * these. Also, these are used by many Vulkan drivers to implement
2120 * descriptor sets, especially for UPDATE_AFTER_BIND descriptors.
2121 * The details of hardware handles (bit size, format, etc.) are
2122 * HW-specific.
2123 *
2124 * Because of this overloading and the resulting ambiguity, we currently
2125 * don't validate anything for these.
2126 */
2127 nir_tex_src_texture_handle,
2128
2129 /** Bindless sampler handle
2130 *
2131 * See nir_tex_src_texture_handle.
2132 */
2133 nir_tex_src_sampler_handle,
2134
2135 /** Plane index for multi-plane YCbCr textures */
2136 nir_tex_src_plane,
2137
2138 /**
2139 * Backend-specific vec4 tex src argument.
2140 *
2141 * Can be used to have NIR optimization (copy propagation, lower_vec_to_movs)
2142 * apply to the packing of the tex srcs. This lowering must only happen
2143 * after nir_lower_tex().
2144 *
2145 * The nir_tex_instr_src_type() of this argument is float, so no lowering
2146 * will happen if nir_lower_int_to_float is used.
2147 */
2148 nir_tex_src_backend1,
2149
2150 /** Second backend-specific vec4 tex src argument, see nir_tex_src_backend1. */
2151 nir_tex_src_backend2,
2152
2153 nir_num_tex_src_types
2154 } nir_tex_src_type;
2155
2156 /** A texture instruction source */
2157 typedef struct {
2158 /** Base source */
2159 nir_src src;
2160
2161 /** Type of this source */
2162 nir_tex_src_type src_type;
2163 } nir_tex_src;
2164
2165 /** Texture instruction opcode */
2166 typedef enum {
2167 nir_texop_tex, /**< Regular texture look-up */
2168 nir_texop_txb, /**< Texture look-up with LOD bias */
2169 nir_texop_txl, /**< Texture look-up with explicit LOD */
2170 nir_texop_txd, /**< Texture look-up with partial derivatives */
2171 nir_texop_txf, /**< Texel fetch with explicit LOD */
2172 nir_texop_txf_ms, /**< Multisample texture fetch */
2173 nir_texop_txf_ms_fb, /**< Multisample texture fetch from framebuffer */
2174 nir_texop_txf_ms_mcs_intel, /**< Multisample compression value fetch */
2175 nir_texop_txs, /**< Texture size */
2176 nir_texop_lod, /**< Texture lod query */
2177 nir_texop_tg4, /**< Texture gather */
2178 nir_texop_query_levels, /**< Texture levels query */
2179 nir_texop_texture_samples, /**< Texture samples query */
2180 nir_texop_samples_identical, /**< Query whether all samples are definitely
2181 * identical.
2182 */
2183 nir_texop_tex_prefetch, /**< Regular texture look-up, eligible for pre-dispatch */
2184 nir_texop_fragment_fetch_amd, /**< Multisample fragment color texture fetch */
2185 nir_texop_fragment_mask_fetch_amd, /**< Multisample fragment mask texture fetch */
2186 } nir_texop;
2187
2188 /** Represents a texture instruction */
2189 typedef struct {
2190 /** Base instruction */
2191 nir_instr instr;
2192
2193 /** Dimensionality of the texture operation
2194 *
2195 * This will typically match the dimensionality of the texture deref type
2196 * if a nir_tex_src_texture_deref is present. However, it may not if
2197 * texture lowering has occurred.
2198 */
2199 enum glsl_sampler_dim sampler_dim;
2200
2201 /** ALU type of the destination
2202 *
2203 * This is the canonical sampled type for this texture operation and may
2204 * not exactly match the sampled type of the deref type when a
2205 * nir_tex_src_texture_deref is present. For OpenCL, the sampled type of
2206 * the texture deref will be GLSL_TYPE_VOID and this is allowed to be
2207 * anything. With SPIR-V, the signedness of integer types is allowed to
2208 * differ. For all APIs, the bit size may differ if the driver has done
2209 * any sort of mediump or similar lowering since texture types always have
2210 * 32-bit sampled types.
2211 */
2212 nir_alu_type dest_type;
2213
2214 /** Texture opcode */
2215 nir_texop op;
2216
2217 /** Destination */
2218 nir_dest dest;
2219
2220 /** Array of sources
2221 *
2222 * This array has nir_tex_instr::num_srcs elements
2223 */
2224 nir_tex_src *src;
2225
2226 /** Number of sources */
2227 unsigned num_srcs;
2228
2229 /** Number of components in the coordinate, if any */
2230 unsigned coord_components;
2231
2232 /** True if the texture instruction acts on an array texture */
2233 bool is_array;
2234
2235 /** True if the texture instruction performs a shadow comparison
2236 *
2237 * If this is true, the texture instruction must have a
2238 * nir_tex_src_comparator.
2239 */
2240 bool is_shadow;
2241
2242 /**
2243 * If is_shadow is true, whether this is the old-style shadow that outputs
2244 * 4 components or the new-style shadow that outputs 1 component.
2245 */
2246 bool is_new_style_shadow;
2247
2248 /**
2249 * True if this texture instruction should return a sparse residency code.
2250 * The code is in the last component of the result.
2251 */
2252 bool is_sparse;
2253
2254 /** nir_texop_tg4 component selector
2255 *
2256 * This determines which RGBA component is gathered.
2257 */
2258 unsigned component : 2;
2259
2260 /** Validation needs to know this for gradient component count */
2261 unsigned array_is_lowered_cube : 1;
2262
2263 /** Gather offsets */
2264 int8_t tg4_offsets[4][2];
2265
2266 /** True if the texture index or handle is not dynamically uniform */
2267 bool texture_non_uniform;
2268
2269 /** True if the sampler index or handle is not dynamically uniform */
2270 bool sampler_non_uniform;
2271
2272 /** The texture index
2273 *
2274 * If this texture instruction has a nir_tex_src_texture_offset source,
2275 * then the texture index is given by texture_index + texture_offset.
2276 */
2277 unsigned texture_index;
2278
2279 /** The sampler index
2280 *
2281 * The following operations do not require a sampler and, as such, this
2282 * field should be ignored:
2283 * - nir_texop_txf
2284 * - nir_texop_txf_ms
2285 * - nir_texop_txs
2286 * - nir_texop_query_levels
2287 * - nir_texop_texture_samples
2288 * - nir_texop_samples_identical
2289 *
2290 * If this texture instruction has a nir_tex_src_sampler_offset source,
2291 * then the sampler index is given by sampler_index + sampler_offset.
2292 */
2293 unsigned sampler_index;
2294 } nir_tex_instr;
2295
2296 /**
2297 * Returns true if the texture operation requires a sampler as a general rule
2298 *
2299 * Note that the specific hw/driver backend could still require a sampler
2300 * object/configuration packet in any case, for some other reason.
2301 *
2302 * @see nir_tex_instr::sampler_index.
2303 */
2304 static inline bool
2305 nir_tex_instr_need_sampler(const nir_tex_instr *instr)
2306 {
2307 switch (instr->op) {
2308 case nir_texop_txf:
2309 case nir_texop_txf_ms:
2310 case nir_texop_txs:
2311 case nir_texop_query_levels:
2312 case nir_texop_texture_samples:
2313 case nir_texop_samples_identical:
2314 return false;
2315 default:
2316 return true;
2317 }
2318 }
2319
2320 /** Returns the number of components returned by this nir_tex_instr
2321 *
2322 * Useful for code building texture instructions when you don't want to think
2323 * about how many components a particular texture op returns. This does not
2324 * include the sparse residency code.
2325 */
2326 static inline unsigned
2327 nir_tex_instr_result_size(const nir_tex_instr *instr)
2328 {
2329 switch (instr->op) {
2330 case nir_texop_txs: {
2331 unsigned ret;
2332 switch (instr->sampler_dim) {
2333 case GLSL_SAMPLER_DIM_1D:
2334 case GLSL_SAMPLER_DIM_BUF:
2335 ret = 1;
2336 break;
2337 case GLSL_SAMPLER_DIM_2D:
2338 case GLSL_SAMPLER_DIM_CUBE:
2339 case GLSL_SAMPLER_DIM_MS:
2340 case GLSL_SAMPLER_DIM_RECT:
2341 case GLSL_SAMPLER_DIM_EXTERNAL:
2342 case GLSL_SAMPLER_DIM_SUBPASS:
2343 ret = 2;
2344 break;
2345 case GLSL_SAMPLER_DIM_3D:
2346 ret = 3;
2347 break;
2348 default:
2349 unreachable("not reached");
2350 }
2351 if (instr->is_array)
2352 ret++;
2353 return ret;
2354 }
2355
2356 case nir_texop_lod:
2357 return 2;
2358
2359 case nir_texop_texture_samples:
2360 case nir_texop_query_levels:
2361 case nir_texop_samples_identical:
2362 case nir_texop_fragment_mask_fetch_amd:
2363 return 1;
2364
2365 default:
2366 if (instr->is_shadow && instr->is_new_style_shadow)
2367 return 1;
2368
2369 return 4;
2370 }
2371 }
2372
2373 /**
2374 * Returns the destination size of this nir_tex_instr including the sparse
2375 * residency code, if any.
2376 */
2377 static inline unsigned
2378 nir_tex_instr_dest_size(const nir_tex_instr *instr)
2379 {
2380 /* One more component is needed for the residency code. */
2381 return nir_tex_instr_result_size(instr) + instr->is_sparse;
2382 }
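
/* Worked example: nir_texop_txs on a 2D array texture returns 2 size
 * components plus 1 for the number of layers, so result_size is 3. An
 * ordinary nir_texop_tex returns 4 components; if is_sparse is set, the
 * destination gains one more component for the residency code, so
 * nir_tex_instr_dest_size() returns 5.
 */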
2383
2384 /**
2385 * Returns true if this texture operation queries something about the texture
2386 * rather than actually sampling it.
2387 */
2388 static inline bool
2389 nir_tex_instr_is_query(const nir_tex_instr *instr)
2390 {
2391 switch (instr->op) {
2392 case nir_texop_txs:
2393 case nir_texop_lod:
2394 case nir_texop_texture_samples:
2395 case nir_texop_query_levels:
2396 return true;
2397 case nir_texop_tex:
2398 case nir_texop_txb:
2399 case nir_texop_txl:
2400 case nir_texop_txd:
2401 case nir_texop_txf:
2402 case nir_texop_txf_ms:
2403 case nir_texop_txf_ms_fb:
2404 case nir_texop_txf_ms_mcs_intel:
2405 case nir_texop_tg4:
2406 return false;
2407 default:
2408 unreachable("Invalid texture opcode");
2409 }
2410 }
2411
2412 /** Returns true if this texture instruction does implicit derivatives
2413 *
2414 * This is important as there are extra control-flow rules around derivatives
2415 * and texture instructions which perform them implicitly.
2416 */
2417 static inline bool
2418 nir_tex_instr_has_implicit_derivative(const nir_tex_instr *instr)
2419 {
2420 switch (instr->op) {
2421 case nir_texop_tex:
2422 case nir_texop_txb:
2423 case nir_texop_lod:
2424 return true;
2425 default:
2426 return false;
2427 }
2428 }
2429
2430 /** Returns the ALU type of the given texture instruction source */
2431 static inline nir_alu_type
2432 nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src)
2433 {
2434 switch (instr->src[src].src_type) {
2435 case nir_tex_src_coord:
2436 switch (instr->op) {
2437 case nir_texop_txf:
2438 case nir_texop_txf_ms:
2439 case nir_texop_txf_ms_fb:
2440 case nir_texop_txf_ms_mcs_intel:
2441 case nir_texop_samples_identical:
2442 return nir_type_int;
2443
2444 default:
2445 return nir_type_float;
2446 }
2447
2448 case nir_tex_src_lod:
2449 switch (instr->op) {
2450 case nir_texop_txs:
2451 case nir_texop_txf:
2452 case nir_texop_txf_ms:
2453 return nir_type_int;
2454
2455 default:
2456 return nir_type_float;
2457 }
2458
2459 case nir_tex_src_projector:
2460 case nir_tex_src_comparator:
2461 case nir_tex_src_bias:
2462 case nir_tex_src_min_lod:
2463 case nir_tex_src_ddx:
2464 case nir_tex_src_ddy:
2465 case nir_tex_src_backend1:
2466 case nir_tex_src_backend2:
2467 return nir_type_float;
2468
2469 case nir_tex_src_offset:
2470 case nir_tex_src_ms_index:
2471 case nir_tex_src_plane:
2472 return nir_type_int;
2473
2474 case nir_tex_src_ms_mcs_intel:
2475 case nir_tex_src_texture_deref:
2476 case nir_tex_src_sampler_deref:
2477 case nir_tex_src_texture_offset:
2478 case nir_tex_src_sampler_offset:
2479 case nir_tex_src_texture_handle:
2480 case nir_tex_src_sampler_handle:
2481 return nir_type_uint;
2482
2483 case nir_num_tex_src_types:
2484 unreachable("nir_num_tex_src_types is not a valid source type");
2485 }
2486
2487 unreachable("Invalid texture source type");
2488 }
2489
2490 /**
2491 * Returns the number of components required by the given texture instruction
2492 * source
2493 */
2494 static inline unsigned
2495 nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src)
2496 {
2497 if (instr->src[src].src_type == nir_tex_src_coord)
2498 return instr->coord_components;
2499
2500 /* The MCS value is expected to be a vec4 returned by a txf_ms_mcs_intel */
2501 if (instr->src[src].src_type == nir_tex_src_ms_mcs_intel)
2502 return 4;
2503
2504 if (instr->src[src].src_type == nir_tex_src_ddx ||
2505 instr->src[src].src_type == nir_tex_src_ddy) {
2506
2507 if (instr->is_array && !instr->array_is_lowered_cube)
2508 return instr->coord_components - 1;
2509 else
2510 return instr->coord_components;
2511 }
2512
2513 /* Usual APIs don't allow cube + offset, but we allow it, with 2 coords for
2514 * the offset, since a cube maps to a single face.
2515 */
2516 if (instr->src[src].src_type == nir_tex_src_offset) {
2517 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
2518 return 2;
2519 else if (instr->is_array)
2520 return instr->coord_components - 1;
2521 else
2522 return instr->coord_components;
2523 }
2524
2525 if (instr->src[src].src_type == nir_tex_src_backend1 ||
2526 instr->src[src].src_type == nir_tex_src_backend2)
2527 return nir_src_num_components(instr->src[src].src);
2528
2529 return 1;
2530 }
2531
2532 /**
2533 * Returns the index of the texture instruction source with the given
2534 * nir_tex_src_type or -1 if no such source exists.
2535 */
2536 static inline int
2537 nir_tex_instr_src_index(const nir_tex_instr *instr, nir_tex_src_type type)
2538 {
2539 for (unsigned i = 0; i < instr->num_srcs; i++)
2540 if (instr->src[i].src_type == type)
2541 return (int) i;
2542
2543 return -1;
2544 }
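
/* Example (illustrative sketch): finding and inspecting the coordinate
 * source of a texture instruction.
 *
 *    int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
 *    if (coord_idx >= 0) {
 *       nir_src coord = tex->src[coord_idx].src;
 *       unsigned ncomp = nir_tex_instr_src_size(tex, coord_idx);
 *       // ncomp == tex->coord_components here
 *    }
 */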
2545
2546 /** Adds a source to a texture instruction */
2547 void nir_tex_instr_add_src(nir_tex_instr *tex,
2548 nir_tex_src_type src_type,
2549 nir_src src);
2550
2551 /** Removes a source from a texture instruction */
2552 void nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx);
2553
2554 bool nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex);
2555
2556 typedef struct {
2557 nir_instr instr;
2558
2559 nir_ssa_def def;
2560
2561 nir_const_value value[];
2562 } nir_load_const_instr;
2563
2564 typedef enum {
2565 /** Return from a function
2566 *
2567 * This instruction is a classic function return. It jumps to
2568 * nir_function_impl::end_block. No return value is provided in this
2569 * instruction. Instead, the function is expected to write any return
2570 * data to a deref passed in from the caller.
2571 */
2572 nir_jump_return,
2573
2574 /** Immediately exit the current shader
2575 *
2576 * This instruction is roughly the equivalent of C's "exit()" in that it
2577 * immediately terminates the current shader invocation. From a CFG
2578 * perspective, it looks like a jump to nir_function_impl::end_block but
2579 * it actually jumps to the end block of the shader entrypoint. A halt
2580 * instruction in the shader entrypoint itself is semantically identical
2581 * to a return.
2582 *
2583 * For shaders with built-in I/O, any outputs written prior to a halt
2584 * instruction remain written and any outputs not written prior to the
2585 * halt have undefined values. It does NOT cause an implicit discard of
2586 * written results. If one wants to discard results in a fragment shader,
2587 * for instance, a discard or demote intrinsic is required.
2588 */
2589 nir_jump_halt,
2590
2591 /** Break out of the inner-most loop
2592 *
2593 * This has the same semantics as C's "break" statement.
2594 */
2595 nir_jump_break,
2596
2597 /** Jump back to the top of the inner-most loop
2598 *
2599 * This has the same semantics as C's "continue" statement assuming that a
2600 * NIR loop is implemented as "while (1) { body }".
2601 */
2602 nir_jump_continue,
2603
2604 /** Jumps for unstructured CFG.
2605 *
2606 * As we can't rely on block ordering within an unstructured CFG, we need
2607 * to place explicit jumps at the end of every block.
2608 */
2609 nir_jump_goto,
2610 nir_jump_goto_if,
2611 } nir_jump_type;
2612
2613 typedef struct {
2614 nir_instr instr;
2615 nir_jump_type type;
2616 nir_src condition;
2617 struct nir_block *target;
2618 struct nir_block *else_target;
2619 } nir_jump_instr;
2620
2621 /* creates a new SSA variable in an undefined state */
2622
2623 typedef struct {
2624 nir_instr instr;
2625 nir_ssa_def def;
2626 } nir_ssa_undef_instr;
2627
2628 typedef struct {
2629 struct exec_node node;
2630
2631 /* The predecessor block corresponding to this source */
2632 struct nir_block *pred;
2633
2634 nir_src src;
2635 } nir_phi_src;
2636
2637 #define nir_foreach_phi_src(phi_src, phi) \
2638 foreach_list_typed(nir_phi_src, phi_src, node, &(phi)->srcs)
2639 #define nir_foreach_phi_src_safe(phi_src, phi) \
2640 foreach_list_typed_safe(nir_phi_src, phi_src, node, &(phi)->srcs)
2641
2642 typedef struct {
2643 nir_instr instr;
2644
2645 struct exec_list srcs; /** < list of nir_phi_src */
2646
2647 nir_dest dest;
2648 } nir_phi_instr;
2649
2650 static inline nir_phi_src *
2651 nir_phi_get_src_from_block(nir_phi_instr *phi, struct nir_block *block)
2652 {
2653 nir_foreach_phi_src(src, phi) {
2654 if (src->pred == block)
2655 return src;
2656 }
2657
2658 assert(!"Block is not a predecessor of phi.");
2659 return NULL;
2660 }
2661
2662 typedef struct {
2663 struct exec_node node;
2664 nir_src src;
2665 nir_dest dest;
2666 } nir_parallel_copy_entry;
2667
2668 #define nir_foreach_parallel_copy_entry(entry, pcopy) \
2669 foreach_list_typed(nir_parallel_copy_entry, entry, node, &(pcopy)->entries)
2670
2671 typedef struct {
2672 nir_instr instr;
2673
2674 /* A list of nir_parallel_copy_entrys. The sources of all of the
2675 * entries are copied to the corresponding destinations "in parallel".
2676 * In other words, if we have two entries: a -> b and b -> a, the values
2677 * get swapped.
2678 */
2679 struct exec_list entries;
2680 } nir_parallel_copy_instr;
2681
2682 NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr,
2683 type, nir_instr_type_alu)
2684 NIR_DEFINE_CAST(nir_instr_as_deref, nir_instr, nir_deref_instr, instr,
2685 type, nir_instr_type_deref)
2686 NIR_DEFINE_CAST(nir_instr_as_call, nir_instr, nir_call_instr, instr,
2687 type, nir_instr_type_call)
2688 NIR_DEFINE_CAST(nir_instr_as_jump, nir_instr, nir_jump_instr, instr,
2689 type, nir_instr_type_jump)
2690 NIR_DEFINE_CAST(nir_instr_as_tex, nir_instr, nir_tex_instr, instr,
2691 type, nir_instr_type_tex)
2692 NIR_DEFINE_CAST(nir_instr_as_intrinsic, nir_instr, nir_intrinsic_instr, instr,
2693 type, nir_instr_type_intrinsic)
2694 NIR_DEFINE_CAST(nir_instr_as_load_const, nir_instr, nir_load_const_instr, instr,
2695 type, nir_instr_type_load_const)
2696 NIR_DEFINE_CAST(nir_instr_as_ssa_undef, nir_instr, nir_ssa_undef_instr, instr,
2697 type, nir_instr_type_ssa_undef)
2698 NIR_DEFINE_CAST(nir_instr_as_phi, nir_instr, nir_phi_instr, instr,
2699 type, nir_instr_type_phi)
2700 NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr,
2701 nir_parallel_copy_instr, instr,
2702 type, nir_instr_type_parallel_copy)
2703
2704
2705 #define NIR_DEFINE_SRC_AS_CONST(type, suffix) \
2706 static inline type \
2707 nir_src_comp_as_##suffix(nir_src src, unsigned comp) \
2708 { \
2709 assert(nir_src_is_const(src)); \
2710 nir_load_const_instr *load = \
2711 nir_instr_as_load_const(src.ssa->parent_instr); \
2712 assert(comp < load->def.num_components); \
2713 return nir_const_value_as_##suffix(load->value[comp], \
2714 load->def.bit_size); \
2715 } \
2716 \
2717 static inline type \
2718 nir_src_as_##suffix(nir_src src) \
2719 { \
2720 assert(nir_src_num_components(src) == 1); \
2721 return nir_src_comp_as_##suffix(src, 0); \
2722 }
2723
2724 NIR_DEFINE_SRC_AS_CONST(int64_t, int)
2725 NIR_DEFINE_SRC_AS_CONST(uint64_t, uint)
2726 NIR_DEFINE_SRC_AS_CONST(bool, bool)
2727 NIR_DEFINE_SRC_AS_CONST(double, float)
2728
2729 #undef NIR_DEFINE_SRC_AS_CONST
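
/* Example (illustrative): the helpers generated above let passes read
 * compile-time-constant sources directly. A typical pattern for, say, a
 * constant offset source:
 *
 *    if (nir_src_is_const(intrin->src[1])) {
 *       uint64_t offset = nir_src_as_uint(intrin->src[1]);
 *       // ... specialize on the known offset ...
 *    }
 *
 * Which source index holds the offset depends on the intrinsic; src[1] here
 * is just for illustration.
 */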
2730
2731
2732 typedef struct {
2733 nir_ssa_def *def;
2734 unsigned comp;
2735 } nir_ssa_scalar;
2736
2737 static inline bool
2738 nir_ssa_scalar_is_const(nir_ssa_scalar s)
2739 {
2740 return s.def->parent_instr->type == nir_instr_type_load_const;
2741 }
2742
2743 static inline nir_const_value
2744 nir_ssa_scalar_as_const_value(nir_ssa_scalar s)
2745 {
2746 assert(s.comp < s.def->num_components);
2747 nir_load_const_instr *load = nir_instr_as_load_const(s.def->parent_instr);
2748 return load->value[s.comp];
2749 }
2750
2751 #define NIR_DEFINE_SCALAR_AS_CONST(type, suffix) \
2752 static inline type \
2753 nir_ssa_scalar_as_##suffix(nir_ssa_scalar s) \
2754 { \
2755 return nir_const_value_as_##suffix( \
2756 nir_ssa_scalar_as_const_value(s), s.def->bit_size); \
2757 }
2758
2759 NIR_DEFINE_SCALAR_AS_CONST(int64_t, int)
2760 NIR_DEFINE_SCALAR_AS_CONST(uint64_t, uint)
2761 NIR_DEFINE_SCALAR_AS_CONST(bool, bool)
2762 NIR_DEFINE_SCALAR_AS_CONST(double, float)
2763
2764 #undef NIR_DEFINE_SCALAR_AS_CONST
2765
2766 static inline bool
2767 nir_ssa_scalar_is_alu(nir_ssa_scalar s)
2768 {
2769 return s.def->parent_instr->type == nir_instr_type_alu;
2770 }
2771
2772 static inline nir_op
2773 nir_ssa_scalar_alu_op(nir_ssa_scalar s)
2774 {
2775 return nir_instr_as_alu(s.def->parent_instr)->op;
2776 }
2777
2778 static inline nir_ssa_scalar
2779 nir_ssa_scalar_chase_alu_src(nir_ssa_scalar s, unsigned alu_src_idx)
2780 {
2781 nir_ssa_scalar out = { NULL, 0 };
2782
2783 nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
2784 assert(alu_src_idx < nir_op_infos[alu->op].num_inputs);
2785
2786 /* Our component must be written */
2787 assert(s.comp < s.def->num_components);
2788 assert(alu->dest.write_mask & (1u << s.comp));
2789
2790 assert(alu->src[alu_src_idx].src.is_ssa);
2791 out.def = alu->src[alu_src_idx].src.ssa;
2792
2793 if (nir_op_infos[alu->op].input_sizes[alu_src_idx] == 0) {
2794 /* The ALU src is unsized so the source component follows the
2795 * destination component.
2796 */
2797 out.comp = alu->src[alu_src_idx].swizzle[s.comp];
2798 } else {
2799 /* This is a sized source so all source components work together to
2800 * produce all the destination components. Since we need to return a
2801 * scalar, this only works if the source is a scalar.
2802 */
2803 assert(nir_op_infos[alu->op].input_sizes[alu_src_idx] == 1);
2804 out.comp = alu->src[alu_src_idx].swizzle[0];
2805 }
2806 assert(out.comp < out.def->num_components);
2807
2808 return out;
2809 }
2810
2811 nir_ssa_scalar nir_ssa_scalar_chase_movs(nir_ssa_scalar s);
2812
2813 /** Returns a nir_ssa_scalar where we've followed the bit-exact mov/vec use chain to the original definition */
2814 static inline nir_ssa_scalar
2815 nir_ssa_scalar_resolved(nir_ssa_def *def, unsigned channel)
2816 {
2817 nir_ssa_scalar s = { def, channel };
2818 return nir_ssa_scalar_chase_movs(s);
2819 }
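
/* Example (illustrative sketch): checking whether one channel of a value is
 * a known constant, looking through any intervening mov/vec instructions.
 *
 *    nir_ssa_scalar s = nir_ssa_scalar_resolved(def, 0);
 *    if (nir_ssa_scalar_is_const(s)) {
 *       uint64_t v = nir_ssa_scalar_as_uint(s);
 *       // channel 0 of def is the constant v
 *    } else if (nir_ssa_scalar_is_alu(s) &&
 *               nir_ssa_scalar_alu_op(s) == nir_op_iadd) {
 *       nir_ssa_scalar lhs = nir_ssa_scalar_chase_alu_src(s, 0);
 *       // ... keep chasing through the add ...
 *    }
 */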
2820
2821
2822 typedef struct {
2823 bool success;
2824
2825 nir_variable *var;
2826 unsigned desc_set;
2827 unsigned binding;
2828 unsigned num_indices;
2829 nir_src indices[4];
2830 bool read_first_invocation;
2831 } nir_binding;
2832
2833 nir_binding nir_chase_binding(nir_src rsrc);
2834 nir_variable *nir_get_binding_variable(struct nir_shader *shader, nir_binding binding);
2835
2836
2837 /*
2838 * Control flow
2839 *
2840 * Control flow consists of a tree of control flow nodes, which include
2841 * if-statements and loops. The leaves of the tree are basic blocks, lists of
2842 * instructions that always run start-to-finish. Each basic block also keeps
2843 * track of its successors (blocks which may run immediately after the current
2844 * block) and predecessors (blocks which could have run immediately before the
2845 * current block). Each function also has a start block and an end block
2846 * (which is always empty) that all return statements point to. Together, all the
2847 * blocks with their predecessors and successors make up the control flow
2848 * graph (CFG) of the function. There are helpers that modify the tree of
2849 * control flow nodes while modifying the CFG appropriately; these should be
2850 * used instead of modifying the tree directly.
2851 */
2852
2853 typedef enum {
2854 nir_cf_node_block,
2855 nir_cf_node_if,
2856 nir_cf_node_loop,
2857 nir_cf_node_function
2858 } nir_cf_node_type;
2859
2860 typedef struct nir_cf_node {
2861 struct exec_node node;
2862 nir_cf_node_type type;
2863 struct nir_cf_node *parent;
2864 } nir_cf_node;
2865
2866 typedef struct nir_block {
2867 nir_cf_node cf_node;
2868
2869 struct exec_list instr_list; /** < list of nir_instr */
2870
2871 /** generic block index; generated by nir_index_blocks */
2872 unsigned index;
2873
2874 /*
2875 * Each block can only have up to 2 successors, so we put them in a simple
2876 * array - no need for anything more complicated.
2877 */
2878 struct nir_block *successors[2];
2879
2880 /* Set of nir_block predecessors in the CFG */
2881 struct set *predecessors;
2882
2883 /*
2884 * this node's immediate dominator in the dominance tree - set to NULL for
2885 * the start block.
2886 */
2887 struct nir_block *imm_dom;
2888
2889 /* This node's children in the dominance tree */
2890 unsigned num_dom_children;
2891 struct nir_block **dom_children;
2892
2893 /* Set of nir_blocks on the dominance frontier of this block */
2894 struct set *dom_frontier;
2895
2896 /*
2897 * These two indices have the property that dom_{pre,post}_index for each
2898 * child of this block in the dominance tree will always be between
2899 * dom_pre_index and dom_post_index for this block, which makes testing if
2900 * a given block is dominated by another block an O(1) operation.
2901 */
2902 uint32_t dom_pre_index, dom_post_index;
2903
2904 /**
2905 * Value just before the first nir_instr->index in the block, but after
2906 * the end_ip of any predecessor block.
2907 */
2908 uint32_t start_ip;
2909 /**
2910 * Value just after the last nir_instr->index in the block, but before the
2911 * start_ip of any successor block.
2912 */
2913 uint32_t end_ip;
2914
2915 /* SSA def live in and out for this block; used for liveness analysis.
2916 * Indexed by ssa_def->index
2917 */
2918 BITSET_WORD *live_in;
2919 BITSET_WORD *live_out;
2920 } nir_block;
2921
2922 static inline bool
2923 nir_block_is_reachable(nir_block *b)
2924 {
2925 /* See also nir_block_dominates */
2926 return b->dom_post_index != 0;
2927 }
2928
2929 static inline nir_instr *
2930 nir_block_first_instr(nir_block *block)
2931 {
2932 struct exec_node *head = exec_list_get_head(&block->instr_list);
2933 return exec_node_data(nir_instr, head, node);
2934 }
2935
2936 static inline nir_instr *
2937 nir_block_last_instr(nir_block *block)
2938 {
2939 struct exec_node *tail = exec_list_get_tail(&block->instr_list);
2940 return exec_node_data(nir_instr, tail, node);
2941 }
2942
2943 static inline bool
2944 nir_block_ends_in_jump(nir_block *block)
2945 {
2946 return !exec_list_is_empty(&block->instr_list) &&
2947 nir_block_last_instr(block)->type == nir_instr_type_jump;
2948 }
2949
2950 static inline bool
2951 nir_block_ends_in_return_or_halt(nir_block *block)
2952 {
2953 if (exec_list_is_empty(&block->instr_list))
2954 return false;
2955
2956 nir_instr *instr = nir_block_last_instr(block);
2957 if (instr->type != nir_instr_type_jump)
2958 return false;
2959
2960 nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
2961 return jump_instr->type == nir_jump_return ||
2962 jump_instr->type == nir_jump_halt;
2963 }
2964
2965 static inline bool
2966 nir_block_ends_in_break(nir_block *block)
2967 {
2968 if (exec_list_is_empty(&block->instr_list))
2969 return false;
2970
2971 nir_instr *instr = nir_block_last_instr(block);
2972 return instr->type == nir_instr_type_jump &&
2973 nir_instr_as_jump(instr)->type == nir_jump_break;
2974 }
2975
2976 #define nir_foreach_instr(instr, block) \
2977 foreach_list_typed(nir_instr, instr, node, &(block)->instr_list)
2978 #define nir_foreach_instr_reverse(instr, block) \
2979 foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list)
2980 #define nir_foreach_instr_safe(instr, block) \
2981 foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list)
2982 #define nir_foreach_instr_reverse_safe(instr, block) \
2983 foreach_list_typed_reverse_safe(nir_instr, instr, node, &(block)->instr_list)
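
/* Example (illustrative sketch): the _safe variants allow removing the
 * current instruction while iterating, which is the usual shape of a
 * lowering pass over a block:
 *
 *    nir_foreach_instr_safe(instr, block) {
 *       if (instr->type != nir_instr_type_intrinsic)
 *          continue;
 *       // ... rewrite, or nir_instr_remove(instr), as needed ...
 *    }
 */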
2984
2985 static inline nir_phi_instr *
2986 nir_block_last_phi_instr(nir_block *block)
2987 {
2988 nir_phi_instr *last_phi = NULL;
2989 nir_foreach_instr(instr, block) {
2990 if (instr->type == nir_instr_type_phi)
2991 last_phi = nir_instr_as_phi(instr);
2992 else
2993 return last_phi;
2994 }
2995 return last_phi;
2996 }
2997
2998 typedef enum {
2999 nir_selection_control_none = 0x0,
3000 nir_selection_control_flatten = 0x1,
3001 nir_selection_control_dont_flatten = 0x2,
3002 } nir_selection_control;
3003
3004 typedef struct nir_if {
3005 nir_cf_node cf_node;
3006 nir_src condition;
3007 nir_selection_control control;
3008
3009 struct exec_list then_list; /** < list of nir_cf_node */
3010 struct exec_list else_list; /** < list of nir_cf_node */
3011 } nir_if;
3012
3013 typedef struct {
3014 nir_if *nif;
3015
3016 /** Instruction that generates nif::condition. */
3017 nir_instr *conditional_instr;
3018
3019 /** Block within ::nif that has the break instruction. */
3020 nir_block *break_block;
3021
3022 /** Last block for the then- or else-path that does not contain the break. */
3023 nir_block *continue_from_block;
3024
3025 /** True when ::break_block is in the else-path of ::nif. */
3026 bool continue_from_then;
3027 bool induction_rhs;
3028
3029 /* This is true if the terminator's exact trip count is unknown. For
3030 * example:
3031 *
3032 * for (int i = 0; i < imin(x, 4); i++)
3033 * ...
3034 *
3035 * Here loop analysis would have set a max_trip_count of 4; however, we don't
3036 * know for sure that this is the exact trip count.
3037 */
3038 bool exact_trip_count_unknown;
3039
3040 struct list_head loop_terminator_link;
3041 } nir_loop_terminator;
3042
3043 typedef struct {
3044 /* Induction variable. */
3045 nir_ssa_def *def;
3046
3047 /* Init statement with only uniform. */
3048 nir_src *init_src;
3049
3050 /* Update statement with only uniform. */
3051 nir_alu_src *update_src;
3052 } nir_loop_induction_variable;
3053
3054 typedef struct {
3055 /* Estimated cost (in number of instructions) of the loop */
3056 unsigned instr_cost;
3057
3058 /* Guessed trip count based on array indexing */
3059 unsigned guessed_trip_count;
3060
3061 /* Maximum number of times the loop is run (if known) */
3062 unsigned max_trip_count;
3063
3064 /* Do we know the exact number of times the loop will be run */
3065 bool exact_trip_count_known;
3066
3067 /* Unroll the loop regardless of its size */
3068 bool force_unroll;
3069
3070 /* Does the loop contain complex loop terminators, continues or other
3071 * complex behaviours? If this is true we can't rely on
3072 * loop_terminator_list to be complete or accurate.
3073 */
3074 bool complex_loop;
3075
3076 nir_loop_terminator *limiting_terminator;
3077
3078 /* A list of loop_terminators terminating this loop. */
3079 struct list_head loop_terminator_list;
3080
3081 /* array of induction variables for this loop */
3082 nir_loop_induction_variable *induction_vars;
3083 unsigned num_induction_vars;
3084 } nir_loop_info;
3085
3086 typedef enum {
3087 nir_loop_control_none = 0x0,
3088 nir_loop_control_unroll = 0x1,
3089 nir_loop_control_dont_unroll = 0x2,
3090 } nir_loop_control;
3091
3092 typedef struct {
3093 nir_cf_node cf_node;
3094
3095 struct exec_list body; /** < list of nir_cf_node */
3096
3097 nir_loop_info *info;
3098 nir_loop_control control;
3099 bool partially_unrolled;
3100 bool divergent;
3101 } nir_loop;
3102
3103 /**
3104 * Various bits of metadata that may be created or required by
3105 * optimization and analysis passes
3106 */
3107 typedef enum {
3108 nir_metadata_none = 0x0,
3109
3110 /** Indicates that nir_block::index values are valid.
3111 *
3112 * The start block has index 0 and they increase through a natural walk of
3113 * the CFG. nir_function_impl::num_blocks is the number of blocks and
3114 * every block index is in the range [0, nir_function_impl::num_blocks].
3115 *
3116 * A pass can preserve this metadata type if it doesn't touch the CFG.
3117 */
3118 nir_metadata_block_index = 0x1,
3119
3120 /** Indicates that block dominance information is valid
3121 *
3122 * This includes:
3123 *
3124 * - nir_block::num_dom_children
3125 * - nir_block::dom_children
3126 * - nir_block::dom_frontier
3127 * - nir_block::dom_pre_index
3128 * - nir_block::dom_post_index
3129 *
3130 * A pass can preserve this metadata type if it doesn't touch the CFG.
3131 */
3132 nir_metadata_dominance = 0x2,
3133
3134 /** Indicates that SSA def data-flow liveness information is valid
3135 *
3136 * This includes:
3137 *
3138 * - nir_block::live_in
3139 * - nir_block::live_out
3140 *
3141 * A pass can preserve this metadata type if it never adds or removes any
3142 * SSA defs or uses of SSA defs (most passes shouldn't preserve this
3143 * metadata type).
3144 */
3145 nir_metadata_live_ssa_defs = 0x4,
3146
3147 /** A dummy metadata value to track when a pass forgot to call
3148 * nir_metadata_preserve.
3149 *
3150 * A pass should always clear this value even if it doesn't make any
3151 * progress to indicate that it thought about preserving metadata.
3152 */
3153 nir_metadata_not_properly_reset = 0x8,
3154
3155 /** Indicates that loop analysis information is valid.
3156 *
3157 * This includes everything pointed to by nir_loop::info.
3158 *
3159 * A pass can preserve this metadata type if it is guaranteed to not affect
3160 * any loop metadata. However, since loop metadata includes things like
3161 * loop counts which depend on arithmetic in the loop, this is very hard to
3162 * determine. Most passes shouldn't preserve this metadata type.
3163 */
3164 nir_metadata_loop_analysis = 0x10,
3165
3166 /** Indicates that nir_instr::index values are valid.
3167 *
3168 * The start instruction has index 0 and they increase through a natural
3169 * walk of instructions in blocks in the CFG. The indices may have holes
3170 * after passes such as DCE.
3171 *
3172 * A pass can preserve this metadata type if it never adds or moves any
3173 * instructions (most passes shouldn't preserve this metadata type), but
3174 * can preserve it if it only removes instructions.
3175 */
3176 nir_metadata_instr_index = 0x20,
3177
3178 /** All metadata
3179 *
3180 * This includes all nir_metadata flags except not_properly_reset. Passes
3181 * which do not change the shader in any way should call
3182 *
3183 * nir_metadata_preserve(impl, nir_metadata_all);
3184 */
3185 nir_metadata_all = ~nir_metadata_not_properly_reset,
3186 } nir_metadata;
3187 MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(nir_metadata)
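
/* Example (illustrative): the usual contract for an optimization pass is to
 * report what it kept intact. A pass that rewrites instructions but leaves
 * the CFG alone typically ends with something like:
 *
 *    if (progress) {
 *       nir_metadata_preserve(impl, nir_metadata_block_index |
 *                                   nir_metadata_dominance);
 *    } else {
 *       nir_metadata_preserve(impl, nir_metadata_all);
 *    }
 */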
3188
3189 typedef struct {
3190 nir_cf_node cf_node;
3191
3192 /** pointer to the function of which this is an implementation */
3193 struct nir_function *function;
3194
3195 struct exec_list body; /** < list of nir_cf_node */
3196
3197 nir_block *end_block;
3198
3199 /** list for all local variables in the function */
3200 struct exec_list locals;
3201
3202 /** list of local registers in the function */
3203 struct exec_list registers;
3204
3205 /** next available local register index */
3206 unsigned reg_alloc;
3207
3208 /** next available SSA value index */
3209 unsigned ssa_alloc;
3210
3211 /* total number of basic blocks, only valid when block_index_dirty = false */
3212 unsigned num_blocks;
3213
3214 /** True if this nir_function_impl uses structured control-flow
3215 *
3216 * Structured nir_function_impls have different validation rules.
3217 */
3218 bool structured;
3219
3220 nir_metadata valid_metadata;
3221 } nir_function_impl;
3222
3223 #define nir_foreach_function_temp_variable(var, impl) \
3224 foreach_list_typed(nir_variable, var, node, &(impl)->locals)
3225
3226 #define nir_foreach_function_temp_variable_safe(var, impl) \
3227 foreach_list_typed_safe(nir_variable, var, node, &(impl)->locals)
3228
3229 ATTRIBUTE_RETURNS_NONNULL static inline nir_block *
3230 nir_start_block(nir_function_impl *impl)
3231 {
3232 return (nir_block *) impl->body.head_sentinel.next;
3233 }
3234
3235 ATTRIBUTE_RETURNS_NONNULL static inline nir_block *
3236 nir_impl_last_block(nir_function_impl *impl)
3237 {
3238 return (nir_block *) impl->body.tail_sentinel.prev;
3239 }
3240
3241 static inline nir_cf_node *
3242 nir_cf_node_next(nir_cf_node *node)
3243 {
3244 struct exec_node *next = exec_node_get_next(&node->node);
3245 if (exec_node_is_tail_sentinel(next))
3246 return NULL;
3247 else
3248 return exec_node_data(nir_cf_node, next, node);
3249 }
3250
3251 static inline nir_cf_node *
3252 nir_cf_node_prev(nir_cf_node *node)
3253 {
3254 struct exec_node *prev = exec_node_get_prev(&node->node);
3255 if (exec_node_is_head_sentinel(prev))
3256 return NULL;
3257 else
3258 return exec_node_data(nir_cf_node, prev, node);
3259 }
3260
3261 static inline bool
3262 nir_cf_node_is_first(const nir_cf_node *node)
3263 {
3264 return exec_node_is_head_sentinel(node->node.prev);
3265 }
3266
3267 static inline bool
3268 nir_cf_node_is_last(const nir_cf_node *node)
3269 {
3270 return exec_node_is_tail_sentinel(node->node.next);
3271 }
3272
3273 NIR_DEFINE_CAST(nir_cf_node_as_block, nir_cf_node, nir_block, cf_node,
3274 type, nir_cf_node_block)
3275 NIR_DEFINE_CAST(nir_cf_node_as_if, nir_cf_node, nir_if, cf_node,
3276 type, nir_cf_node_if)
3277 NIR_DEFINE_CAST(nir_cf_node_as_loop, nir_cf_node, nir_loop, cf_node,
3278 type, nir_cf_node_loop)
3279 NIR_DEFINE_CAST(nir_cf_node_as_function, nir_cf_node,
3280 nir_function_impl, cf_node, type, nir_cf_node_function)
3281
3282 static inline nir_block *
3283 nir_if_first_then_block(nir_if *if_stmt)
3284 {
3285 struct exec_node *head = exec_list_get_head(&if_stmt->then_list);
3286 return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
3287 }
3288
3289 static inline nir_block *
3290 nir_if_last_then_block(nir_if *if_stmt)
3291 {
3292 struct exec_node *tail = exec_list_get_tail(&if_stmt->then_list);
3293 return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node));
3294 }
3295
3296 static inline nir_block *
3297 nir_if_first_else_block(nir_if *if_stmt)
3298 {
3299 struct exec_node *head = exec_list_get_head(&if_stmt->else_list);
3300 return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
3301 }
3302
3303 static inline nir_block *
3304 nir_if_last_else_block(nir_if *if_stmt)
3305 {
3306 struct exec_node *tail = exec_list_get_tail(&if_stmt->else_list);
3307 return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node));
3308 }
3309
3310 static inline nir_block *
3311 nir_loop_first_block(nir_loop *loop)
3312 {
3313 struct exec_node *head = exec_list_get_head(&loop->body);
3314 return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
3315 }
3316
3317 static inline nir_block *
3318 nir_loop_last_block(nir_loop *loop)
3319 {
3320 struct exec_node *tail = exec_list_get_tail(&loop->body);
3321 return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node));
3322 }
3323
3324 /**
3325 * Return true if this list of cf_nodes contains a single empty block.
3326 */
3327 static inline bool
3328 nir_cf_list_is_empty_block(struct exec_list *cf_list)
3329 {
3330 if (exec_list_is_singular(cf_list)) {
3331 struct exec_node *head = exec_list_get_head(cf_list);
3332 nir_block *block =
3333 nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
3334 return exec_list_is_empty(&block->instr_list);
3335 }
3336 return false;
3337 }
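
/* Example (illustrative sketch): using the accessors above to test whether
 * an if-statement has a useless else branch:
 *
 *    if (nir_cf_list_is_empty_block(&if_stmt->else_list)) {
 *       // the else side is a single empty block; it can often be ignored
 *    }
 */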
3338
3339 typedef struct {
3340 uint8_t num_components;
3341 uint8_t bit_size;
3342 } nir_parameter;
3343
3344 typedef struct nir_printf_info {
3345 unsigned num_args;
3346 unsigned *arg_sizes;
3347 unsigned string_size;
3348 char *strings;
3349 } nir_printf_info;
3350
3351 typedef struct nir_function {
3352 struct exec_node node;
3353
3354 const char *name;
3355 struct nir_shader *shader;
3356
3357 unsigned num_params;
3358 nir_parameter *params;
3359
3360 /** The implementation of this function.
3361 *
3362 * If the function is only declared and not implemented, this is NULL.
3363 */
3364 nir_function_impl *impl;
3365
3366 bool is_entrypoint;
3367 } nir_function;
3368
3369 typedef enum {
3370 nir_lower_imul64 = (1 << 0),
3371 nir_lower_isign64 = (1 << 1),
3372 /** Lower all int64 modulus and division opcodes */
3373 nir_lower_divmod64 = (1 << 2),
3374 /** Lower all 64-bit umul_high and imul_high opcodes */
3375 nir_lower_imul_high64 = (1 << 3),
3376 nir_lower_mov64 = (1 << 4),
3377 nir_lower_icmp64 = (1 << 5),
3378 nir_lower_iadd64 = (1 << 6),
3379 nir_lower_iabs64 = (1 << 7),
3380 nir_lower_ineg64 = (1 << 8),
3381 nir_lower_logic64 = (1 << 9),
3382 nir_lower_minmax64 = (1 << 10),
3383 nir_lower_shift64 = (1 << 11),
3384 nir_lower_imul_2x32_64 = (1 << 12),
3385 nir_lower_extract64 = (1 << 13),
3386 nir_lower_ufind_msb64 = (1 << 14),
3387 nir_lower_bit_count64 = (1 << 15),
3388 nir_lower_subgroup_shuffle64 = (1 << 16),
3389 nir_lower_scan_reduce_bitwise64 = (1 << 17),
3390 nir_lower_scan_reduce_iadd64 = (1 << 18),
3391 nir_lower_vote_ieq64 = (1 << 19),
3392 } nir_lower_int64_options;
3393
3394 typedef enum {
3395 nir_lower_drcp = (1 << 0),
3396 nir_lower_dsqrt = (1 << 1),
3397 nir_lower_drsq = (1 << 2),
3398 nir_lower_dtrunc = (1 << 3),
3399 nir_lower_dfloor = (1 << 4),
3400 nir_lower_dceil = (1 << 5),
3401 nir_lower_dfract = (1 << 6),
3402 nir_lower_dround_even = (1 << 7),
3403 nir_lower_dmod = (1 << 8),
3404 nir_lower_dsub = (1 << 9),
3405 nir_lower_ddiv = (1 << 10),
3406 nir_lower_fp64_full_software = (1 << 11),
3407 } nir_lower_doubles_options;
3408
3409 typedef enum {
3410 nir_divergence_single_prim_per_subgroup = (1 << 0),
3411 nir_divergence_single_patch_per_tcs_subgroup = (1 << 1),
3412 nir_divergence_single_patch_per_tes_subgroup = (1 << 2),
3413 nir_divergence_view_index_uniform = (1 << 3),
3414 nir_divergence_single_frag_shading_rate_per_subgroup = (1 << 4),
3415 nir_divergence_multiple_workgroup_per_compute_subgroup = (1 << 5),
3416 } nir_divergence_options;
3417
3418 typedef enum {
3419 nir_pack_varying_interp_mode_none = (1 << 0),
3420 nir_pack_varying_interp_mode_smooth = (1 << 1),
3421 nir_pack_varying_interp_mode_flat = (1 << 2),
3422 nir_pack_varying_interp_mode_noperspective = (1 << 3),
3423 nir_pack_varying_interp_loc_sample = (1 << 16),
3424 nir_pack_varying_interp_loc_centroid = (1 << 17),
3425 nir_pack_varying_interp_loc_center = (1 << 18),
3426 } nir_pack_varying_options;
3427
3428 /** An instruction filtering callback
3429 *
3430 * Returns true if the instruction should be processed and false otherwise.
3431 */
3432 typedef bool (*nir_instr_filter_cb)(const nir_instr *, const void *);
3433
3434 typedef struct nir_shader_compiler_options {
3435 bool lower_fdiv;
3436 bool lower_ffma16;
3437 bool lower_ffma32;
3438 bool lower_ffma64;
3439 bool fuse_ffma16;
3440 bool fuse_ffma32;
3441 bool fuse_ffma64;
3442 bool lower_flrp16;
3443 bool lower_flrp32;
3444 /** Lowers flrp when it does not support doubles */
3445 bool lower_flrp64;
3446 bool lower_fpow;
3447 bool lower_fsat;
3448 bool lower_fsqrt;
3449 bool lower_sincos;
3450 bool lower_fmod;
3451 /** Lowers ibitfield_extract/ubitfield_extract to ibfe/ubfe. */
3452 bool lower_bitfield_extract;
3453 /** Lowers ibitfield_extract/ubitfield_extract to compares, shifts. */
3454 bool lower_bitfield_extract_to_shifts;
3455 /** Lowers bitfield_insert to bfi/bfm */
3456 bool lower_bitfield_insert;
3457 /** Lowers bitfield_insert to compares, and shifts. */
3458 bool lower_bitfield_insert_to_shifts;
3459 /** Lowers bitfield_insert to bfm/bitfield_select. */
3460 bool lower_bitfield_insert_to_bitfield_select;
3461 /** Lowers bitfield_reverse to shifts. */
3462 bool lower_bitfield_reverse;
3463 /** Lowers bit_count to shifts. */
3464 bool lower_bit_count;
3465 /** Lowers ifind_msb to compare and ufind_msb */
3466 bool lower_ifind_msb;
3467 /** Lowers ifind_msb and ufind_msb to reverse variants */
3468 bool lower_find_msb_to_reverse;
3469 /** Lowers find_lsb to ufind_msb and logic ops */
3470 bool lower_find_lsb;
3471 bool lower_uadd_carry;
3472 bool lower_usub_borrow;
3473 /** Lowers imul_high/umul_high to 16-bit multiplies and carry operations. */
3474 bool lower_mul_high;
3475 /** lowers fneg to fmul(x, -1.0). Driver must call nir_opt_algebraic_late() */
3476 bool lower_fneg;
3477 /** lowers ineg to isub. Driver must call nir_opt_algebraic_late(). */
3478 bool lower_ineg;
3479 /** lowers fisnormal to alu ops. */
3480 bool lower_fisnormal;
3481
3482 /* lower {slt,sge,seq,sne} to {flt,fge,feq,fneu} + b2f: */
3483 bool lower_scmp;
3484
3485 /* lower {b,f}all_equalN and {b,f}any_nequalN (e.g. fany_nequal4 to sne+fdot4+fsat) */
3486 bool lower_vector_cmp;
3487
3488 /** enable rules to avoid bit ops */
3489 bool lower_bitops;
3490
3491 /** enables rules to lower isign to imin+imax */
3492 bool lower_isign;
3493
3494 /** enables rules to lower fsign to fsub and flt */
3495 bool lower_fsign;
3496
3497 /** enables rules to lower iabs to ineg+imax */
3498 bool lower_iabs;
3499
3500 /** enable rules that avoid generating umax from signed integer ops */
3501 bool lower_umax;
3502
3503 /** enable rules that avoid generating umin from signed integer ops */
3504 bool lower_umin;
3505
3506 /* lower fdph to fdot4 */
3507 bool lower_fdph;
3508
3509 /** lower fdot to fmul and fsum/fadd. */
3510 bool lower_fdot;
3511
3512 /* Does the native fdot instruction replicate its result for four
3513 * components? If so, then opt_algebraic_late will turn all fdotN
3514 * instructions into fdotN_replicated instructions.
3515 */
3516 bool fdot_replicates;
3517
3518 /** lowers ffloor to fsub+ffract: */
3519 bool lower_ffloor;
3520
3521 /** lowers ffract to fsub+ffloor: */
3522 bool lower_ffract;
3523
3524 /** lowers fceil to fneg+ffloor+fneg: */
3525 bool lower_fceil;
3526
3527 bool lower_ftrunc;
3528
3529 bool lower_ldexp;
3530
3531 bool lower_pack_half_2x16;
3532 bool lower_pack_unorm_2x16;
3533 bool lower_pack_snorm_2x16;
3534 bool lower_pack_unorm_4x8;
3535 bool lower_pack_snorm_4x8;
3536 bool lower_pack_64_2x32;
3537 bool lower_pack_64_4x16;
3538 bool lower_pack_32_2x16;
3539 bool lower_pack_64_2x32_split;
3540 bool lower_pack_32_2x16_split;
3541 bool lower_unpack_half_2x16;
3542 bool lower_unpack_unorm_2x16;
3543 bool lower_unpack_snorm_2x16;
3544 bool lower_unpack_unorm_4x8;
3545 bool lower_unpack_snorm_4x8;
3546 bool lower_unpack_64_2x32_split;
3547 bool lower_unpack_32_2x16_split;
3548
3549 bool lower_pack_split;
3550
3551 bool lower_extract_byte;
3552 bool lower_extract_word;
3553 bool lower_insert_byte;
3554 bool lower_insert_word;
3555
3556 bool lower_all_io_to_temps;
3557 bool lower_all_io_to_elements;
3558
3559 /* Indicates that the driver only has zero-based vertex id */
3560 bool vertex_id_zero_based;
3561
3562 /**
3563 * If enabled, gl_BaseVertex will be lowered as:
3564 * is_indexed_draw (~0/0) & firstvertex
3565 */
3566 bool lower_base_vertex;
3567
3568 /**
3569 * If enabled, gl_HelperInvocation will be lowered as:
3570 *
3571 * !((1 << sample_id) & sample_mask_in))
3572 *
3573 * This depends on hardware implementation details that may not hold
3574 * for all hardware; in particular, it assumes the FS is only executed
3575 * for covered samples or for helper invocations. So, do not blindly
3576 * enable this option.
3577 *
3578 * Note: See also issue #22 in ARB_shader_image_load_store
3579 */
3580 bool lower_helper_invocation;
3581
3582 /**
3583 * Convert gl_SampleMaskIn to gl_HelperInvocation as follows:
3584 *
3585 * gl_SampleMaskIn == 0 ---> gl_HelperInvocation
3586 * gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
3587 */
3588 bool optimize_sample_mask_in;
3589
3590 bool lower_cs_local_index_from_id;
3591 bool lower_cs_local_id_from_index;
3592
3593 /* Prevents lowering global_invocation_id to be in terms of workgroup_id */
3594 bool has_cs_global_id;
3595
3596 bool lower_device_index_to_zero;
3597
3598 /* Set if nir_lower_pntc_ytransform() should invert gl_PointCoord,
3599 * either when the framebuffer is flipped or when
3600 * GL_POINT_SPRITE_COORD_ORIGIN is GL_LOWER_LEFT.
3601 */
3602 bool lower_wpos_pntc;
3603
3604 /**
3605 * Set if nir_op_[iu]hadd and nir_op_[iu]rhadd instructions should be
3606 * lowered to simple arithmetic.
3607 *
3608 * If this flag is set, the lowering will be applied to all bit-sizes of
3609 * these instructions.
3610 *
3611 * \sa ::lower_hadd64
3612 */
3613 bool lower_hadd;
3614
3615 /**
3616 * Set if only 64-bit nir_op_[iu]hadd and nir_op_[iu]rhadd instructions
3617 * should be lowered to simple arithmetic.
3618 *
3619 * If this flag is set, the lowering will be applied to only 64-bit
3620 * versions of these instructions.
3621 *
3622 * \sa ::lower_hadd
3623 */
3624 bool lower_hadd64;
3625
3626 /**
3627 * Set if nir_op_uadd_sat and nir_op_usub_sat should be lowered to simple
3628 * arithmetic.
3629 *
3630 * If this flag is set, the lowering will be applied to all bit-sizes of
3631 * these instructions.
3632 *
3633 * \sa ::lower_usub_sat64
3634 */
3635 bool lower_uadd_sat;
3636
3637 /**
3638 * Set if only 64-bit nir_op_usub_sat should be lowered to simple
3639 * arithmetic.
3640 *
3641 * \sa ::lower_uadd_sat
3642 */
3643 bool lower_usub_sat64;
3644
3645 /**
3646 * Set if nir_op_iadd_sat and nir_op_isub_sat should be lowered to simple
3647 * arithmetic.
3648 *
3649 * If this flag is set, the lowering will be applied to all bit-sizes of
3650 * these instructions.
3651 */
3652 bool lower_iadd_sat;
3653
3654 /**
3655 * Should IO be re-vectorized? Some scalar ISAs still operate on vec4s
3656 * for IO purposes and would prefer that loads/stores be vectorized.
3657 */
3658 bool vectorize_io;
3659 bool lower_to_scalar;
3660 nir_instr_filter_cb lower_to_scalar_filter;
3661
3662 /**
3663 * Whether nir_opt_vectorize should only create 16-bit 2D vectors.
3664 */
3665 bool vectorize_vec2_16bit;
3666
3667 /**
3668 * Should the linker unify inputs_read/outputs_written between adjacent
3669 * shader stages which are linked into a single program?
3670 */
3671 bool unify_interfaces;
3672
3673 /**
3674 * Should nir_lower_io() create load_interpolated_input intrinsics?
3675 *
3676 * If not, it generates regular load_input intrinsics and interpolation
3677 * information must be inferred from the list of input nir_variables.
3678 */
3679 bool use_interpolated_input_intrinsics;
3680
3681
3682 /**
3683 * Whether nir_lower_io() will lower interpolateAt functions to
3684 * load_interpolated_input intrinsics.
3685 *
3686 * Unlike use_interpolated_input_intrinsics this will only lower these
3687 * functions and leave input load intrinsics untouched.
3688 */
3689 bool lower_interpolate_at;
3690
3691 /* Lowers when 32x32->64 bit multiplication is not supported */
3692 bool lower_mul_2x32_64;
3693
3694 /* Lowers when rotate instruction is not supported */
3695 bool lower_rotate;
3696
3697 /** Backend supports ternary addition */
3698 bool has_iadd3;
3699
3700 /**
3701 * Backend supports imul24, and would like to use it (when possible)
3702 * for address/offset calculation. If true, driver should call
3703 * nir_lower_amul(). (If not set, amul will automatically be lowered
3704 * to imul.)
3705 */
3706 bool has_imul24;
3707
3708 /** Backend supports umul24; if not set, umul24 will automatically be lowered
3709 * to imul with masked inputs */
3710 bool has_umul24;
3711
3712 /** Backend supports umad24; if not set, umad24 will automatically be lowered
3713 * to imul with masked inputs and iadd */
3714 bool has_umad24;
3715
3716 /* Backend supports fused compare against zero and csel */
3717 bool has_fused_comp_and_csel;
3718
3719 /** Backend supports fsub; if not set, fsub will automatically be lowered to
3720 * fadd(x, fneg(y)). If true, the driver should call nir_opt_algebraic_late(). */
3721 bool has_fsub;
3722
3723 /** Backend supports isub; if not set, isub will automatically be lowered to
3724 * iadd(x, ineg(y)). If true, the driver should call nir_opt_algebraic_late(). */
3725 bool has_isub;
3726
3727 /** Backend supports pack_32_4x8 or pack_32_4x8_split. */
3728 bool has_pack_32_4x8;
3729
3730 /** Backend supports txs; if not set, nir_lower_tex(..) uses txs-free variants
3731 * for rect texture lowering. */
3732 bool has_txs;
3733
3734 /** Backend supports sdot_4x8 and udot_4x8 opcodes. */
3735 bool has_dot_4x8;
3736
3737 /** Backend supports sudot_4x8 opcodes. */
3738 bool has_sudot_4x8;
3739
3740 /** Backend supports sdot_2x16 and udot_2x16 opcodes. */
3741 bool has_dot_2x16;
3742
3743 /* Whether to generate only scoped_barrier intrinsics instead of the set of
3744 * memory and control barrier intrinsics based on GLSL.
3745 */
3746 bool use_scoped_barrier;
3747
3748 /**
3749 * Is this the Intel vec4 backend?
3750 *
3751 * Used to inhibit algebraic optimizations that are known to be harmful on
3752 * the Intel vec4 backend. This is generally applicable to any
3753 * optimization that might cause more immediate values to be used in
3754 * 3-source (e.g., ffma and flrp) instructions.
3755 */
3756 bool intel_vec4;
3757
3758 /**
3759 * For most Intel GPUs, all ternary operations such as FMA and BFE cannot
3760 * have immediates, so two to three instructions may eventually be needed.
3761 */
3762 bool avoid_ternary_with_two_constants;
3763
3764 /** Whether 8-bit ALU is supported. */
3765 bool support_8bit_alu;
3766
3767 /** Whether 16-bit ALU is supported. */
3768 bool support_16bit_alu;
3769
3770 unsigned max_unroll_iterations;
3771 unsigned max_unroll_iterations_aggressive;
3772
3773 bool lower_uniforms_to_ubo;
3774
3775 /* If the precision is ignored, backends that don't handle
3776 * different precisions when passing data between stages and that use
3777 * vectorized IO can pack more varyings when linking. */
3778 bool linker_ignore_precision;
3779
3780 /**
3781 * Specifies which type of indirectly accessed variables should force
3782 * loop unrolling.
3783 */
3784 nir_variable_mode force_indirect_unrolling;
3785
3786 nir_lower_int64_options lower_int64_options;
3787 nir_lower_doubles_options lower_doubles_options;
3788 nir_divergence_options divergence_analysis_options;
3789
3790 /**
3791 * Support pack varyings with different interpolation location
3792 * (center, centroid, sample) and mode (flat, noperspective, smooth)
3793 * into same slot.
3794 */
3795 nir_pack_varying_options pack_varying_options;
3796 } nir_shader_compiler_options;
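
/*
 * Illustrative sketch of how a backend typically provides these options: a
 * single static const struct whose lifetime spans all shaders compiled by
 * the driver (the particular fields set below are arbitrary examples, not a
 * recommendation for any real backend):
 *
 *    static const nir_shader_compiler_options my_driver_nir_options = {
 *       .lower_fdiv = true,
 *       .lower_fsat = true,
 *       .lower_scmp = true,
 *       .fuse_ffma32 = true,
 *       .max_unroll_iterations = 32,
 *       .lower_int64_options = nir_lower_imul64 | nir_lower_divmod64,
 *    };
 *
 * A pointer to this struct is handed to nir_shader_create() (or to the
 * GLSL/SPIR-V front-end) and ends up in nir_shader::options.
 */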
3797
3798 typedef struct nir_shader {
3799 /** list of uniforms (nir_variable) */
3800 struct exec_list variables;
3801
3802 /** Set of driver-specific options for the shader.
3803 *
3804 * The memory for the options is expected to be kept in a single static
3805 * copy by the driver.
3806 */
3807 const struct nir_shader_compiler_options *options;
3808
3809 /** Various bits of compile-time information about a given shader */
3810 struct shader_info info;
3811
3812 struct exec_list functions; /** < list of nir_function */
3813
3814 struct list_head gc_list; /** < list of all nir_instrs allocated on the shader but not yet freed. */
3815
3816 /**
3817 * The size of the variable space for load_input_*, load_uniform_*, etc.
3818 * intrinsics. This is in back-end specific units which is likely one of
3819 * bytes, dwords, or vec4s depending on context and back-end.
3820 */
3821 unsigned num_inputs, num_uniforms, num_outputs;
3822
3823 /** Size in bytes of required scratch space */
3824 unsigned scratch_size;
3825
3826 /** Constant data associated with this shader.
3827 *
3828 * Constant data is loaded through load_constant intrinsics (as compared to
3829 * the NIR load_const instructions which have the constant value inlined
3830 * into them). This is usually generated by nir_opt_large_constants (so
3831 * shaders don't have to load_const into a temporary array when they want
3832 * to indirect on a const array).
3833 */
3834 void *constant_data;
3835 /** Size of the constant data associated with the shader, in bytes */
3836 unsigned constant_data_size;
3837
3838 unsigned printf_info_count;
3839 nir_printf_info *printf_info;
3840 } nir_shader;
3841
3842 #define nir_foreach_function(func, shader) \
3843 foreach_list_typed(nir_function, func, node, &(shader)->functions)
3844
3845 static inline nir_function_impl *
3846 nir_shader_get_entrypoint(nir_shader *shader)
3847 {
3848 nir_function *func = NULL;
3849
3850 nir_foreach_function(function, shader) {
3851 assert(func == NULL);
3852 if (function->is_entrypoint) {
3853 func = function;
3854 #ifndef NDEBUG
3855 break;
3856 #endif
3857 }
3858 }
3859
3860 if (!func)
3861 return NULL;
3862
3863 assert(func->num_params == 0);
3864 assert(func->impl);
3865 return func->impl;
3866 }
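
/*
 * Typical usage (a sketch): most backends operate directly on the entrypoint
 * impl, since shaders normally reach the backend with all other functions
 * inlined away.
 *
 *    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
 *    assert(impl != NULL);
 *    nir_index_ssa_defs(impl);
 */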
3867
3868 typedef struct nir_liveness_bounds {
3869 uint32_t start;
3870 uint32_t end;
3871 } nir_liveness_bounds;
3872
3873 typedef struct nir_instr_liveness {
3874 /**
3875 * nir_instr->index for the start and end of a single live interval for SSA
3876 * defs. ssa values last used by a nir_if condition will have an interval
3877 * ending at the first instruction after the last one before the if
3878 * condition.
3879 *
3880 * Indexed by def->index (impl->ssa_alloc elements).
3881 */
3882 struct nir_liveness_bounds *defs;
3883 } nir_instr_liveness;
3884
3885 nir_instr_liveness *
3886 nir_live_ssa_defs_per_instr(nir_function_impl *impl);
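
/*
 * Sketch of querying the result ("impl" and "def" are placeholders); the
 * bounds are expressed in terms of nir_instr::index as described above:
 *
 *    nir_instr_liveness *liveness = nir_live_ssa_defs_per_instr(impl);
 *    nir_liveness_bounds *bounds = &liveness->defs[def->index];
 *    ...  compare bounds->start and bounds->end against instruction indices
 */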
3887
3888 nir_shader *nir_shader_create(void *mem_ctx,
3889 gl_shader_stage stage,
3890 const nir_shader_compiler_options *options,
3891 shader_info *si);
3892
3893 nir_register *nir_local_reg_create(nir_function_impl *impl);
3894
3895 void nir_reg_remove(nir_register *reg);
3896
3897 /** Adds a variable to the appropriate list in nir_shader */
3898 void nir_shader_add_variable(nir_shader *shader, nir_variable *var);
3899
3900 static inline void
3901 nir_function_impl_add_variable(nir_function_impl *impl, nir_variable *var)
3902 {
3903 assert(var->data.mode == nir_var_function_temp);
3904 exec_list_push_tail(&impl->locals, &var->node);
3905 }
3906
3907 /** creates a variable, sets a few defaults, and adds it to the list */
3908 nir_variable *nir_variable_create(nir_shader *shader,
3909 nir_variable_mode mode,
3910 const struct glsl_type *type,
3911 const char *name);
3912 /** creates a local variable and adds it to the list */
3913 nir_variable *nir_local_variable_create(nir_function_impl *impl,
3914 const struct glsl_type *type,
3915 const char *name);
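
/*
 * Example (a sketch; the names, type, and location are arbitrary): creating
 * a fragment shader output plus a function-local temporary.
 *
 *    nir_variable *color_out =
 *       nir_variable_create(shader, nir_var_shader_out, glsl_vec4_type(),
 *                           "color_out");
 *    color_out->data.location = FRAG_RESULT_DATA0;
 *
 *    nir_variable *tmp =
 *       nir_local_variable_create(impl, glsl_float_type(), "tmp");
 */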
3916
3917 nir_variable *nir_find_variable_with_location(nir_shader *shader,
3918 nir_variable_mode mode,
3919 unsigned location);
3920
3921 nir_variable *nir_find_variable_with_driver_location(nir_shader *shader,
3922 nir_variable_mode mode,
3923 unsigned location);
3924
3925 void nir_sort_variables_with_modes(nir_shader *shader,
3926 int (*compar)(const nir_variable *,
3927 const nir_variable *),
3928 nir_variable_mode modes);
3929
3930 /** creates a function and adds it to the shader's list of functions */
3931 nir_function *nir_function_create(nir_shader *shader, const char *name);
3932
3933 nir_function_impl *nir_function_impl_create(nir_function *func);
3934 /** creates a function_impl that isn't tied to any particular function */
3935 nir_function_impl *nir_function_impl_create_bare(nir_shader *shader);
3936
3937 nir_block *nir_block_create(nir_shader *shader);
3938 nir_if *nir_if_create(nir_shader *shader);
3939 nir_loop *nir_loop_create(nir_shader *shader);
3940
3941 nir_function_impl *nir_cf_node_get_function(nir_cf_node *node);
3942
3943 /** requests that the given pieces of metadata be generated */
3944 void nir_metadata_require(nir_function_impl *impl, nir_metadata required, ...);
3945 /** dirties all but the preserved metadata */
3946 void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved);
3947 /** Preserves all metadata for the given shader */
3948 void nir_shader_preserve_all_metadata(nir_shader *shader);
3949
3950 /** creates an instruction with default swizzle/writemask/etc. with NULL registers */
3951 nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op);
3952
3953 nir_deref_instr *nir_deref_instr_create(nir_shader *shader,
3954 nir_deref_type deref_type);
3955
3956 nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type);
3957
3958 nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader,
3959 unsigned num_components,
3960 unsigned bit_size);
3961
3962 nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader,
3963 nir_intrinsic_op op);
3964
3965 nir_call_instr *nir_call_instr_create(nir_shader *shader,
3966 nir_function *callee);
3967
3968 /** Creates a NIR texture instruction */
3969 nir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs);
3970
3971 nir_phi_instr *nir_phi_instr_create(nir_shader *shader);
3972 nir_phi_src *nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_src src);
3973
3974 nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader);
3975
3976 nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader,
3977 unsigned num_components,
3978 unsigned bit_size);
3979
3980 nir_const_value nir_alu_binop_identity(nir_op binop, unsigned bit_size);
3981
3982 /**
3983 * NIR Cursors and Instruction Insertion API
3984 * @{
3985 *
3986 * A tiny struct representing a point to insert/extract instructions or
3987 * control flow nodes. Helps reduce the combinatorial explosion of possible
3988 * points to insert/extract.
3989 *
3990 * \sa nir_control_flow.h
3991 */
3992 typedef enum {
3993 nir_cursor_before_block,
3994 nir_cursor_after_block,
3995 nir_cursor_before_instr,
3996 nir_cursor_after_instr,
3997 } nir_cursor_option;
3998
3999 typedef struct {
4000 nir_cursor_option option;
4001 union {
4002 nir_block *block;
4003 nir_instr *instr;
4004 };
4005 } nir_cursor;
4006
4007 static inline nir_block *
4008 nir_cursor_current_block(nir_cursor cursor)
4009 {
4010 if (cursor.option == nir_cursor_before_instr ||
4011 cursor.option == nir_cursor_after_instr) {
4012 return cursor.instr->block;
4013 } else {
4014 return cursor.block;
4015 }
4016 }
4017
4018 bool nir_cursors_equal(nir_cursor a, nir_cursor b);
4019
4020 static inline nir_cursor
4021 nir_before_block(nir_block *block)
4022 {
4023 nir_cursor cursor;
4024 cursor.option = nir_cursor_before_block;
4025 cursor.block = block;
4026 return cursor;
4027 }
4028
4029 static inline nir_cursor
4030 nir_after_block(nir_block *block)
4031 {
4032 nir_cursor cursor;
4033 cursor.option = nir_cursor_after_block;
4034 cursor.block = block;
4035 return cursor;
4036 }
4037
4038 static inline nir_cursor
4039 nir_before_instr(nir_instr *instr)
4040 {
4041 nir_cursor cursor;
4042 cursor.option = nir_cursor_before_instr;
4043 cursor.instr = instr;
4044 return cursor;
4045 }
4046
4047 static inline nir_cursor
4048 nir_after_instr(nir_instr *instr)
4049 {
4050 nir_cursor cursor;
4051 cursor.option = nir_cursor_after_instr;
4052 cursor.instr = instr;
4053 return cursor;
4054 }
4055
4056 static inline nir_cursor
4057 nir_before_block_after_phis(nir_block *block)
4058 {
4059 nir_phi_instr *last_phi = nir_block_last_phi_instr(block);
4060 if (last_phi)
4061 return nir_after_instr(&last_phi->instr);
4062 else
4063 return nir_before_block(block);
4064 }
4065
4066 static inline nir_cursor
4067 nir_after_block_before_jump(nir_block *block)
4068 {
4069 nir_instr *last_instr = nir_block_last_instr(block);
4070 if (last_instr && last_instr->type == nir_instr_type_jump) {
4071 return nir_before_instr(last_instr);
4072 } else {
4073 return nir_after_block(block);
4074 }
4075 }
4076
4077 static inline nir_cursor
4078 nir_before_src(nir_src *src, bool is_if_condition)
4079 {
4080 if (is_if_condition) {
4081 nir_block *prev_block =
4082 nir_cf_node_as_block(nir_cf_node_prev(&src->parent_if->cf_node));
4083 assert(!nir_block_ends_in_jump(prev_block));
4084 return nir_after_block(prev_block);
4085 } else if (src->parent_instr->type == nir_instr_type_phi) {
4086 #ifndef NDEBUG
4087 nir_phi_instr *cond_phi = nir_instr_as_phi(src->parent_instr);
4088 bool found = false;
4089 nir_foreach_phi_src(phi_src, cond_phi) {
4090 if (phi_src->src.ssa == src->ssa) {
4091 found = true;
4092 break;
4093 }
4094 }
4095 assert(found);
4096 #endif
4097 /* The LIST_ENTRY macro is a generic container-of macro, it just happens
4098 * to have a more specific name.
4099 */
4100 nir_phi_src *phi_src = LIST_ENTRY(nir_phi_src, src, src);
4101 return nir_after_block_before_jump(phi_src->pred);
4102 } else {
4103 return nir_before_instr(src->parent_instr);
4104 }
4105 }
4106
4107 static inline nir_cursor
4108 nir_before_cf_node(nir_cf_node *node)
4109 {
4110 if (node->type == nir_cf_node_block)
4111 return nir_before_block(nir_cf_node_as_block(node));
4112
4113 return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node)));
4114 }
4115
4116 static inline nir_cursor
4117 nir_after_cf_node(nir_cf_node *node)
4118 {
4119 if (node->type == nir_cf_node_block)
4120 return nir_after_block(nir_cf_node_as_block(node));
4121
4122 return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node)));
4123 }
4124
4125 static inline nir_cursor
4126 nir_after_phis(nir_block *block)
4127 {
4128 nir_foreach_instr(instr, block) {
4129 if (instr->type != nir_instr_type_phi)
4130 return nir_before_instr(instr);
4131 }
4132 return nir_after_block(block);
4133 }
4134
4135 static inline nir_cursor
4136 nir_after_instr_and_phis(nir_instr *instr)
4137 {
4138 if (instr->type == nir_instr_type_phi)
4139 return nir_after_phis(instr->block);
4140 else
4141 return nir_after_instr(instr);
4142 }
4143
4144 static inline nir_cursor
4145 nir_after_cf_node_and_phis(nir_cf_node *node)
4146 {
4147 if (node->type == nir_cf_node_block)
4148 return nir_after_block(nir_cf_node_as_block(node));
4149
4150 nir_block *block = nir_cf_node_as_block(nir_cf_node_next(node));
4151
4152 return nir_after_phis(block);
4153 }
4154
4155 static inline nir_cursor
4156 nir_before_cf_list(struct exec_list *cf_list)
4157 {
4158 nir_cf_node *first_node = exec_node_data(nir_cf_node,
4159 exec_list_get_head(cf_list), node);
4160 return nir_before_cf_node(first_node);
4161 }
4162
4163 static inline nir_cursor
4164 nir_after_cf_list(struct exec_list *cf_list)
4165 {
4166 nir_cf_node *last_node = exec_node_data(nir_cf_node,
4167 exec_list_get_tail(cf_list), node);
4168 return nir_after_cf_node(last_node);
4169 }
4170
4171 /**
4172 * Insert a NIR instruction at the given cursor.
4173 *
4174 * Note: This does not update the cursor.
4175 */
4176 void nir_instr_insert(nir_cursor cursor, nir_instr *instr);
4177
4178 bool nir_instr_move(nir_cursor cursor, nir_instr *instr);
4179
4180 static inline void
4181 nir_instr_insert_before(nir_instr *instr, nir_instr *before)
4182 {
4183 nir_instr_insert(nir_before_instr(instr), before);
4184 }
4185
4186 static inline void
4187 nir_instr_insert_after(nir_instr *instr, nir_instr *after)
4188 {
4189 nir_instr_insert(nir_after_instr(instr), after);
4190 }
4191
4192 static inline void
4193 nir_instr_insert_before_block(nir_block *block, nir_instr *before)
4194 {
4195 nir_instr_insert(nir_before_block(block), before);
4196 }
4197
4198 static inline void
4199 nir_instr_insert_after_block(nir_block *block, nir_instr *after)
4200 {
4201 nir_instr_insert(nir_after_block(block), after);
4202 }
4203
4204 static inline void
4205 nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before)
4206 {
4207 nir_instr_insert(nir_before_cf_node(node), before);
4208 }
4209
4210 static inline void
4211 nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after)
4212 {
4213 nir_instr_insert(nir_after_cf_node(node), after);
4214 }
4215
4216 static inline void
4217 nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before)
4218 {
4219 nir_instr_insert(nir_before_cf_list(list), before);
4220 }
4221
4222 static inline void
4223 nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after)
4224 {
4225 nir_instr_insert(nir_after_cf_list(list), after);
4226 }
4227
4228 void nir_instr_remove_v(nir_instr *instr);
4229 void nir_instr_free(nir_instr *instr);
4230 void nir_instr_free_list(struct exec_list *list);
4231
4232 static inline nir_cursor
4233 nir_instr_remove(nir_instr *instr)
4234 {
4235 nir_cursor cursor;
4236 nir_instr *prev = nir_instr_prev(instr);
4237 if (prev) {
4238 cursor = nir_after_instr(prev);
4239 } else {
4240 cursor = nir_before_block(instr->block);
4241 }
4242 nir_instr_remove_v(instr);
4243 return cursor;
4244 }
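
/*
 * The returned cursor points at the spot the instruction used to occupy,
 * which makes in-place replacement straightforward (sketch; "old_instr" and
 * "new_instr" are placeholders):
 *
 *    nir_cursor c = nir_instr_remove(old_instr);
 *    nir_instr_insert(c, new_instr);
 */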
4245
4246 nir_cursor nir_instr_free_and_dce(nir_instr *instr);
4247
4248 /** @} */
4249
4250 nir_ssa_def *nir_instr_ssa_def(nir_instr *instr);
4251
4252 typedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state);
4253 typedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state);
4254 typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state);
4255 bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb,
4256 void *state);
4257 static inline bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state);
4258 static inline bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state);
4259 bool nir_foreach_phi_src_leaving_block(nir_block *instr,
4260 nir_foreach_src_cb cb,
4261 void *state);
4262
4263 nir_const_value *nir_src_as_const_value(nir_src src);
4264
4265 #define NIR_SRC_AS_(name, c_type, type_enum, cast_macro) \
4266 static inline c_type * \
4267 nir_src_as_ ## name (nir_src src) \
4268 { \
4269 return src.is_ssa && src.ssa->parent_instr->type == type_enum \
4270 ? cast_macro(src.ssa->parent_instr) : NULL; \
4271 }
4272
4273 NIR_SRC_AS_(alu_instr, nir_alu_instr, nir_instr_type_alu, nir_instr_as_alu)
4274 NIR_SRC_AS_(intrinsic, nir_intrinsic_instr,
4275 nir_instr_type_intrinsic, nir_instr_as_intrinsic)
4276 NIR_SRC_AS_(deref, nir_deref_instr, nir_instr_type_deref, nir_instr_as_deref)
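
/*
 * Example (sketch): pattern-matching on the instruction that produces a
 * source. The cast helpers return non-NULL only when the source is SSA and
 * comes from the requested instruction type.
 *
 *    nir_alu_instr *parent = nir_src_as_alu_instr(alu->src[0].src);
 *    if (parent && parent->op == nir_op_fneg) {
 *       ...  the first source is the result of an fneg
 *    }
 */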
4277
4278 bool nir_src_is_dynamically_uniform(nir_src src);
4279 bool nir_srcs_equal(nir_src src1, nir_src src2);
4280 bool nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2);
4281
4282 static inline void
4283 nir_instr_rewrite_src_ssa(ASSERTED nir_instr *instr,
4284 nir_src *src, nir_ssa_def *new_ssa)
4285 {
4286 assert(src->parent_instr == instr);
4287 assert(src->is_ssa && src->ssa);
4288 list_del(&src->use_link);
4289 src->ssa = new_ssa;
4290 list_addtail(&src->use_link, &new_ssa->uses);
4291 }
4292
4293 void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);
4294 void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src);
4295
4296 static inline void
4297 nir_if_rewrite_condition_ssa(ASSERTED nir_if *if_stmt,
4298 nir_src *src, nir_ssa_def *new_ssa)
4299 {
4300 assert(src->parent_if == if_stmt);
4301 assert(src->is_ssa && src->ssa);
4302 list_del(&src->use_link);
4303 src->ssa = new_ssa;
4304 list_addtail(&src->use_link, &new_ssa->if_uses);
4305 }
4306
4307 void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src);
4308 void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest,
4309 nir_dest new_dest);
4310
4311 void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
4312 unsigned num_components, unsigned bit_size,
4313 const char *name);
4314 void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
4315 unsigned num_components, unsigned bit_size);
4316 static inline void
4317 nir_ssa_dest_init_for_type(nir_instr *instr, nir_dest *dest,
4318 const struct glsl_type *type,
4319 const char *name)
4320 {
4321 assert(glsl_type_is_vector_or_scalar(type));
4322 nir_ssa_dest_init(instr, dest, glsl_get_components(type),
4323 glsl_get_bit_size(type), name);
4324 }
4325 void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_ssa_def *new_ssa);
4326 void nir_ssa_def_rewrite_uses_src(nir_ssa_def *def, nir_src new_src);
4327 void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_ssa_def *new_ssa,
4328 nir_instr *after_me);
4329
4330 nir_component_mask_t nir_src_components_read(const nir_src *src);
4331 nir_component_mask_t nir_ssa_def_components_read(const nir_ssa_def *def);
4332
4333 static inline bool
4334 nir_ssa_def_is_unused(nir_ssa_def *ssa)
4335 {
4336 return list_is_empty(&ssa->uses) && list_is_empty(&ssa->if_uses);
4337 }
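
/*
 * Common replacement pattern (sketch; "alu" and "replacement" are
 * placeholders): point every user of the old def at the new one, then delete
 * the producer once nothing references it anymore.
 *
 *    nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, replacement);
 *    if (nir_ssa_def_is_unused(&alu->dest.dest.ssa))
 *       nir_instr_remove(&alu->instr);
 */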
4338
4339
4340 /** Returns the next block, disregarding structure
4341 *
4342 * The ordering is deterministic but has no guarantees beyond that. In
4343 * particular, it is not guaranteed to be dominance-preserving.
4344 */
4345 nir_block *nir_block_unstructured_next(nir_block *block);
4346 nir_block *nir_unstructured_start_block(nir_function_impl *impl);
4347
4348 #define nir_foreach_block_unstructured(block, impl) \
4349 for (nir_block *block = nir_unstructured_start_block(impl); block != NULL; \
4350 block = nir_block_unstructured_next(block))
4351
4352 #define nir_foreach_block_unstructured_safe(block, impl) \
4353 for (nir_block *block = nir_unstructured_start_block(impl), \
4354 *next = nir_block_unstructured_next(block); \
4355 block != NULL; \
4356 block = next, next = nir_block_unstructured_next(block))
4357
4358 /*
4359 * finds the next basic block in source-code order; returns NULL if there is
4360 * none
4361 */
4362
4363 nir_block *nir_block_cf_tree_next(nir_block *block);
4364
4365 /* Performs the opposite of nir_block_cf_tree_next() */
4366
4367 nir_block *nir_block_cf_tree_prev(nir_block *block);
4368
4369 /* Gets the first block in a CF node in source-code order */
4370
4371 nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node);
4372
4373 /* Gets the last block in a CF node in source-code order */
4374
4375 nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node);
4376
4377 /* Gets the next block after a CF node in source-code order */
4378
4379 nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node);
4380
4381 /* Macros for loops that visit blocks in source-code order */
4382
4383 #define nir_foreach_block(block, impl) \
4384 for (nir_block *block = nir_start_block(impl); block != NULL; \
4385 block = nir_block_cf_tree_next(block))
4386
4387 #define nir_foreach_block_safe(block, impl) \
4388 for (nir_block *block = nir_start_block(impl), \
4389 *next = nir_block_cf_tree_next(block); \
4390 block != NULL; \
4391 block = next, next = nir_block_cf_tree_next(block))
4392
4393 #define nir_foreach_block_reverse(block, impl) \
4394 for (nir_block *block = nir_impl_last_block(impl); block != NULL; \
4395 block = nir_block_cf_tree_prev(block))
4396
4397 #define nir_foreach_block_reverse_safe(block, impl) \
4398 for (nir_block *block = nir_impl_last_block(impl), \
4399 *prev = nir_block_cf_tree_prev(block); \
4400 block != NULL; \
4401 block = prev, prev = nir_block_cf_tree_prev(block))
4402
4403 #define nir_foreach_block_in_cf_node(block, node) \
4404 for (nir_block *block = nir_cf_node_cf_tree_first(node); \
4405 block != nir_cf_node_cf_tree_next(node); \
4406 block = nir_block_cf_tree_next(block))
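
/*
 * Example (sketch): a simple source-code-order walk over a function, here
 * just counting instructions; use the _safe variants when the loop body may
 * remove the current block or instruction.
 *
 *    unsigned num_instrs = 0;
 *    nir_foreach_block(block, impl) {
 *       nir_foreach_instr(instr, block)
 *          num_instrs++;
 *    }
 */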
4407
4408 /* If the following CF node is an if, this function returns that if.
4409 * Otherwise, it returns NULL.
4410 */
4411 nir_if *nir_block_get_following_if(nir_block *block);
4412
4413 nir_loop *nir_block_get_following_loop(nir_block *block);
4414
4415 nir_block **nir_block_get_predecessors_sorted(const nir_block *block, void *mem_ctx);
4416
4417 void nir_index_local_regs(nir_function_impl *impl);
4418 void nir_index_ssa_defs(nir_function_impl *impl);
4419 unsigned nir_index_instrs(nir_function_impl *impl);
4420
4421 void nir_index_blocks(nir_function_impl *impl);
4422
4423 unsigned nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes);
4424 unsigned nir_function_impl_index_vars(nir_function_impl *impl);
4425
4426 void nir_print_shader(nir_shader *shader, FILE *fp);
4427 void nir_print_shader_annotated(nir_shader *shader, FILE *fp, struct hash_table *errors);
4428 void nir_print_instr(const nir_instr *instr, FILE *fp);
4429 void nir_print_deref(const nir_deref_instr *deref, FILE *fp);
4430 void nir_log_shader_annotated_tagged(enum mesa_log_level level, const char *tag, nir_shader *shader, struct hash_table *annotations);
4431 #define nir_log_shadere(s) nir_log_shader_annotated_tagged(MESA_LOG_ERROR, (MESA_LOG_TAG), (s), NULL)
4432 #define nir_log_shaderw(s) nir_log_shader_annotated_tagged(MESA_LOG_WARN, (MESA_LOG_TAG), (s), NULL)
4433 #define nir_log_shaderi(s) nir_log_shader_annotated_tagged(MESA_LOG_INFO, (MESA_LOG_TAG), (s), NULL)
4434 #define nir_log_shader_annotated(s, annotations) nir_log_shader_annotated_tagged(MESA_LOG_ERROR, (MESA_LOG_TAG), (s), annotations)
4435
4436 char *nir_shader_as_str(nir_shader *nir, void *mem_ctx);
4437 char *nir_shader_as_str_annotated(nir_shader *nir, struct hash_table *annotations, void *mem_ctx);
4438
4439 /** Shallow clone of a single instruction. */
4440 nir_instr *nir_instr_clone(nir_shader *s, const nir_instr *orig);
4441
4442 /** Shallow clone of a single ALU instruction. */
4443 nir_alu_instr *nir_alu_instr_clone(nir_shader *s, const nir_alu_instr *orig);
4444
4445 nir_shader *nir_shader_clone(void *mem_ctx, const nir_shader *s);
4446 nir_function_impl *nir_function_impl_clone(nir_shader *shader,
4447 const nir_function_impl *fi);
4448 nir_constant *nir_constant_clone(const nir_constant *c, nir_variable *var);
4449 nir_variable *nir_variable_clone(const nir_variable *c, nir_shader *shader);
4450
4451 void nir_shader_replace(nir_shader *dest, nir_shader *src);
4452
4453 void nir_shader_serialize_deserialize(nir_shader *s);
4454
4455 #ifndef NDEBUG
4456 void nir_validate_shader(nir_shader *shader, const char *when);
4457 void nir_validate_ssa_dominance(nir_shader *shader, const char *when);
4458 void nir_metadata_set_validation_flag(nir_shader *shader);
4459 void nir_metadata_check_validation_flag(nir_shader *shader);
4460
4461 static inline bool
4462 should_skip_nir(const char *name)
4463 {
4464 static const char *list = NULL;
4465 if (!list) {
4466 /* Comma separated list of names to skip. */
4467 list = getenv("NIR_SKIP");
4468 if (!list)
4469 list = "";
4470 }
4471
4472 if (!list[0])
4473 return false;
4474
4475 return comma_separated_list_contains(list, name);
4476 }
4477
4478 static inline bool
4479 should_clone_nir(void)
4480 {
4481 static int should_clone = -1;
4482 if (should_clone < 0)
4483 should_clone = env_var_as_boolean("NIR_TEST_CLONE", false);
4484
4485 return should_clone;
4486 }
4487
4488 static inline bool
4489 should_serialize_deserialize_nir(void)
4490 {
4491 static int test_serialize = -1;
4492 if (test_serialize < 0)
4493 test_serialize = env_var_as_boolean("NIR_TEST_SERIALIZE", false);
4494
4495 return test_serialize;
4496 }
4497
4498 static inline bool
4499 should_print_nir(nir_shader *shader)
4500 {
4501 static int should_print = -1;
4502 if (should_print < 0)
4503 should_print = env_var_as_unsigned("NIR_PRINT", 0);
4504
4505 if (should_print == 1)
4506 return !shader->info.internal;
4507
4508 return should_print;
4509 }
4510 #else
4511 static inline void nir_validate_shader(nir_shader *shader, const char *when) { (void) shader; (void)when; }
4512 static inline void nir_validate_ssa_dominance(nir_shader *shader, const char *when) { (void) shader; (void)when; }
4513 static inline void nir_metadata_set_validation_flag(nir_shader *shader) { (void) shader; }
4514 static inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; }
4515 static inline bool should_skip_nir(UNUSED const char *pass_name) { return false; }
4516 static inline bool should_clone_nir(void) { return false; }
4517 static inline bool should_serialize_deserialize_nir(void) { return false; }
4518 static inline bool should_print_nir(nir_shader *shader) { return false; }
4519 #endif /* NDEBUG */
4520
4521 #define _PASS(pass, nir, do_pass) do { \
4522 if (should_skip_nir(#pass)) { \
4523 printf("skipping %s\n", #pass); \
4524 break; \
4525 } \
4526 do_pass \
4527 if (should_clone_nir()) { \
4528 nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \
4529 nir_shader_replace(nir, clone); \
4530 } \
4531 if (should_serialize_deserialize_nir()) { \
4532 nir_shader_serialize_deserialize(nir); \
4533 } \
4534 } while (0)
4535
4536 #define NIR_PASS(progress, nir, pass, ...) _PASS(pass, nir, \
4537 nir_metadata_set_validation_flag(nir); \
4538 if (should_print_nir(nir)) \
4539 printf("%s\n", #pass); \
4540 if (pass(nir, ##__VA_ARGS__)) { \
4541 nir_validate_shader(nir, "after " #pass); \
4542 progress = true; \
4543 if (should_print_nir(nir)) \
4544 nir_print_shader(nir, stdout); \
4545 nir_metadata_check_validation_flag(nir); \
4546 } \
4547 )
4548
4549 #define NIR_PASS_V(nir, pass, ...) _PASS(pass, nir, \
4550 if (should_print_nir(nir)) \
4551 printf("%s\n", #pass); \
4552 pass(nir, ##__VA_ARGS__); \
4553 nir_validate_shader(nir, "after " #pass); \
4554 if (should_print_nir(nir)) \
4555 nir_print_shader(nir, stdout); \
4556 )
4557
4558 #define NIR_SKIP(name) should_skip_nir(#name)
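
/*
 * Typical driver usage (illustrative; the exact pass list is arbitrary): run
 * an optimization loop with NIR_PASS until nothing reports progress, and use
 * NIR_PASS_V for passes whose return value is not interesting.
 *
 *    bool progress;
 *    do {
 *       progress = false;
 *       NIR_PASS(progress, nir, nir_opt_algebraic);
 *       NIR_PASS(progress, nir, nir_opt_cse);
 *       NIR_PASS(progress, nir, nir_opt_dce);
 *    } while (progress);
 *
 *    NIR_PASS_V(nir, nir_lower_system_values);
 */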
4559
4560 /** An instruction filtering callback with writemask
4561 *
4562 * Returns true if the instruction should be processed with the associated
4563 * writemask and false otherwise.
4564 */
4565 typedef bool (*nir_instr_writemask_filter_cb)(const nir_instr *,
4566 unsigned writemask, const void *);
4567
4568 /** A simple instruction lowering callback
4569 *
4570 * Many instruction lowering passes can be written as a simple function which
4571 * takes an instruction as its input and returns a sequence of instructions
4572 * that implement the consumed instruction. This function type represents
4573 * such a lowering function. When called, a function with this prototype
4574 * should either return NULL indicating that no lowering needs to be done or
4575 * emit a sequence of instructions using the provided builder (whose cursor
4576 * will already be placed after the instruction to be lowered) and return the
4577 * resulting nir_ssa_def.
4578 */
4579 typedef nir_ssa_def *(*nir_lower_instr_cb)(struct nir_builder *,
4580 nir_instr *, void *);
4581
4582 /**
4583 * Special return value for nir_lower_instr_cb when some progress occurred
4584 * (like changing an input to the instr) that didn't result in a replacement
4585 * SSA def being generated.
4586 */
4587 #define NIR_LOWER_INSTR_PROGRESS ((nir_ssa_def *)(uintptr_t)1)
4588
4589 /**
4590 * Special return value for nir_lower_instr_cb when progress occurred and the
4591 * current instruction should be removed even though it doesn't create an
4592 * output (like a store)
4593 */
4594
4595 #define NIR_LOWER_INSTR_PROGRESS_REPLACE ((nir_ssa_def *)(uintptr_t)2)
4596
4597 /** Iterate over all the instructions in a nir_function_impl and lower them
4598 * using the provided callbacks
4599 *
4600 * This function implements the guts of a standard lowering pass for you. It
4601 * iterates over all of the instructions in a nir_function_impl and calls the
4602 * filter callback on each one. If the filter callback returns true, it then
4603 * calls the lowering callback on the instruction. (Splitting it this way
4604 * allows us to avoid some save/restore work for instructions we know won't be
4605 * lowered.) If the instruction is dead after the lowering is complete, it
4606 * will be removed. If new instructions are added, the lowering callback will
4607 * also be called on them in case multiple lowerings are required.
4608 *
4609 * If the callback indicates that the original instruction is replaced (either
4610 * through a new SSA def or NIR_LOWER_INSTR_PROGRESS_REPLACE), then the
4611 * instruction is removed along with any now-dead SSA defs it used.
4612 *
4613 * The metadata for the nir_function_impl will also be updated. If any blocks
4614 * are added (they cannot be removed), dominance and block indices will be
4615 * invalidated.
4616 */
4617 bool nir_function_impl_lower_instructions(nir_function_impl *impl,
4618 nir_instr_filter_cb filter,
4619 nir_lower_instr_cb lower,
4620 void *cb_data);
4621 bool nir_shader_lower_instructions(nir_shader *shader,
4622 nir_instr_filter_cb filter,
4623 nir_lower_instr_cb lower,
4624 void *cb_data);
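
/*
 * Sketch of a pass built on this helper: lowering fsat to fmin/fmax using
 * builder helpers from nir_builder.h. The 32-bit immediates are an
 * assumption for brevity; a real pass would match the source bit size.
 *
 *    static bool
 *    is_fsat(const nir_instr *instr, const void *data)
 *    {
 *       return instr->type == nir_instr_type_alu &&
 *              nir_instr_as_alu(instr)->op == nir_op_fsat;
 *    }
 *
 *    static nir_ssa_def *
 *    lower_fsat(struct nir_builder *b, nir_instr *instr, void *data)
 *    {
 *       nir_ssa_def *x = nir_ssa_for_alu_src(b, nir_instr_as_alu(instr), 0);
 *       return nir_fmin(b, nir_fmax(b, x, nir_imm_float(b, 0.0f)),
 *                       nir_imm_float(b, 1.0f));
 *    }
 *
 *    ...  nir_shader_lower_instructions(shader, is_fsat, lower_fsat, NULL);
 */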
4625
4626 void nir_calc_dominance_impl(nir_function_impl *impl);
4627 void nir_calc_dominance(nir_shader *shader);
4628
4629 nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2);
4630 bool nir_block_dominates(nir_block *parent, nir_block *child);
4631 bool nir_block_is_unreachable(nir_block *block);
4632
4633 void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp);
4634 void nir_dump_dom_tree(nir_shader *shader, FILE *fp);
4635
4636 void nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp);
4637 void nir_dump_dom_frontier(nir_shader *shader, FILE *fp);
4638
4639 void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp);
4640 void nir_dump_cfg(nir_shader *shader, FILE *fp);
4641
4642 void nir_gs_count_vertices_and_primitives(const nir_shader *shader,
4643 int *out_vtxcnt,
4644 int *out_prmcnt,
4645 unsigned num_streams);
4646
4647 bool nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes);
4648 bool nir_split_array_vars(nir_shader *shader, nir_variable_mode modes);
4649 bool nir_split_var_copies(nir_shader *shader);
4650 bool nir_split_per_member_structs(nir_shader *shader);
4651 bool nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes);
4652
4653 bool nir_lower_returns_impl(nir_function_impl *impl);
4654 bool nir_lower_returns(nir_shader *shader);
4655
4656 void nir_inline_function_impl(struct nir_builder *b,
4657 const nir_function_impl *impl,
4658 nir_ssa_def **params,
4659 struct hash_table *shader_var_remap);
4660 bool nir_inline_functions(nir_shader *shader);
4661
4662 void nir_find_inlinable_uniforms(nir_shader *shader);
4663 void nir_inline_uniforms(nir_shader *shader, unsigned num_uniforms,
4664 const uint32_t *uniform_values,
4665 const uint16_t *uniform_dw_offsets);
4666
4667 bool nir_propagate_invariant(nir_shader *shader, bool invariant_prim);
4668
4669 void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, nir_shader *shader);
4670 void nir_lower_deref_copy_instr(struct nir_builder *b,
4671 nir_intrinsic_instr *copy);
4672 bool nir_lower_var_copies(nir_shader *shader);
4673
4674 bool nir_opt_memcpy(nir_shader *shader);
4675 bool nir_lower_memcpy(nir_shader *shader);
4676
4677 void nir_fixup_deref_modes(nir_shader *shader);
4678
4679 bool nir_lower_global_vars_to_local(nir_shader *shader);
4680
4681 typedef enum {
4682 nir_lower_direct_array_deref_of_vec_load = (1 << 0),
4683 nir_lower_indirect_array_deref_of_vec_load = (1 << 1),
4684 nir_lower_direct_array_deref_of_vec_store = (1 << 2),
4685 nir_lower_indirect_array_deref_of_vec_store = (1 << 3),
4686 } nir_lower_array_deref_of_vec_options;
4687
4688 bool nir_lower_array_deref_of_vec(nir_shader *shader, nir_variable_mode modes,
4689 nir_lower_array_deref_of_vec_options options);
4690
4691 bool nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes,
4692 uint32_t max_lower_array_len);
4693
4694 bool nir_lower_indirect_builtin_uniform_derefs(nir_shader *shader);
4695
4696 bool nir_lower_locals_to_regs(nir_shader *shader);
4697
4698 void nir_lower_io_to_temporaries(nir_shader *shader,
4699 nir_function_impl *entrypoint,
4700 bool outputs, bool inputs);
4701
4702 bool nir_lower_vars_to_scratch(nir_shader *shader,
4703 nir_variable_mode modes,
4704 int size_threshold,
4705 glsl_type_size_align_func size_align);
4706
4707 void nir_lower_clip_halfz(nir_shader *shader);
4708
4709 void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint);
4710
4711 void nir_gather_ssa_types(nir_function_impl *impl,
4712 BITSET_WORD *float_types,
4713 BITSET_WORD *int_types);
4714
4715 void nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode,
4716 unsigned *size,
4717 int (*type_size)(const struct glsl_type *, bool));
4718
4719 /* Some helpers to do very simple linking */
4720 bool nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer);
4721 bool nir_remove_unused_io_vars(nir_shader *shader, nir_variable_mode mode,
4722 uint64_t *used_by_other_stage,
4723 uint64_t *used_by_other_stage_patches);
4724 void nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
4725 bool default_to_smooth_interp);
4726 void nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer);
4727 bool nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer);
4728 void nir_link_varying_precision(nir_shader *producer, nir_shader *consumer);
4729
4730 bool nir_lower_amul(nir_shader *shader,
4731 int (*type_size)(const struct glsl_type *, bool));
4732
4733 bool nir_lower_ubo_vec4(nir_shader *shader);
4734
4735 void nir_assign_io_var_locations(nir_shader *shader,
4736 nir_variable_mode mode,
4737 unsigned *size,
4738 gl_shader_stage stage);
4739
4740 typedef struct {
4741 uint8_t num_linked_io_vars;
4742 uint8_t num_linked_patch_io_vars;
4743 } nir_linked_io_var_info;
4744
4745 nir_linked_io_var_info
4746 nir_assign_linked_io_var_locations(nir_shader *producer,
4747 nir_shader *consumer);
4748
4749 typedef enum {
4750 /* If set, this causes all 64-bit IO operations to be lowered on-the-fly
4751 * to 32-bit operations. This is only valid for nir_var_shader_in/out
4752 * modes.
4753 */
4754 nir_lower_io_lower_64bit_to_32 = (1 << 0),
4755
4756 /* If set, this forces all non-flat fragment shader inputs to be
4757 * interpolated as if with the "sample" qualifier. This requires
4758 * nir_shader_compiler_options::use_interpolated_input_intrinsics.
4759 */
4760 nir_lower_io_force_sample_interpolation = (1 << 1),
4761 } nir_lower_io_options;
4762 bool nir_lower_io(nir_shader *shader,
4763 nir_variable_mode modes,
4764 int (*type_size)(const struct glsl_type *, bool),
4765 nir_lower_io_options);
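
/*
 * Sketch of a typical call. The type_size callback below simply counts
 * attribute slots; real drivers often measure IO in driver-specific units
 * (bytes, dwords, vec4s):
 *
 *    static int
 *    io_type_size(const struct glsl_type *type, bool bindless)
 *    {
 *       return glsl_count_attribute_slots(type, false);
 *    }
 *
 *    ...  nir_lower_io(nir, nir_var_shader_in | nir_var_shader_out,
 *                      io_type_size, nir_lower_io_lower_64bit_to_32);
 */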
4766
4767 bool nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes);
4768
4769 bool
4770 nir_lower_vars_to_explicit_types(nir_shader *shader,
4771 nir_variable_mode modes,
4772 glsl_type_size_align_func type_info);
4773 void
4774 nir_gather_explicit_io_initializers(nir_shader *shader,
4775 void *dst, size_t dst_size,
4776 nir_variable_mode mode);
4777
4778 bool nir_lower_vec3_to_vec4(nir_shader *shader, nir_variable_mode modes);
4779
4780 typedef enum {
4781 /**
4782 * An address format which is a simple 32-bit global GPU address.
4783 */
4784 nir_address_format_32bit_global,
4785
4786 /**
4787 * An address format which is a simple 64-bit global GPU address.
4788 */
4789 nir_address_format_64bit_global,
4790
4791 /**
4792 * An address format which is a 64-bit global base address and a 32-bit
4793 * offset.
4794 *
4795 * The address is comprised as a 32-bit vec4 where .xy are a uint64_t base
4796 * address stored with the low bits in .x and high bits in .y, .z is
4797 * undefined, and .w is an offset. This is intended to match
4798 * 64bit_bounded_global but without the bounds checking.
4799 */
4800 nir_address_format_64bit_global_32bit_offset,
4801
4802 /**
4803 * An address format which is a bounds-checked 64-bit global GPU address.
4804 *
4805 * The address is comprised as a 32-bit vec4 where .xy are a uint64_t base
4806 * address stored with the low bits in .x and high bits in .y, .z is a
4807 * size, and .w is an offset. When the final I/O operation is lowered, .w
4808 * is checked against .z and the operation is predicated on the result.
4809 */
4810 nir_address_format_64bit_bounded_global,
4811
4812 /**
4813 * An address format which is comprised of a vec2 where the first
4814 * component is a buffer index and the second is an offset.
4815 */
4816 nir_address_format_32bit_index_offset,
4817
4818 /**
4819 * An address format which is a 64-bit value, where the high 32 bits
4820 * are a buffer index, and the low 32 bits are an offset.
4821 */
4822 nir_address_format_32bit_index_offset_pack64,
4823
4824 /**
4825 * An address format which is comprised of a vec3 where the first two
4826 * components specify the buffer and the third is an offset.
4827 */
4828 nir_address_format_vec2_index_32bit_offset,
4829
4830 /**
4831 * An address format which represents generic pointers with a 62-bit
4832 * pointer and a 2-bit enum in the top two bits. The top two bits have
4833 * the following meanings:
4834 *
4835 * - 0x0: Global memory
4836 * - 0x1: Shared memory
4837 * - 0x2: Scratch memory
4838 * - 0x3: Global memory
4839 *
4840 * The redundancy between 0x0 and 0x3 is because of Intel sign-extension of
4841 * addresses. Valid global memory addresses may naturally have either 0 or
4842 * ~0 as their high bits.
4843 *
4844 * Shared and scratch pointers are represented as 32-bit offsets with the
4845 * top 32 bits only being used for the enum. This allows us to avoid
4846 * 64-bit address calculations in a bunch of cases.
4847 */
4848 nir_address_format_62bit_generic,
4849
4850 /**
4851 * An address format which is a simple 32-bit offset.
4852 */
4853 nir_address_format_32bit_offset,
4854
4855 /**
4856 * An address format which is a simple 32-bit offset cast to 64-bit.
4857 */
4858 nir_address_format_32bit_offset_as_64bit,
4859
4860 /**
4861 * An address format representing a purely logical addressing model. In
4862 * this model, all deref chains must be complete from the dereference
4863 * operation to the variable. Cast derefs are not allowed. These
4864 * addresses will be 32-bit scalars but the format is immaterial because
4865 * you can always chase the chain.
4866 */
4867 nir_address_format_logical,
4868 } nir_address_format;
4869
4870 static inline unsigned
4871 nir_address_format_bit_size(nir_address_format addr_format)
4872 {
4873 switch (addr_format) {
4874 case nir_address_format_32bit_global: return 32;
4875 case nir_address_format_64bit_global: return 64;
4876 case nir_address_format_64bit_global_32bit_offset: return 32;
4877 case nir_address_format_64bit_bounded_global: return 32;
4878 case nir_address_format_32bit_index_offset: return 32;
4879 case nir_address_format_32bit_index_offset_pack64: return 64;
4880 case nir_address_format_vec2_index_32bit_offset: return 32;
4881 case nir_address_format_62bit_generic: return 64;
4882 case nir_address_format_32bit_offset: return 32;
4883 case nir_address_format_32bit_offset_as_64bit: return 64;
4884 case nir_address_format_logical: return 32;
4885 }
4886 unreachable("Invalid address format");
4887 }
4888
4889 static inline unsigned
4890 nir_address_format_num_components(nir_address_format addr_format)
4891 {
4892 switch (addr_format) {
4893 case nir_address_format_32bit_global: return 1;
4894 case nir_address_format_64bit_global: return 1;
4895 case nir_address_format_64bit_global_32bit_offset: return 4;
4896 case nir_address_format_64bit_bounded_global: return 4;
4897 case nir_address_format_32bit_index_offset: return 2;
4898 case nir_address_format_32bit_index_offset_pack64: return 1;
4899 case nir_address_format_vec2_index_32bit_offset: return 3;
4900 case nir_address_format_62bit_generic: return 1;
4901 case nir_address_format_32bit_offset: return 1;
4902 case nir_address_format_32bit_offset_as_64bit: return 1;
4903 case nir_address_format_logical: return 1;
4904 }
4905 unreachable("Invalid address format");
4906 }
4907
4908 static inline const struct glsl_type *
4909 nir_address_format_to_glsl_type(nir_address_format addr_format)
4910 {
4911 unsigned bit_size = nir_address_format_bit_size(addr_format);
4912 assert(bit_size == 32 || bit_size == 64);
4913 return glsl_vector_type(bit_size == 32 ? GLSL_TYPE_UINT : GLSL_TYPE_UINT64,
4914 nir_address_format_num_components(addr_format));
4915 }
4916
4917 const nir_const_value *nir_address_format_null_value(nir_address_format addr_format);
4918
4919 nir_ssa_def *nir_build_addr_ieq(struct nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
4920 nir_address_format addr_format);
4921
4922 nir_ssa_def *nir_build_addr_isub(struct nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
4923 nir_address_format addr_format);
4924
4925 nir_ssa_def * nir_explicit_io_address_from_deref(struct nir_builder *b,
4926 nir_deref_instr *deref,
4927 nir_ssa_def *base_addr,
4928 nir_address_format addr_format);
4929
4930 bool nir_get_explicit_deref_align(nir_deref_instr *deref,
4931 bool default_to_type_align,
4932 uint32_t *align_mul,
4933 uint32_t *align_offset);
4934
4935 void nir_lower_explicit_io_instr(struct nir_builder *b,
4936 nir_intrinsic_instr *io_instr,
4937 nir_ssa_def *addr,
4938 nir_address_format addr_format);
4939
4940 bool nir_lower_explicit_io(nir_shader *shader,
4941 nir_variable_mode modes,
4942 nir_address_format);
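
/*
 * Sketch of the usual two-step sequence for a memory mode that needs an
 * explicit layout assigned first (shared memory here; whether the explicit
 * types pass is needed depends on where the shader came from):
 *
 *    nir_lower_vars_to_explicit_types(nir, nir_var_mem_shared,
 *                                     glsl_get_natural_size_align_bytes);
 *    nir_lower_explicit_io(nir, nir_var_mem_shared,
 *                          nir_address_format_32bit_offset);
 */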
4943
4944 bool
4945 nir_lower_shader_calls(nir_shader *shader,
4946 nir_address_format address_format,
4947 unsigned stack_alignment,
4948 nir_shader ***resume_shaders_out,
4949 uint32_t *num_resume_shaders_out,
4950 void *mem_ctx);
4951
4952 nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr);
4953 nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr);
4954 nir_src *nir_get_shader_call_payload_src(nir_intrinsic_instr *call);
4955
4956 bool nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage);
4957
4958 bool nir_lower_regs_to_ssa_impl(nir_function_impl *impl);
4959 bool nir_lower_regs_to_ssa(nir_shader *shader);
4960 bool nir_lower_vars_to_ssa(nir_shader *shader);
4961
4962 bool nir_remove_dead_derefs(nir_shader *shader);
4963 bool nir_remove_dead_derefs_impl(nir_function_impl *impl);
4964
4965 typedef struct nir_remove_dead_variables_options {
4966 bool (*can_remove_var)(nir_variable *var, void *data);
4967 void *can_remove_var_data;
4968 } nir_remove_dead_variables_options;
4969
4970 bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes,
4971 const nir_remove_dead_variables_options *options);
4972
4973 bool nir_lower_variable_initializers(nir_shader *shader,
4974 nir_variable_mode modes);
4975 bool nir_zero_initialize_shared_memory(nir_shader *shader,
4976 const unsigned shared_size,
4977 const unsigned chunk_size);
4978
4979 bool nir_move_vec_src_uses_to_dest(nir_shader *shader);
4980 bool nir_lower_vec_to_movs(nir_shader *shader, nir_instr_writemask_filter_cb cb,
4981 const void *_data);
4982 void nir_lower_alpha_test(nir_shader *shader, enum compare_func func,
4983 bool alpha_to_one,
4984 const gl_state_index16 *alpha_ref_state_tokens);
4985 bool nir_lower_alu(nir_shader *shader);
4986
4987 bool nir_lower_flrp(nir_shader *shader, unsigned lowering_mask,
4988 bool always_precise);
4989
4990 bool nir_lower_alu_to_scalar(nir_shader *shader, nir_instr_filter_cb cb, const void *data);
4991 bool nir_lower_bool_to_bitsize(nir_shader *shader);
4992 bool nir_lower_bool_to_float(nir_shader *shader);
4993 bool nir_lower_bool_to_int32(nir_shader *shader);
4994 bool nir_opt_simplify_convert_alu_types(nir_shader *shader);
4995 bool nir_lower_convert_alu_types(nir_shader *shader,
4996 bool (*should_lower)(nir_intrinsic_instr *));
4997 bool nir_lower_constant_convert_alu_types(nir_shader *shader);
4998 bool nir_lower_alu_conversion_to_intrinsic(nir_shader *shader);
4999 bool nir_lower_int_to_float(nir_shader *shader);
5000 bool nir_lower_load_const_to_scalar(nir_shader *shader);
5001 bool nir_lower_read_invocation_to_scalar(nir_shader *shader);
5002 bool nir_lower_phis_to_scalar(nir_shader *shader, bool lower_all);
5003 void nir_lower_io_arrays_to_elements(nir_shader *producer, nir_shader *consumer);
5004 void nir_lower_io_arrays_to_elements_no_indirects(nir_shader *shader,
5005 bool outputs_only);
5006 void nir_lower_io_to_scalar(nir_shader *shader, nir_variable_mode mask);
5007 bool nir_lower_io_to_scalar_early(nir_shader *shader, nir_variable_mode mask);
5008 bool nir_lower_io_to_vector(nir_shader *shader, nir_variable_mode mask);
5009 bool nir_vectorize_tess_levels(nir_shader *shader);
5010
5011 bool nir_lower_fragcolor(nir_shader *shader, unsigned max_cbufs);
5012 bool nir_lower_fragcoord_wtrans(nir_shader *shader);
5013 void nir_lower_viewport_transform(nir_shader *shader);
5014 bool nir_lower_uniforms_to_ubo(nir_shader *shader, bool dword_packed, bool load_vec4);
5015
5016 bool nir_lower_is_helper_invocation(nir_shader *shader);
5017
5018 typedef struct nir_lower_subgroups_options {
5019 uint8_t subgroup_size;
5020 uint8_t ballot_bit_size;
5021 uint8_t ballot_components;
5022 bool lower_to_scalar:1;
5023 bool lower_vote_trivial:1;
5024 bool lower_vote_eq:1;
5025 bool lower_subgroup_masks:1;
5026 bool lower_shuffle:1;
5027 bool lower_shuffle_to_32bit:1;
5028 bool lower_shuffle_to_swizzle_amd:1;
5029 bool lower_quad:1;
5030 bool lower_quad_broadcast_dynamic:1;
5031 bool lower_quad_broadcast_dynamic_to_const:1;
5032 bool lower_elect:1;
5033 bool lower_read_invocation_to_cond:1;
5034 } nir_lower_subgroups_options;
5035
5036 bool nir_lower_subgroups(nir_shader *shader,
5037 const nir_lower_subgroups_options *options);
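
/* Example (illustrative only, not part of the NIR API): a sketch of a
 * wave64-style configuration for nir_lower_subgroups. The helper name and
 * every option value here are assumptions chosen for illustration, not the
 * requirements of any particular driver.
 */
static inline bool
nir_example_lower_subgroups_wave64(nir_shader *shader)
{
   nir_lower_subgroups_options opts = {
      .subgroup_size = 64,
      .ballot_bit_size = 64,
      .ballot_components = 1,
      .lower_to_scalar = true,
      .lower_subgroup_masks = true,
      .lower_vote_eq = true,
   };
   return nir_lower_subgroups(shader, &opts);
}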
5038
5039 bool nir_lower_system_values(nir_shader *shader);
5040
5041 typedef struct nir_lower_compute_system_values_options {
5042 bool has_base_global_invocation_id:1;
5043 bool has_base_workgroup_id:1;
5044 bool shuffle_local_ids_for_quad_derivatives:1;
5045 bool lower_local_invocation_index:1;
5046 } nir_lower_compute_system_values_options;
5047
5048 bool nir_lower_compute_system_values(nir_shader *shader,
5049 const nir_lower_compute_system_values_options *options);
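
/* Example (illustrative only, not part of the NIR API): a sketch of lowering
 * compute system values for a hypothetical driver that supplies a base
 * workgroup ID and wants the local invocation index lowered as well. The
 * helper name and the option choices are assumptions made for this example.
 */
static inline bool
nir_example_lower_cs_sysvals(nir_shader *shader)
{
   nir_lower_compute_system_values_options opts = {
      .has_base_workgroup_id = true,
      .lower_local_invocation_index = true,
   };
   return nir_lower_compute_system_values(shader, &opts);
}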
5050
5051 struct nir_lower_sysvals_to_varyings_options {
5052 bool frag_coord:1;
5053 bool front_face:1;
5054 bool point_coord:1;
5055 };
5056
5057 bool
5058 nir_lower_sysvals_to_varyings(nir_shader *shader,
5059 const struct nir_lower_sysvals_to_varyings_options *options);
5060
5061 enum PACKED nir_lower_tex_packing {
5062 /** No packing */
5063 nir_lower_tex_packing_none = 0,
5064 /**
5065 * The sampler returns up to 2 32-bit words of half floats or 16-bit signed
5066 * or unsigned ints based on the sampler type
5067 */
5068 nir_lower_tex_packing_16,
5069 /** The sampler returns 1 32-bit word of 4x8 unorm */
5070 nir_lower_tex_packing_8,
5071 };
5072
5073 typedef struct nir_lower_tex_options {
5074 /**
5075 * bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which
5076 * sampler types a texture projector is lowered.
5077 */
5078 unsigned lower_txp;
5079
5080 /**
5081 * If true, lower away nir_tex_src_offset for all texelfetch instructions.
5082 */
5083 bool lower_txf_offset;
5084
5085 /**
5086 * If true, lower away nir_tex_src_offset for all rect textures.
5087 */
5088 bool lower_rect_offset;
5089
5090 /**
5091 * If true, lower rect textures to 2D, using txs to fetch the
5092 * texture dimensions and dividing the texture coords by the
5093 * texture dims to normalize.
5094 */
5095 bool lower_rect;
5096
5097 /**
5098 * If true, convert yuv to rgb.
5099 */
5100 unsigned lower_y_uv_external;
5101 unsigned lower_y_u_v_external;
5102 unsigned lower_yx_xuxv_external;
5103 unsigned lower_xy_uxvx_external;
5104 unsigned lower_ayuv_external;
5105 unsigned lower_xyuv_external;
5106 unsigned lower_yuv_external;
5107 unsigned lower_yu_yv_external;
5108 unsigned lower_y41x_external;
5109 unsigned bt709_external;
5110 unsigned bt2020_external;
5111
5112 /**
5113 * To emulate certain texture wrap modes, this can be used
5114 * to saturate the specified tex coord to [0.0, 1.0]. The
5115 * bits are according to sampler #, i.e. if, for example:
5116 *
5117 * (conf->saturate_s & (1 << n))
5118 *
5119 * is true, then the s coord for sampler n is saturated.
5120 *
5121 * Note that clamping must happen *after* projector lowering
5122 * so any projected texture sample instruction with a clamped
5123 * coordinate gets automatically lowered, regardless of the
5124 * 'lower_txp' setting.
5125 */
5126 unsigned saturate_s;
5127 unsigned saturate_t;
5128 unsigned saturate_r;
5129
5130 /* Bitmask of textures that need swizzling.
5131 *
5132 * If (swizzle_result & (1 << texture_index)), then the swizzle in
5133 * swizzles[texture_index] is applied to the result of the texturing
5134 * operation.
5135 */
5136 unsigned swizzle_result;
5137
5138 /* A swizzle for each texture. Values 0-3 represent x, y, z, or w swizzles
5139 * while 4 and 5 represent 0 and 1 respectively.
5140 *
5141 * Indexed by texture-id.
5142 */
5143 uint8_t swizzles[32][4];
5144
5145 /* Can be used to scale sampled values in range required by the
5146 * format.
5147 *
5148 * Indexed by texture-id.
5149 */
5150 float scale_factors[32];
5151
5152 /**
5153 * Bitmap of textures that need srgb to linear conversion. If
5154 * (lower_srgb & (1 << texture_index)) then the rgb (xyz) components
5155 * of the texture are lowered to linear.
5156 */
5157 unsigned lower_srgb;
5158
5159 /**
5160 * If true, lower nir_texop_txd on cube maps with nir_texop_txl.
5161 */
5162 bool lower_txd_cube_map;
5163
5164 /**
5165 * If true, lower nir_texop_txd on 3D surfaces with nir_texop_txl.
5166 */
5167 bool lower_txd_3d;
5168
5169 /**
5170 * If true, lower nir_texop_txd on shadow samplers (except cube maps)
5171 * with nir_texop_txl. Notice that cube map shadow samplers are lowered
5172 * with lower_txd_cube_map.
5173 */
5174 bool lower_txd_shadow;
5175
5176 /**
5177 * If true, lower nir_texop_txd on all samplers to a nir_texop_txl.
5178 * Implies lower_txd_cube_map and lower_txd_shadow.
5179 */
5180 bool lower_txd;
5181
5182 /**
5183 * If true, lower nir_texop_txb instructions that try to use shadow compare and min_lod
5184 * at the same time to a nir_texop_lod, some math, and nir_texop_tex.
5185 */
5186 bool lower_txb_shadow_clamp;
5187
5188 /**
5189 * If true, lower nir_texop_txd on shadow samplers when it uses min_lod
5190 * with nir_texop_txl. This includes cube maps.
5191 */
5192 bool lower_txd_shadow_clamp;
5193
5194 /**
5195 * If true, lower nir_texop_txd when it uses both offset and min_lod
5196 * with nir_texop_txl. This includes cube maps.
5197 */
5198 bool lower_txd_offset_clamp;
5199
5200 /**
5201 * If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the
5202 * sampler is bindless.
5203 */
5204 bool lower_txd_clamp_bindless_sampler;
5205
5206 /**
5207 * If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the
5208 * sampler index is not statically determinable to be less than 16.
5209 */
5210 bool lower_txd_clamp_if_sampler_index_not_lt_16;
5211
5212 /**
5213 * If true, lower nir_texop_txs with a non-0-lod into nir_texop_txs with
5214 * 0-lod followed by a nir_ishr.
5215 */
5216 bool lower_txs_lod;
5217
5218 /**
5219 * If true, lower nir_texop_txs for cube arrays to a nir_texop_txs with a
5220 * 2D array type followed by a nir_idiv by 6.
5221 */
5222 bool lower_txs_cube_array;
5223
5224 /**
5225 * If true, apply a .bagr swizzle on tg4 results to handle Broadcom's
5226 * mixed-up tg4 locations.
5227 */
5228 bool lower_tg4_broadcom_swizzle;
5229
5230 /**
5231 * If true, lowers tg4 with 4 constant offsets to 4 tg4 calls
5232 */
5233 bool lower_tg4_offsets;
5234
5235 /**
5236 * Lower txf_ms to fragment_mask_fetch and fragment_fetch, and samples_identical to
5237 * fragment_mask_fetch.
5238 */
5239 bool lower_to_fragment_fetch_amd;
5240
5241 /**
5242 * To lower packed sampler return formats.
5243 *
5244 * Indexed by sampler-id.
5245 */
5246 enum nir_lower_tex_packing lower_tex_packing[32];
5247 } nir_lower_tex_options;
5248
5249 /** Lowers complex texture instructions to simpler ones */
5250 bool nir_lower_tex(nir_shader *shader,
5251 const nir_lower_tex_options *options);
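
/* Example (illustrative only, not part of the NIR API): a sketch of a
 * nir_lower_tex configuration for hardware without texture projectors or
 * rectangle textures. The helper name and the particular set of options are
 * assumptions chosen to illustrate the options struct.
 */
static inline bool
nir_example_lower_tex(nir_shader *shader)
{
   nir_lower_tex_options opts = {
      .lower_txp = ~0u,           /* lower projectors for every sampler dim */
      .lower_rect = true,         /* rewrite rect textures as normalized 2D */
      .lower_txd_cube_map = true,
      .lower_txs_lod = true,
   };
   return nir_lower_tex(shader, &opts);
}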
5252
5253 typedef struct nir_lower_image_options {
5254 /**
5255 * If true, lower cube size operations.
5256 */
5257 bool lower_cube_size;
5258 } nir_lower_image_options;
5259
5260 bool nir_lower_image(nir_shader *nir,
5261 const nir_lower_image_options *options);
5262
5263 bool nir_lower_readonly_images_to_tex(nir_shader *shader, bool per_variable);
5264
5265 enum nir_lower_non_uniform_access_type {
5266 nir_lower_non_uniform_ubo_access = (1 << 0),
5267 nir_lower_non_uniform_ssbo_access = (1 << 1),
5268 nir_lower_non_uniform_texture_access = (1 << 2),
5269 nir_lower_non_uniform_image_access = (1 << 3),
5270 };
5271
5272 /* Given the nir_src used for the resource, return the channels which might be non-uniform. */
5273 typedef nir_component_mask_t (*nir_lower_non_uniform_access_callback)(const nir_src *, void *);
5274
5275 typedef struct nir_lower_non_uniform_access_options {
5276 enum nir_lower_non_uniform_access_type types;
5277 nir_lower_non_uniform_access_callback callback;
5278 void *callback_data;
5279 } nir_lower_non_uniform_access_options;
5280
5281 bool nir_lower_non_uniform_access(nir_shader *shader,
5282 const nir_lower_non_uniform_access_options *options);
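
/* Example (illustrative only, not part of the NIR API): a sketch that asks
 * nir_lower_non_uniform_access to lower divergent texture and image accesses
 * while leaving UBO/SSBO access alone. The helper name is an assumption, and
 * the callback is simply left NULL in this sketch.
 */
static inline bool
nir_example_lower_divergent_resources(nir_shader *shader)
{
   nir_lower_non_uniform_access_options opts = {
      .types = nir_lower_non_uniform_texture_access |
               nir_lower_non_uniform_image_access,
   };
   return nir_lower_non_uniform_access(shader, &opts);
}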
5283
5284 typedef struct {
5285 /* If true, a 32-bit division lowering based on NV50LegalizeSSA::handleDIV()
5286 * is used. It is the faster of the two but it is not exact in some cases
5287 * (for example, 1091317713u / 1034u gives 5209173 instead of 1055432).
5288 *
5289 * If false, a lowering based on AMDGPUTargetLowering::LowerUDIVREM() and
5290 * AMDGPUTargetLowering::LowerSDIVREM() is used. It requires more
5291 * instructions than the nv50 path and many of them are integer
5292 * multiplications, so it is probably slower. It should always return the
5293 * correct result, though.
5294 */
5295 bool imprecise_32bit_lowering;
5296
5297 /* Whether 16-bit floating point arithmetic should be allowed in 8-bit
5298 * division lowering
5299 */
5300 bool allow_fp16;
5301 } nir_lower_idiv_options;
5302
5303 bool nir_lower_idiv(nir_shader *shader, const nir_lower_idiv_options *options);
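
/* Example (illustrative only, not part of the NIR API): a sketch selecting
 * the exact (AMDGPU-style) integer-division lowering and permitting fp16
 * arithmetic in the 8-bit paths. The helper name is an assumption.
 */
static inline bool
nir_example_lower_idiv_exact(nir_shader *shader)
{
   nir_lower_idiv_options opts = {
      .imprecise_32bit_lowering = false, /* take the always-correct path */
      .allow_fp16 = true,
   };
   return nir_lower_idiv(shader, &opts);
}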
5304
5305 typedef struct nir_input_attachment_options {
5306 bool use_fragcoord_sysval;
5307 bool use_layer_id_sysval;
5308 bool use_view_id_for_layer;
5309 } nir_input_attachment_options;
5310
5311 bool nir_lower_input_attachments(nir_shader *shader,
5312 const nir_input_attachment_options *options);
5313
5314 bool nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables,
5315 bool use_vars,
5316 bool use_clipdist_array,
5317 const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
5318 bool nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables,
5319 bool use_clipdist_array,
5320 const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
5321 bool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables,
5322 bool use_clipdist_array);
5323 bool nir_lower_clip_cull_distance_arrays(nir_shader *nir);
5324 bool nir_lower_clip_disable(nir_shader *shader, unsigned clip_plane_enable);
5325
5326 void nir_lower_point_size_mov(nir_shader *shader,
5327 const gl_state_index16 *pointsize_state_tokens);
5328
5329 bool nir_lower_frexp(nir_shader *nir);
5330
5331 void nir_lower_two_sided_color(nir_shader *shader, bool face_sysval);
5332
5333 bool nir_lower_clamp_color_outputs(nir_shader *shader);
5334
5335 bool nir_lower_flatshade(nir_shader *shader);
5336
5337 void nir_lower_passthrough_edgeflags(nir_shader *shader);
5338 bool nir_lower_patch_vertices(nir_shader *nir, unsigned static_count,
5339 const gl_state_index16 *uniform_state_tokens);
5340
5341 typedef struct nir_lower_wpos_ytransform_options {
5342 gl_state_index16 state_tokens[STATE_LENGTH];
5343 bool fs_coord_origin_upper_left :1;
5344 bool fs_coord_origin_lower_left :1;
5345 bool fs_coord_pixel_center_integer :1;
5346 bool fs_coord_pixel_center_half_integer :1;
5347 } nir_lower_wpos_ytransform_options;
5348
5349 bool nir_lower_wpos_ytransform(nir_shader *shader,
5350 const nir_lower_wpos_ytransform_options *options);
5351 bool nir_lower_wpos_center(nir_shader *shader, const bool for_sample_shading);
5352
5353 bool nir_lower_pntc_ytransform(nir_shader *shader,
5354 const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
5355
5356 bool nir_lower_wrmasks(nir_shader *shader, nir_instr_filter_cb cb, const void *data);
5357
5358 bool nir_lower_fb_read(nir_shader *shader);
5359
5360 typedef struct nir_lower_drawpixels_options {
5361 gl_state_index16 texcoord_state_tokens[STATE_LENGTH];
5362 gl_state_index16 scale_state_tokens[STATE_LENGTH];
5363 gl_state_index16 bias_state_tokens[STATE_LENGTH];
5364 unsigned drawpix_sampler;
5365 unsigned pixelmap_sampler;
5366 bool pixel_maps :1;
5367 bool scale_and_bias :1;
5368 } nir_lower_drawpixels_options;
5369
5370 void nir_lower_drawpixels(nir_shader *shader,
5371 const nir_lower_drawpixels_options *options);
5372
5373 typedef struct nir_lower_bitmap_options {
5374 unsigned sampler;
5375 bool swizzle_xxxx;
5376 } nir_lower_bitmap_options;
5377
5378 void nir_lower_bitmap(nir_shader *shader, const nir_lower_bitmap_options *options);
5379
5380 bool nir_lower_atomics_to_ssbo(nir_shader *shader);
5381
5382 typedef enum {
5383 nir_lower_int_source_mods = 1 << 0,
5384 nir_lower_float_source_mods = 1 << 1,
5385 nir_lower_64bit_source_mods = 1 << 2,
5386 nir_lower_triop_abs = 1 << 3,
5387 nir_lower_all_source_mods = (1 << 4) - 1
5388 } nir_lower_to_source_mods_flags;
5389
5390
5391 bool nir_lower_to_source_mods(nir_shader *shader, nir_lower_to_source_mods_flags options);
5392
5393 typedef enum {
5394 nir_lower_gs_intrinsics_per_stream = 1 << 0,
5395 nir_lower_gs_intrinsics_count_primitives = 1 << 1,
5396 nir_lower_gs_intrinsics_count_vertices_per_primitive = 1 << 2,
5397 nir_lower_gs_intrinsics_overwrite_incomplete = 1 << 3,
5398 } nir_lower_gs_intrinsics_flags;
5399
5400 bool nir_lower_gs_intrinsics(nir_shader *shader, nir_lower_gs_intrinsics_flags options);
5401
5402 typedef unsigned (*nir_lower_bit_size_callback)(const nir_instr *, void *);
5403
5404 bool nir_lower_bit_size(nir_shader *shader,
5405 nir_lower_bit_size_callback callback,
5406 void *callback_data);
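
/* Example (illustrative only, not part of the NIR API): a sketch of a
 * nir_lower_bit_size_callback, assuming the convention that the callback
 * returns the bit size an instruction should be rewritten to and 0 to leave
 * it untouched. The policy of widening 8- and 16-bit ALU results to 32 bits
 * is purely illustrative. It would typically be passed as
 * nir_lower_bit_size(shader, nir_example_widen_small_alu_cb, NULL).
 */
static inline unsigned
nir_example_widen_small_alu_cb(const nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_alu)
      return 0;

   unsigned bit_size = nir_dest_bit_size(nir_instr_as_alu(instr)->dest.dest);
   return (bit_size == 8 || bit_size == 16) ? 32 : 0;
}
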
5407 bool nir_lower_64bit_phis(nir_shader *shader);
5408
5409 nir_lower_int64_options nir_lower_int64_op_to_options_mask(nir_op opcode);
5410 bool nir_lower_int64(nir_shader *shader);
5411
5412 nir_lower_doubles_options nir_lower_doubles_op_to_options_mask(nir_op opcode);
5413 bool nir_lower_doubles(nir_shader *shader, const nir_shader *softfp64,
5414 nir_lower_doubles_options options);
5415 bool nir_lower_pack(nir_shader *shader);
5416
5417 bool nir_recompute_io_bases(nir_function_impl *impl, nir_variable_mode modes);
5418 bool nir_lower_mediump_io(nir_shader *nir, nir_variable_mode modes,
5419 uint64_t varying_mask, bool use_16bit_slots);
5420 bool nir_force_mediump_io(nir_shader *nir, nir_variable_mode modes,
5421 nir_alu_type types);
5422 bool nir_unpack_16bit_varying_slots(nir_shader *nir, nir_variable_mode modes);
5423 bool nir_fold_16bit_sampler_conversions(nir_shader *nir,
5424 unsigned tex_src_types);
5425
5426 typedef struct {
5427 bool legalize_type; /* whether this src should be legalized */
5428 uint8_t bit_size; /* bit_size to enforce */
5429 nir_tex_src_type match_src; /* if bit_size is 0, match bit size of this */
5430 } nir_tex_src_type_constraint, nir_tex_src_type_constraints[nir_num_tex_src_types];
5431
5432 bool nir_legalize_16bit_sampler_srcs(nir_shader *nir,
5433 nir_tex_src_type_constraints constraints);
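
/* Example (illustrative only, not part of the NIR API): a sketch of
 * constraints for a hypothetical backend whose samplers want 16-bit lod and
 * bias sources and a comparator that matches the coordinate's bit size.
 * Which sources need legalizing is an assumption made for this example.
 */
static inline bool
nir_example_legalize_tex_srcs(nir_shader *shader)
{
   nir_tex_src_type_constraints constraints = {
      [nir_tex_src_lod]        = { .legalize_type = true, .bit_size = 16 },
      [nir_tex_src_bias]       = { .legalize_type = true, .bit_size = 16 },
      [nir_tex_src_comparator] = { .legalize_type = true,
                                   .match_src = nir_tex_src_coord },
   };
   return nir_legalize_16bit_sampler_srcs(shader, constraints);
}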
5434
5435 bool nir_lower_point_size(nir_shader *shader, float min, float max);
5436
5437 void nir_lower_texcoord_replace(nir_shader *s, unsigned coord_replace,
5438 bool point_coord_is_sysval, bool yinvert);
5439
5440 typedef enum {
5441 nir_lower_interpolation_at_sample = (1 << 1),
5442 nir_lower_interpolation_at_offset = (1 << 2),
5443 nir_lower_interpolation_centroid = (1 << 3),
5444 nir_lower_interpolation_pixel = (1 << 4),
5445 nir_lower_interpolation_sample = (1 << 5),
5446 } nir_lower_interpolation_options;
5447
5448 bool nir_lower_interpolation(nir_shader *shader,
5449 nir_lower_interpolation_options options);
5450
5451 bool nir_lower_discard_or_demote(nir_shader *shader,
5452 bool force_correct_quad_ops_after_discard);
5453
5454 bool nir_lower_memory_model(nir_shader *shader);
5455
5456 bool nir_lower_goto_ifs(nir_shader *shader);
5457
5458 bool nir_shader_uses_view_index(nir_shader *shader);
5459 bool nir_can_lower_multiview(nir_shader *shader);
5460 bool nir_lower_multiview(nir_shader *shader, uint32_t view_mask);
5461
5462
5463 bool nir_lower_fp16_casts(nir_shader *shader);
5464 bool nir_normalize_cubemap_coords(nir_shader *shader);
5465
5466 bool nir_shader_supports_implicit_lod(nir_shader *shader);
5467
5468 void nir_live_ssa_defs_impl(nir_function_impl *impl);
5469
5470 const BITSET_WORD *nir_get_live_ssa_defs(nir_cursor cursor, void *mem_ctx);
5471
5472 void nir_loop_analyze_impl(nir_function_impl *impl,
5473 nir_variable_mode indirect_mask);
5474
5475 bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);
5476
5477 bool nir_repair_ssa_impl(nir_function_impl *impl);
5478 bool nir_repair_ssa(nir_shader *shader);
5479
5480 void nir_convert_loop_to_lcssa(nir_loop *loop);
5481 bool nir_convert_to_lcssa(nir_shader *shader, bool skip_invariants, bool skip_bool_invariants);
5482 void nir_divergence_analysis(nir_shader *shader);
5483 bool nir_update_instr_divergence(nir_shader *shader, nir_instr *instr);
5484
5485 /* If phi_webs_only is true, only convert SSA values involved in phi nodes to
5486 * registers. If false, convert all values (even those not involved in a phi
5487 * node) to registers.
5488 */
5489 bool nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only);
5490
5491 bool nir_lower_phis_to_regs_block(nir_block *block);
5492 bool nir_lower_ssa_defs_to_regs_block(nir_block *block);
5493 bool nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl);
5494
5495 bool nir_lower_samplers(nir_shader *shader);
5496 bool nir_lower_ssbo(nir_shader *shader);
5497
5498 typedef struct nir_lower_printf_options {
5499 bool treat_doubles_as_floats : 1;
5500 unsigned max_buffer_size;
5501 } nir_lower_printf_options;
5502
5503 bool nir_lower_printf(nir_shader *nir, const nir_lower_printf_options *options);
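
/* Example (illustrative only, not part of the NIR API): a sketch of running
 * nir_lower_printf with a bounded output buffer. The helper name and the
 * 1 MiB cap are assumptions chosen for this example.
 */
static inline bool
nir_example_lower_printf(nir_shader *shader)
{
   nir_lower_printf_options opts = {
      .treat_doubles_as_floats = true,
      .max_buffer_size = 1024 * 1024,
   };
   return nir_lower_printf(shader, &opts);
}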
5504
5505 /* This is here for unit tests. */
5506 bool nir_opt_comparison_pre_impl(nir_function_impl *impl);
5507
5508 bool nir_opt_comparison_pre(nir_shader *shader);
5509
5510 typedef struct nir_opt_access_options {
5511 bool is_vulkan;
5512 bool infer_non_readable;
5513 } nir_opt_access_options;
5514
5515 bool nir_opt_access(nir_shader *shader, const nir_opt_access_options *options);
5516 bool nir_opt_algebraic(nir_shader *shader);
5517 bool nir_opt_algebraic_before_ffma(nir_shader *shader);
5518 bool nir_opt_algebraic_late(nir_shader *shader);
5519 bool nir_opt_algebraic_distribute_src_mods(nir_shader *shader);
5520 bool nir_opt_constant_folding(nir_shader *shader);
5521
5522 /* Try to combine a and b into a. Return true if combination was possible,
5523 * which will result in b being removed by the pass. Return false if
5524 * combination wasn't possible.
5525 */
5526 typedef bool (*nir_combine_memory_barrier_cb)(
5527 nir_intrinsic_instr *a, nir_intrinsic_instr *b, void *data);
5528
5529 bool nir_opt_combine_memory_barriers(nir_shader *shader,
5530 nir_combine_memory_barrier_cb combine_cb,
5531 void *data);
5532
5533 bool nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes);
5534
5535 bool nir_copy_prop_impl(nir_function_impl *impl);
5536 bool nir_copy_prop(nir_shader *shader);
5537
5538 bool nir_opt_copy_prop_vars(nir_shader *shader);
5539
5540 bool nir_opt_cse(nir_shader *shader);
5541
5542 bool nir_opt_dce(nir_shader *shader);
5543
5544 bool nir_opt_dead_cf(nir_shader *shader);
5545
5546 bool nir_opt_dead_write_vars(nir_shader *shader);
5547
5548 bool nir_opt_deref_impl(nir_function_impl *impl);
5549 bool nir_opt_deref(nir_shader *shader);
5550
5551 bool nir_opt_find_array_copies(nir_shader *shader);
5552
5553 bool nir_opt_fragdepth(nir_shader *shader);
5554
5555 bool nir_opt_gcm(nir_shader *shader, bool value_number);
5556
5557 bool nir_opt_idiv_const(nir_shader *shader, unsigned min_bit_size);
5558
5559 bool nir_opt_if(nir_shader *shader, bool aggressive_last_continue);
5560
5561 bool nir_opt_intrinsics(nir_shader *shader);
5562
5563 bool nir_opt_large_constants(nir_shader *shader,
5564 glsl_type_size_align_func size_align,
5565 unsigned threshold);
5566
5567 bool nir_opt_loop_unroll(nir_shader *shader);
5568
5569 typedef enum {
5570 nir_move_const_undef = (1 << 0),
5571 nir_move_load_ubo = (1 << 1),
5572 nir_move_load_input = (1 << 2),
5573 nir_move_comparisons = (1 << 3),
5574 nir_move_copies = (1 << 4),
5575 nir_move_load_ssbo = (1 << 5),
5576 } nir_move_options;
5577
5578 bool nir_can_move_instr(nir_instr *instr, nir_move_options options);
5579
5580 bool nir_opt_sink(nir_shader *shader, nir_move_options options);
5581
5582 bool nir_opt_move(nir_shader *shader, nir_move_options options);
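
/* Example (illustrative only, not part of the NIR API): a sketch of pairing
 * nir_opt_sink and nir_opt_move with a common set of nir_move_options. The
 * helper name and the particular flag selection are assumptions made for
 * this example; backends pick the instruction classes they want rescheduled.
 */
static inline bool
nir_example_sink_and_move(nir_shader *shader)
{
   const nir_move_options moves = (nir_move_options)
      (nir_move_const_undef | nir_move_load_ubo |
       nir_move_comparisons | nir_move_copies);
   bool progress = false;
   progress |= nir_opt_sink(shader, moves);
   progress |= nir_opt_move(shader, moves);
   return progress;
}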
5583
5584 bool nir_opt_offsets(nir_shader *shader);
5585
5586 bool nir_opt_peephole_select(nir_shader *shader, unsigned limit,
5587 bool indirect_load_ok, bool expensive_alu_ok);
5588
5589 bool nir_opt_rematerialize_compares(nir_shader *shader);
5590
5591 bool nir_opt_remove_phis(nir_shader *shader);
5592 bool nir_opt_remove_phis_block(nir_block *block);
5593
5594 bool nir_opt_phi_precision(nir_shader *shader);
5595
5596 bool nir_opt_shrink_vectors(nir_shader *shader, bool shrink_image_store);
5597
5598 bool nir_opt_trivial_continues(nir_shader *shader);
5599
5600 bool nir_opt_undef(nir_shader *shader);
5601
5602 bool nir_lower_undef_to_zero(nir_shader *shader);
5603
5604 bool nir_opt_uniform_atomics(nir_shader *shader);
5605
5606 typedef bool (*nir_opt_vectorize_cb)(const nir_instr *instr, void *data);
5607
5608 bool nir_opt_vectorize(nir_shader *shader, nir_opt_vectorize_cb filter,
5609 void *data);
5610
5611 bool nir_opt_conditional_discard(nir_shader *shader);
5612 bool nir_opt_move_discards_to_top(nir_shader *shader);
5613
5614 typedef bool (*nir_should_vectorize_mem_func)(unsigned align_mul,
5615 unsigned align_offset,
5616 unsigned bit_size,
5617 unsigned num_components,
5618 nir_intrinsic_instr *low, nir_intrinsic_instr *high,
5619 void *data);
5620
5621 typedef struct {
5622 nir_should_vectorize_mem_func callback;
5623 nir_variable_mode modes;
5624 nir_variable_mode robust_modes;
5625 void *cb_data;
5626 } nir_load_store_vectorize_options;
5627
5628 bool nir_opt_load_store_vectorize(nir_shader *shader, const nir_load_store_vectorize_options *options);
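
/* Example (illustrative only, not part of the NIR API): a sketch of a
 * nir_should_vectorize_mem_func plus the options that feed it to
 * nir_opt_load_store_vectorize. The 128-bit, naturally-aligned policy and
 * the helper names are assumptions made for this example.
 */
static inline bool
nir_example_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
                                 unsigned bit_size, unsigned num_components,
                                 nir_intrinsic_instr *low,
                                 nir_intrinsic_instr *high, void *data)
{
   /* Only form vectors of at most 128 bits that stay naturally aligned. */
   if (num_components > 4 || bit_size * num_components > 128)
      return false;
   return align_mul >= bit_size / 8 && align_offset % (bit_size / 8) == 0;
}

static inline bool
nir_example_vectorize_buffers(nir_shader *shader)
{
   const nir_load_store_vectorize_options opts = {
      .callback = nir_example_should_vectorize_mem,
      .modes = nir_var_mem_ubo | nir_var_mem_ssbo,
   };
   return nir_opt_load_store_vectorize(shader, &opts);
}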
5629
5630 void nir_sweep(nir_shader *shader);
5631
5632 void nir_remap_dual_slot_attributes(nir_shader *shader,
5633 uint64_t *dual_slot_inputs);
5634 uint64_t nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot);
5635
5636 nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val);
5637 gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin);
5638
5639 static inline bool
5640 nir_variable_is_in_ubo(const nir_variable *var)
5641 {
5642 return (var->data.mode == nir_var_mem_ubo &&
5643 var->interface_type != NULL);
5644 }
5645
5646 static inline bool
5647 nir_variable_is_in_ssbo(const nir_variable *var)
5648 {
5649 return (var->data.mode == nir_var_mem_ssbo &&
5650 var->interface_type != NULL);
5651 }
5652
5653 static inline bool
5654 nir_variable_is_in_block(const nir_variable *var)
5655 {
5656 return nir_variable_is_in_ubo(var) || nir_variable_is_in_ssbo(var);
5657 }
5658
5659 typedef struct nir_unsigned_upper_bound_config {
5660 unsigned min_subgroup_size;
5661 unsigned max_subgroup_size;
5662 unsigned max_workgroup_invocations;
5663 unsigned max_workgroup_count[3];
5664 unsigned max_workgroup_size[3];
5665
5666 uint32_t vertex_attrib_max[32];
5667 } nir_unsigned_upper_bound_config;
5668
5669 uint32_t
5670 nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
5671 nir_ssa_scalar scalar,
5672 const nir_unsigned_upper_bound_config *config);
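
/* Example (illustrative only, not part of the NIR API): a sketch of querying
 * nir_unsigned_upper_bound to prove an index fits in 16 bits. Every limit in
 * the config below is an assumption chosen for this example; the hash table
 * is only a scratch cache for the analysis.
 */
static inline bool
nir_example_index_fits_in_16_bits(nir_shader *shader, nir_ssa_scalar index)
{
   const nir_unsigned_upper_bound_config config = {
      .min_subgroup_size = 32,
      .max_subgroup_size = 64,
      .max_workgroup_invocations = 1024,
      .max_workgroup_count = {65535, 65535, 65535},
      .max_workgroup_size = {1024, 1024, 64},
      /* vertex_attrib_max is left zeroed here; fill it in for shaders that
       * read vertex attributes.
       */
   };
   struct hash_table *range_ht = _mesa_pointer_hash_table_create(NULL);
   uint32_t bound = nir_unsigned_upper_bound(shader, range_ht, index, &config);
   _mesa_hash_table_destroy(range_ht, NULL);
   return bound <= UINT16_MAX;
}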
5673
5674 bool
5675 nir_addition_might_overflow(nir_shader *shader, struct hash_table *range_ht,
5676 nir_ssa_scalar ssa, unsigned const_val,
5677 const nir_unsigned_upper_bound_config *config);
5678
5679 #include "nir_inline_helpers.h"
5680
5681 #ifdef __cplusplus
5682 } /* extern "C" */
5683 #endif
5684
5685 #endif /* NIR_H */
5686