1 /*
2 * Copyright © 2014 Connor Abbott
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Connor Abbott (cwabbott0@gmail.com)
25 *
26 */
27
28 #ifndef NIR_H
29 #define NIR_H
30
31 #include "compiler/glsl_types.h"
32 #include "compiler/glsl/list.h"
33 #include "compiler/shader_enums.h"
34 #include "compiler/shader_info.h"
35 #include "util/bitscan.h"
36 #include "util/bitset.h"
37 #include "util/compiler.h"
38 #include "util/enum_operators.h"
39 #include "util/format/u_format.h"
40 #include "util/hash_table.h"
41 #include "util/list.h"
42 #include "util/log.h"
43 #include "util/macros.h"
44 #include "util/ralloc.h"
45 #include "util/set.h"
46 #include "util/u_printf.h"
47 #define XXH_INLINE_ALL
48 #include <stdio.h>
49 #include "util/xxhash.h"
50
51 #ifndef NDEBUG
52 #include "util/u_debug.h"
53 #endif /* NDEBUG */
54
55 #include "nir_opcodes.h"
56
57 #ifdef __cplusplus
58 extern "C" {
59 #endif
60
61 extern uint32_t nir_debug;
62 extern bool nir_debug_print_shader[MESA_SHADER_KERNEL + 1];
63
64 #ifndef NDEBUG
65 #define NIR_DEBUG(flag) unlikely(nir_debug &(NIR_DEBUG_##flag))
66 #else
67 #define NIR_DEBUG(flag) false
68 #endif
69
70 #define NIR_DEBUG_CLONE (1u << 0)
71 #define NIR_DEBUG_SERIALIZE (1u << 1)
72 #define NIR_DEBUG_NOVALIDATE (1u << 2)
73 #define NIR_DEBUG_VALIDATE_SSA_DOMINANCE (1u << 3)
74 #define NIR_DEBUG_TGSI (1u << 4)
75 #define NIR_DEBUG_PRINT_VS (1u << 5)
76 #define NIR_DEBUG_PRINT_TCS (1u << 6)
77 #define NIR_DEBUG_PRINT_TES (1u << 7)
78 #define NIR_DEBUG_PRINT_GS (1u << 8)
79 #define NIR_DEBUG_PRINT_FS (1u << 9)
80 #define NIR_DEBUG_PRINT_CS (1u << 10)
81 #define NIR_DEBUG_PRINT_TS (1u << 11)
82 #define NIR_DEBUG_PRINT_MS (1u << 12)
83 #define NIR_DEBUG_PRINT_RGS (1u << 13)
84 #define NIR_DEBUG_PRINT_AHS (1u << 14)
85 #define NIR_DEBUG_PRINT_CHS (1u << 15)
86 #define NIR_DEBUG_PRINT_MHS (1u << 16)
87 #define NIR_DEBUG_PRINT_IS (1u << 17)
88 #define NIR_DEBUG_PRINT_CBS (1u << 18)
89 #define NIR_DEBUG_PRINT_KS (1u << 19)
90 #define NIR_DEBUG_PRINT_NO_INLINE_CONSTS (1u << 20)
91 #define NIR_DEBUG_PRINT_INTERNAL (1u << 21)
92 #define NIR_DEBUG_PRINT_PASS_FLAGS (1u << 22)
93
94 #define NIR_DEBUG_PRINT (NIR_DEBUG_PRINT_VS | \
95 NIR_DEBUG_PRINT_TCS | \
96 NIR_DEBUG_PRINT_TES | \
97 NIR_DEBUG_PRINT_GS | \
98 NIR_DEBUG_PRINT_FS | \
99 NIR_DEBUG_PRINT_CS | \
100 NIR_DEBUG_PRINT_TS | \
101 NIR_DEBUG_PRINT_MS | \
102 NIR_DEBUG_PRINT_RGS | \
103 NIR_DEBUG_PRINT_AHS | \
104 NIR_DEBUG_PRINT_CHS | \
105 NIR_DEBUG_PRINT_MHS | \
106 NIR_DEBUG_PRINT_IS | \
107 NIR_DEBUG_PRINT_CBS | \
108 NIR_DEBUG_PRINT_KS)
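
/* Illustrative sketch (not part of this header): a pass or driver can gate
 * optional work on one of the flags above, e.g.
 *
 *    if (!NIR_DEBUG(NOVALIDATE))
 *       nir_validate_shader(shader, "after my_pass");   // declared later in this header
 *
 * In release (NDEBUG) builds NIR_DEBUG() evaluates to false, so the guarded
 * code is compiled away entirely.
 */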
109
110 #define NIR_FALSE 0u
111 #define NIR_TRUE (~0u)
112 #define NIR_MAX_VEC_COMPONENTS 16
113 #define NIR_MAX_MATRIX_COLUMNS 4
114 #define NIR_STREAM_PACKED (1 << 8)
115 typedef uint16_t nir_component_mask_t;
116
117 static inline bool
118 nir_num_components_valid(unsigned num_components)
119 {
120 return (num_components >= 1 &&
121 num_components <= 5) ||
122 num_components == 8 ||
123 num_components == 16;
124 }
125
126 static inline nir_component_mask_t
127 nir_component_mask(unsigned num_components)
128 {
129 assert(nir_num_components_valid(num_components));
130 return (1u << num_components) - 1;
131 }
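
/* Illustrative sketch (not part of this header): nir_component_mask(3)
 * returns 0x7, i.e. a mask covering components x, y and z.
 *
 *    nir_component_mask_t mask = nir_component_mask(3);
 *    assert(mask == 0x7);
 */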
132
133 void
134 nir_process_debug_variable(void);
135
136 bool nir_component_mask_can_reinterpret(nir_component_mask_t mask,
137 unsigned old_bit_size,
138 unsigned new_bit_size);
139 nir_component_mask_t
140 nir_component_mask_reinterpret(nir_component_mask_t mask,
141 unsigned old_bit_size,
142 unsigned new_bit_size);
143
144 /** Defines a cast function
145 *
146 * This macro defines a cast function from in_type to out_type where
147 * out_type is some structure type that contains a field of type in_type.
148 *
149 * Note that you have to be a bit careful as the generated cast function
150 * destroys constness.
151 */
152 #define NIR_DEFINE_CAST(name, in_type, out_type, field, \
153 type_field, type_value) \
154 static inline out_type * \
155 name(const in_type *parent) \
156 { \
157 assert(parent && parent->type_field == type_value); \
158 return exec_node_data(out_type, parent, field); \
159 }
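
/* Illustrative sketch (not part of this header): a hypothetical downcast
 * from nir_instr to a containing struct would be declared as
 *
 *    NIR_DEFINE_CAST(my_instr_as_foo, nir_instr, struct my_foo_instr,
 *                    instr, type, my_instr_type_foo)
 *
 * which defines my_instr_as_foo(): it asserts parent->type ==
 * my_instr_type_foo and then recovers the containing struct my_foo_instr
 * (whose nir_instr field is named "instr") via exec_node_data().
 */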
160
161 struct nir_function;
162 struct nir_shader;
163 struct nir_instr;
164 struct nir_builder;
165 struct nir_xfb_info;
166
167 /**
168 * Description of built-in state associated with a uniform
169 *
170 * :c:member:`nir_variable.state_slots`
171 */
172 typedef struct {
173 gl_state_index16 tokens[STATE_LENGTH];
174 } nir_state_slot;
175
176 /* clang-format off */
177 typedef enum {
178 nir_var_system_value = (1 << 0),
179 nir_var_uniform = (1 << 1),
180 nir_var_shader_in = (1 << 2),
181 nir_var_shader_out = (1 << 3),
182 nir_var_image = (1 << 4),
183 /** Incoming call or ray payload data for ray-tracing shaders */
184 nir_var_shader_call_data = (1 << 5),
185 /** Ray hit attributes */
186 nir_var_ray_hit_attrib = (1 << 6),
187
188 /* Modes named nir_var_mem_* have explicit data layout */
189 nir_var_mem_ubo = (1 << 7),
190 nir_var_mem_push_const = (1 << 8),
191 nir_var_mem_ssbo = (1 << 9),
192 nir_var_mem_constant = (1 << 10),
193 nir_var_mem_task_payload = (1 << 11),
194 nir_var_mem_node_payload = (1 << 12),
195 nir_var_mem_node_payload_in = (1 << 13),
196
197 /* Generic modes intentionally come last. See encode_dref_modes() in
198 * nir_serialize.c for more details.
199 */
200 nir_var_shader_temp = (1 << 14),
201 nir_var_function_temp = (1 << 15),
202 nir_var_mem_shared = (1 << 16),
203 nir_var_mem_global = (1 << 17),
204
205 nir_var_mem_generic = (nir_var_shader_temp |
206 nir_var_function_temp |
207 nir_var_mem_shared |
208 nir_var_mem_global),
209
210 nir_var_read_only_modes = nir_var_shader_in | nir_var_uniform |
211 nir_var_system_value | nir_var_mem_constant |
212 nir_var_mem_ubo,
213 /* Modes where vector derefs can be indexed as arrays. nir_var_shader_out
214 * is only for mesh stages. nir_var_system_value is only for kernel stages.
215 */
216 nir_var_vec_indexable_modes = nir_var_shader_temp | nir_var_function_temp |
217 nir_var_mem_ubo | nir_var_mem_ssbo |
218 nir_var_mem_shared | nir_var_mem_global |
219 nir_var_mem_push_const | nir_var_mem_task_payload |
220 nir_var_shader_out | nir_var_system_value,
221 nir_num_variable_modes = 18,
222 nir_var_all = (1 << nir_num_variable_modes) - 1,
223 } nir_variable_mode;
224 MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(nir_variable_mode)
225 /* clang-format on */
226
227 /**
228 * Rounding modes.
229 */
230 typedef enum {
231 nir_rounding_mode_undef = 0,
232 nir_rounding_mode_rtne = 1, /* round to nearest even */
233 nir_rounding_mode_ru = 2, /* round up */
234 nir_rounding_mode_rd = 3, /* round down */
235 nir_rounding_mode_rtz = 4, /* round towards zero */
236 } nir_rounding_mode;
237
238 /**
239 * Ray query values that can read from a RayQueryKHR object.
240 */
241 typedef enum {
242 nir_ray_query_value_intersection_type,
243 nir_ray_query_value_intersection_t,
244 nir_ray_query_value_intersection_instance_custom_index,
245 nir_ray_query_value_intersection_instance_id,
246 nir_ray_query_value_intersection_instance_sbt_index,
247 nir_ray_query_value_intersection_geometry_index,
248 nir_ray_query_value_intersection_primitive_index,
249 nir_ray_query_value_intersection_barycentrics,
250 nir_ray_query_value_intersection_front_face,
251 nir_ray_query_value_intersection_object_ray_direction,
252 nir_ray_query_value_intersection_object_ray_origin,
253 nir_ray_query_value_intersection_object_to_world,
254 nir_ray_query_value_intersection_world_to_object,
255 nir_ray_query_value_intersection_candidate_aabb_opaque,
256 nir_ray_query_value_tmin,
257 nir_ray_query_value_flags,
258 nir_ray_query_value_world_ray_direction,
259 nir_ray_query_value_world_ray_origin,
260 nir_ray_query_value_intersection_triangle_vertex_positions
261 } nir_ray_query_value;
262
263 /**
264 * Intel resource flags
265 */
266 typedef enum {
267 nir_resource_intel_bindless = 1u << 0,
268 nir_resource_intel_pushable = 1u << 1,
269 nir_resource_intel_sampler = 1u << 2,
270 nir_resource_intel_non_uniform = 1u << 3,
271 } nir_resource_data_intel;
272
273 /**
274 * Which components to interpret as signed in cmat_muladd.
275 * See 'Cooperative Matrix Operands' in SPV_KHR_cooperative_matrix.
276 */
277 typedef enum {
278 NIR_CMAT_A_SIGNED = 1u << 0,
279 NIR_CMAT_B_SIGNED = 1u << 1,
280 NIR_CMAT_C_SIGNED = 1u << 2,
281 NIR_CMAT_RESULT_SIGNED = 1u << 3,
282 } nir_cmat_signed;
283
284 typedef union {
285 bool b;
286 float f32;
287 double f64;
288 int8_t i8;
289 uint8_t u8;
290 int16_t i16;
291 uint16_t u16;
292 int32_t i32;
293 uint32_t u32;
294 int64_t i64;
295 uint64_t u64;
296 } nir_const_value;
297
298 #define nir_const_value_to_array(arr, c, components, m) \
299 do { \
300 for (unsigned i = 0; i < components; ++i) \
301 arr[i] = c[i].m; \
302 } while (false)
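
/* Illustrative sketch (not part of this header): extract the raw 32-bit
 * values of a constant into a plain array, assuming "value" is a
 * nir_const_value[NIR_MAX_VEC_COMPONENTS] in scope:
 *
 *    uint32_t raw[NIR_MAX_VEC_COMPONENTS];
 *    nir_const_value_to_array(raw, value, num_components, u32);
 */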
303
304 static inline nir_const_value
305 nir_const_value_for_raw_uint(uint64_t x, unsigned bit_size)
306 {
307 nir_const_value v;
308 memset(&v, 0, sizeof(v));
309
310 /* clang-format off */
311 switch (bit_size) {
312 case 1: v.b = x; break;
313 case 8: v.u8 = x; break;
314 case 16: v.u16 = x; break;
315 case 32: v.u32 = x; break;
316 case 64: v.u64 = x; break;
317 default:
318 unreachable("Invalid bit size");
319 }
320 /* clang-format on */
321
322 return v;
323 }
324
325 static inline nir_const_value
326 nir_const_value_for_int(int64_t i, unsigned bit_size)
327 {
328 assert(bit_size <= 64);
329 if (bit_size < 64) {
330 assert(i >= (-(1ll << (bit_size - 1))));
331 assert(i < (1ll << (bit_size - 1)));
332 }
333
334 return nir_const_value_for_raw_uint(i, bit_size);
335 }
336
337 static inline nir_const_value
338 nir_const_value_for_uint(uint64_t u, unsigned bit_size)
339 {
340 assert(bit_size <= 64);
341 if (bit_size < 64)
342 assert(u < (1ull << bit_size));
343
344 return nir_const_value_for_raw_uint(u, bit_size);
345 }
346
347 static inline nir_const_value
348 nir_const_value_for_bool(bool b, unsigned bit_size)
349 {
350 /* Booleans use a 0/-1 convention */
351 return nir_const_value_for_int(-(int)b, bit_size);
352 }
353
354 /* This one isn't inline because it requires half-float conversion */
355 nir_const_value nir_const_value_for_float(double b, unsigned bit_size);
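
/* Illustrative sketch (not part of this header): building constants with the
 * helpers above; note the 0/-1 Boolean convention.
 *
 *    nir_const_value five = nir_const_value_for_int(5, 32);     // five.i32 == 5
 *    nir_const_value yes  = nir_const_value_for_bool(true, 32); // yes.i32 == -1
 */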
356
357 static inline int64_t
358 nir_const_value_as_int(nir_const_value value, unsigned bit_size)
359 {
360 /* clang-format off */
361 switch (bit_size) {
362 /* int1_t uses 0/-1 convention */
363 case 1: return -(int)value.b;
364 case 8: return value.i8;
365 case 16: return value.i16;
366 case 32: return value.i32;
367 case 64: return value.i64;
368 default:
369 unreachable("Invalid bit size");
370 }
371 /* clang-format on */
372 }
373
374 static inline uint64_t
375 nir_const_value_as_uint(nir_const_value value, unsigned bit_size)
376 {
377 /* clang-format off */
378 switch (bit_size) {
379 case 1: return value.b;
380 case 8: return value.u8;
381 case 16: return value.u16;
382 case 32: return value.u32;
383 case 64: return value.u64;
384 default:
385 unreachable("Invalid bit size");
386 }
387 /* clang-format on */
388 }
389
390 static inline bool
391 nir_const_value_as_bool(nir_const_value value, unsigned bit_size)
392 {
393 int64_t i = nir_const_value_as_int(value, bit_size);
394
395 /* Booleans of any size use 0/-1 convention */
396 assert(i == 0 || i == -1);
397
398 return i;
399 }
400
401 /* This one isn't inline because it requires half-float conversion */
402 double nir_const_value_as_float(nir_const_value value, unsigned bit_size);
403
404 typedef struct nir_constant {
405 /**
406 * Value of the constant.
407 *
408 * The field used to back the values supplied by the constant is determined
409 * by the type associated with the ``nir_variable``. Constants may be
410 * scalars, vectors, or matrices.
411 */
412 nir_const_value values[NIR_MAX_VEC_COMPONENTS];
413
414 /* Indicates that all the values are 0s, which can enable some optimizations */
415 bool is_null_constant;
416
417 /* We could get this from var->type, but not having to care about the type
418 * makes clone *much* easier.
419 */
420 unsigned num_elements;
421
422 /* Array elements / Structure Fields */
423 struct nir_constant **elements;
424 } nir_constant;
425
426 /**
427 * Layout qualifiers for gl_FragDepth.
428 *
429 * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared
430 * with a layout qualifier.
431 */
432 typedef enum {
433 /** No depth layout is specified. */
434 nir_depth_layout_none,
435 nir_depth_layout_any,
436 nir_depth_layout_greater,
437 nir_depth_layout_less,
438 nir_depth_layout_unchanged
439 } nir_depth_layout;
440
441 /**
442 * Enum keeping track of how a variable was declared.
443 */
444 typedef enum {
445 /**
446 * Normal declaration.
447 */
448 nir_var_declared_normally = 0,
449
450 /**
451 * Variable is an implicitly declared built-in that has not been explicitly
452 * re-declared by the shader.
453 */
454 nir_var_declared_implicitly,
455
456 /**
457 * Variable is implicitly generated by the compiler and should not be
458 * visible via the API.
459 */
460 nir_var_hidden,
461 } nir_var_declaration_type;
462
463 /**
464 * Either a uniform, global variable, shader input, or shader output. Based on
465 * ir_variable - it should be easy to translate between the two.
466 */
467
468 typedef struct nir_variable {
469 struct exec_node node;
470
471 /**
472 * Declared type of the variable
473 */
474 const struct glsl_type *type;
475
476 /**
477 * Declared name of the variable
478 */
479 char *name;
480
481 struct nir_variable_data {
482 /**
483 * Storage class of the variable.
484 *
485 * :c:struct:`nir_variable_mode`
486 */
487 unsigned mode : 18;
488
489 /**
490 * Is the variable read-only?
491 *
492 * This is set for variables declared as ``const``, shader inputs,
493 * and uniforms.
494 */
495 unsigned read_only : 1;
496 unsigned centroid : 1;
497 unsigned sample : 1;
498 unsigned patch : 1;
499 unsigned invariant : 1;
500
501 /**
502 * Was an 'invariant' qualifier explicitly set in the shader?
503 *
504 * This is used to cross validate glsl qualifiers.
505 */
506 unsigned explicit_invariant:1;
507
508 /**
509 * Is the variable a ray query?
510 */
511 unsigned ray_query : 1;
512
513 /**
514 * Precision qualifier.
515 *
516 * In desktop GLSL we do not care about precision qualifiers at all, in
517 * fact, the spec says that precision qualifiers are ignored.
518 *
519 * To make things easy, we make it so that this field is always
520 * GLSL_PRECISION_NONE on desktop shaders. This way all the variables
521 * have the same precision value and the checks we add in the compiler
522 * for this field will never break a desktop shader compile.
523 */
524 unsigned precision : 2;
525
526 /**
527 * Has this variable been statically assigned?
528 *
529 * This answers whether the variable was assigned in any path of
530 * the shader during ast_to_hir. This doesn't answer whether it is
531 * still written after dead code removal, nor is it maintained in
532 * non-ast_to_hir.cpp (GLSL parsing) paths.
533 */
534 unsigned assigned : 1;
535
536 /**
537 * Can this variable be coalesced with another?
538 *
539 * This is set by nir_lower_io_to_temporaries to say that any
540 * copies involving this variable should stay put. Propagating it can
541 * duplicate the resulting load/store, which is not wanted, and may
542 * result in a load/store of the variable with an indirect offset which
543 * the backend may not be able to handle.
544 */
545 unsigned cannot_coalesce : 1;
546
547 /**
548 * When separate shader programs are enabled, only input/outputs between
549 * the stages of a multi-stage separate program can be safely removed
550 * from the shader interface. Other inputs/outputs must remain active.
551 *
552 * This is also used to make sure xfb varyings that are unused by the
553 * fragment shader are not removed.
554 */
555 unsigned always_active_io : 1;
556
557 /**
558 * Interpolation mode for shader inputs / outputs
559 *
560 * :c:enum:`glsl_interp_mode`
561 */
562 unsigned interpolation : 3;
563
564 /**
565 * If non-zero, then this variable may be packed along with other variables
566 * into a single varying slot, so this offset should be applied when
567 * accessing components. For example, an offset of 1 means that the x
568 * component of this variable is actually stored in component y of the
569 * location specified by ``location``.
570 */
571 unsigned location_frac : 2;
572
573 /**
574 * If true, this variable represents an array of scalars that should
575 * be tightly packed. In other words, consecutive array elements
576 * should be stored one component apart, rather than one slot apart.
577 */
578 unsigned compact : 1;
579
580 /**
581 * Whether this is a fragment shader output implicitly initialized with
582 * the previous contents of the specified render target at the
583 * framebuffer location corresponding to this shader invocation.
584 */
585 unsigned fb_fetch_output : 1;
586
587 /**
588 * Non-zero if this variable is considered bindless as defined by
589 * ARB_bindless_texture.
590 */
591 unsigned bindless : 1;
592
593 /**
594 * Was an explicit binding set in the shader?
595 */
596 unsigned explicit_binding : 1;
597
598 /**
599 * Was the location explicitly set in the shader?
600 *
601 * If the location is explicitly set in the shader, it **cannot** be changed
602 * by the linker or by the API (e.g., calls to ``glBindAttribLocation`` have
603 * no effect).
604 */
605 unsigned explicit_location : 1;
606
607 /**
608 * Is this varying used by transform feedback?
609 *
610 * This is used by the linker to decide if it's safe to pack the varying.
611 */
612 unsigned is_xfb : 1;
613
614 /**
615 * Is this varying used only by transform feedback?
616 *
617 * This is used by the linker to decide if it's safe to pack the varying.
618 */
619 unsigned is_xfb_only : 1;
620
621 /**
622 * Was a transform feedback buffer set in the shader?
623 */
624 unsigned explicit_xfb_buffer : 1;
625
626 /**
627 * Was a transform feedback stride set in the shader?
628 */
629 unsigned explicit_xfb_stride : 1;
630
631 /**
632 * Was an explicit offset set in the shader?
633 */
634 unsigned explicit_offset : 1;
635
636 /**
637 * Layout of the matrix. Uses glsl_matrix_layout values.
638 */
639 unsigned matrix_layout : 2;
640
641 /**
642 * Non-zero if this variable was created by lowering a named interface
643 * block.
644 */
645 unsigned from_named_ifc_block : 1;
646
647 /**
648 * Non-zero if the variable must be a shader input. This is useful for
649 * constraints on function parameters.
650 */
651 unsigned must_be_shader_input : 1;
652
653 /**
654 * Has this variable been used for reading or writing?
655 *
656 * Several GLSL semantic checks require knowledge of whether or not a
657 * variable has been used. For example, it is an error to redeclare a
658 * variable as invariant after it has been used.
659 */
660 unsigned used:1;
661
662 /**
663 * How the variable was declared. See nir_var_declaration_type.
664 *
665 * This is used to detect variables generated by the compiler, so should
666 * not be visible via the API.
667 */
668 unsigned how_declared : 2;
669
670 /**
671 * Is this variable per-view? If so, we know it must be an array with
672 * size corresponding to the number of views.
673 */
674 unsigned per_view : 1;
675
676 /**
677 * Whether the variable is per-primitive.
678 * Can be used by Mesh Shader outputs and corresponding Fragment Shader inputs.
679 */
680 unsigned per_primitive : 1;
681
682 /**
683 * Whether the variable is declared to indicate that a fragment shader
684 * input will not have interpolated values.
685 */
686 unsigned per_vertex : 1;
687
688 /**
689 * Layout qualifier for gl_FragDepth. See nir_depth_layout.
690 *
691 * This is not equal to ``ir_depth_layout_none`` if and only if this
692 * variable is ``gl_FragDepth`` and a layout qualifier is specified.
693 */
694 unsigned depth_layout : 3;
695
696 /**
697 * Vertex stream output identifier.
698 *
699 * For packed outputs, NIR_STREAM_PACKED is set and bits [2*i+1,2*i]
700 * indicate the stream of the i-th component.
701 */
702 unsigned stream : 9;
703
704 /**
705 * See gl_access_qualifier.
706 *
707 * Access flags for memory variables (SSBO/global), image uniforms, and
708 * bindless images in uniforms/inputs/outputs.
709 */
710 unsigned access : 9;
711
712 /**
713 * Descriptor set binding for sampler or UBO.
714 */
715 unsigned descriptor_set : 5;
716
717 /**
718 * output index for dual source blending.
719 */
720 unsigned index;
721
722 /**
723 * Initial binding point for a sampler or UBO.
724 *
725 * For array types, this represents the binding point for the first element.
726 */
727 unsigned binding;
728
729 /**
730 * Storage location of the base of this variable
731 *
732 * The precise meaning of this field depends on the nature of the variable.
733 *
734 * - Vertex shader input: one of the values from ``gl_vert_attrib``.
735 * - Vertex shader output: one of the values from ``gl_varying_slot``.
736 * - Geometry shader input: one of the values from ``gl_varying_slot``.
737 * - Geometry shader output: one of the values from ``gl_varying_slot``.
738 * - Fragment shader input: one of the values from ``gl_varying_slot``.
739 * - Fragment shader output: one of the values from ``gl_frag_result``.
740 * - Task shader output: one of the values from ``gl_varying_slot``.
741 * - Mesh shader input: one of the values from ``gl_varying_slot``.
742 * - Mesh shader output: one of the values from ``gl_varying_slot``.
743 * - Uniforms: Per-stage uniform slot number for default uniform block.
744 * - Uniforms: Index within the uniform block definition for UBO members.
745 * - Non-UBO Uniforms: uniform slot number.
746 * - Other: This field is not currently used.
747 *
748 * If the variable is a uniform, shader input, or shader output, and the
749 * slot has not been assigned, the value will be -1.
750 */
751 int location;
752
753 /** Required alignment of this variable */
754 unsigned alignment;
755
756 /**
757 * The actual location of the variable in the IR. Only valid for inputs,
758 * outputs, uniforms (including samplers and images), and for UBO and SSBO
759 * variables in GLSL.
760 */
761 unsigned driver_location;
762
763 /**
764 * Location an atomic counter or transform feedback is stored at.
765 */
766 unsigned offset;
767
768 union {
769 struct {
770 /** Image internal format if specified explicitly, otherwise PIPE_FORMAT_NONE. */
771 enum pipe_format format;
772 } image;
773
774 struct {
775 /**
776 * For OpenCL inline samplers. See cl_sampler_addressing_mode and cl_sampler_filter_mode
777 */
778 unsigned is_inline_sampler : 1;
779 unsigned addressing_mode : 3;
780 unsigned normalized_coordinates : 1;
781 unsigned filter_mode : 1;
782 } sampler;
783
784 struct {
785 /**
786 * Transform feedback buffer.
787 */
788 uint16_t buffer : 2;
789
790 /**
791 * Transform feedback stride.
792 */
793 uint16_t stride;
794 } xfb;
795 };
796
797 /** Name of the node this payload will be enqueued to. */
798 const char *node_name;
799 } data;
800
801 /**
802 * Identifier for this variable generated by nir_index_vars() that is unique
803 * among other variables in the same exec_list.
804 */
805 unsigned index;
806
807 /* Number of nir_variable_data members */
808 uint16_t num_members;
809
810 /**
811 * Built-in state that backs this uniform
812 *
813 * Once set at variable creation, ``state_slots`` must remain invariant.
814 * This is because, ideally, this array would be shared by all clones of
815 * this variable in the IR tree. In other words, we'd really like for it
816 * to be a fly-weight.
817 *
818 * If the variable is not a uniform, ``num_state_slots`` will be zero and
819 * ``state_slots`` will be ``NULL``.
820 *
821 * Number of state slots used.
822 */
823 uint16_t num_state_slots;
824 /** State descriptors. */
825 nir_state_slot *state_slots;
826
827 /**
828 * Constant expression assigned in the initializer of the variable
829 *
830 * This field should only be used temporarily by creators of NIR shaders
831 * and then nir_lower_variable_initializers can be used to get rid of them.
832 * Most of the rest of NIR ignores this field or asserts that it's NULL.
833 */
834 nir_constant *constant_initializer;
835
836 /**
837 * Global variable assigned in the initializer of the variable
838 * This field should only be used temporarily by creators of NIR shaders
839 * and then nir_lower_variable_initializers can be used to get rid of them.
840 * Most of the rest of NIR ignores this field or asserts that it's NULL.
841 */
842 struct nir_variable *pointer_initializer;
843
844 /**
845 * For variables that are in an interface block or are an instance of an
846 * interface block, this is the ``GLSL_TYPE_INTERFACE`` type for that block.
847 *
848 * ``ir_variable.location``
849 */
850 const struct glsl_type *interface_type;
851
852 /**
853 * Description of per-member data for per-member struct variables
854 *
855 * This is used for variables which are actually an amalgamation of
856 * multiple entities such as a struct of built-in values or a struct of
857 * inputs each with their own layout specifier. This is only allowed on
858 * variables with a struct or array of array of struct type.
859 */
860 struct nir_variable_data *members;
861 } nir_variable;
862
863 static inline bool
864 _nir_shader_variable_has_mode(nir_variable *var, unsigned modes)
865 {
866 /* This isn't a shader variable */
867 assert(!(modes & nir_var_function_temp));
868 return var->data.mode & modes;
869 }
870
871 #define nir_foreach_variable_in_list(var, var_list) \
872 foreach_list_typed(nir_variable, var, node, var_list)
873
874 #define nir_foreach_variable_in_list_safe(var, var_list) \
875 foreach_list_typed_safe(nir_variable, var, node, var_list)
876
877 #define nir_foreach_variable_in_shader(var, shader) \
878 nir_foreach_variable_in_list(var, &(shader)->variables)
879
880 #define nir_foreach_variable_in_shader_safe(var, shader) \
881 nir_foreach_variable_in_list_safe(var, &(shader)->variables)
882
883 #define nir_foreach_variable_with_modes(var, shader, modes) \
884 nir_foreach_variable_in_shader(var, shader) \
885 if (_nir_shader_variable_has_mode(var, modes))
886
887 #define nir_foreach_variable_with_modes_safe(var, shader, modes) \
888 nir_foreach_variable_in_shader_safe(var, shader) \
889 if (_nir_shader_variable_has_mode(var, modes))
890
891 #define nir_foreach_shader_in_variable(var, shader) \
892 nir_foreach_variable_with_modes(var, shader, nir_var_shader_in)
893
894 #define nir_foreach_shader_in_variable_safe(var, shader) \
895 nir_foreach_variable_with_modes_safe(var, shader, nir_var_shader_in)
896
897 #define nir_foreach_shader_out_variable(var, shader) \
898 nir_foreach_variable_with_modes(var, shader, nir_var_shader_out)
899
900 #define nir_foreach_shader_out_variable_safe(var, shader) \
901 nir_foreach_variable_with_modes_safe(var, shader, nir_var_shader_out)
902
903 #define nir_foreach_uniform_variable(var, shader) \
904 nir_foreach_variable_with_modes(var, shader, nir_var_uniform)
905
906 #define nir_foreach_uniform_variable_safe(var, shader) \
907 nir_foreach_variable_with_modes_safe(var, shader, nir_var_uniform)
908
909 #define nir_foreach_image_variable(var, shader) \
910 nir_foreach_variable_with_modes(var, shader, nir_var_image)
911
912 #define nir_foreach_image_variable_safe(var, shader) \
913 nir_foreach_variable_with_modes_safe(var, shader, nir_var_image)
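
/* Illustrative sketch (not part of this header): walking a shader's input
 * variables with the iterators above.
 *
 *    unsigned num_inputs = 0;
 *    nir_foreach_shader_in_variable(var, shader)
 *       num_inputs++;
 */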
914
915 static inline bool
916 nir_variable_is_global(const nir_variable *var)
917 {
918 return var->data.mode != nir_var_function_temp;
919 }
920
921 typedef enum ENUM_PACKED {
922 nir_instr_type_alu,
923 nir_instr_type_deref,
924 nir_instr_type_call,
925 nir_instr_type_tex,
926 nir_instr_type_intrinsic,
927 nir_instr_type_load_const,
928 nir_instr_type_jump,
929 nir_instr_type_undef,
930 nir_instr_type_phi,
931 nir_instr_type_parallel_copy,
932 } nir_instr_type;
933
934 typedef struct nir_instr {
935 struct exec_node node;
936 struct nir_block *block;
937 nir_instr_type type;
938
939 /* A temporary for optimization and analysis passes to use for storing
940 * flags. For instance, DCE uses this to store the "dead/live" info.
941 */
942 uint8_t pass_flags;
943
944 /** generic instruction index. */
945 uint32_t index;
946 } nir_instr;
947
948 static inline nir_instr *
949 nir_instr_next(nir_instr *instr)
950 {
951 struct exec_node *next = exec_node_get_next(&instr->node);
952 if (exec_node_is_tail_sentinel(next))
953 return NULL;
954 else
955 return exec_node_data(nir_instr, next, node);
956 }
957
958 static inline nir_instr *
959 nir_instr_prev(nir_instr *instr)
960 {
961 struct exec_node *prev = exec_node_get_prev(&instr->node);
962 if (exec_node_is_head_sentinel(prev))
963 return NULL;
964 else
965 return exec_node_data(nir_instr, prev, node);
966 }
967
968 static inline bool
969 nir_instr_is_first(const nir_instr *instr)
970 {
971 return exec_node_is_head_sentinel(exec_node_get_prev_const(&instr->node));
972 }
973
974 static inline bool
975 nir_instr_is_last(const nir_instr *instr)
976 {
977 return exec_node_is_tail_sentinel(exec_node_get_next_const(&instr->node));
978 }
979
980 typedef struct nir_def {
981 /** Instruction which produces this SSA value. */
982 nir_instr *parent_instr;
983
984 /** set of nir_instrs where this SSA value is used (read from) */
985 struct list_head uses;
986
987 /** generic SSA definition index. */
988 unsigned index;
989
990 uint8_t num_components;
991
992 /* The bit-size of each channel; must be one of 1, 8, 16, 32, or 64 */
993 uint8_t bit_size;
994
995 /**
996 * True if this SSA value may have different values in different SIMD
997 * invocations of the shader. This is set by nir_divergence_analysis.
998 */
999 bool divergent;
1000 } nir_def;
1001
1002 struct nir_src;
1003 struct nir_if;
1004
1005 typedef struct nir_src {
1006 /* Instruction or if-statement that consumes this value as a source. This
1007 * should only be accessed through nir_src_* helpers.
1008 *
1009 * Internally, it is a tagged pointer to a nir_instr or nir_if.
1010 */
1011 uintptr_t _parent;
1012
1013 struct list_head use_link;
1014 nir_def *ssa;
1015 } nir_src;
1016
1017 /* Layout of the _parent pointer. Bottom bit is set for nir_if parents (clear
1018 * for nir_instr parents). Remaining bits are the pointer.
1019 */
1020 #define NIR_SRC_PARENT_IS_IF (0x1)
1021 #define NIR_SRC_PARENT_MASK (~((uintptr_t) NIR_SRC_PARENT_IS_IF))
1022
1023 static inline bool
1024 nir_src_is_if(const nir_src *src)
1025 {
1026 return src->_parent & NIR_SRC_PARENT_IS_IF;
1027 }
1028
1029 static inline nir_instr *
1030 nir_src_parent_instr(const nir_src *src)
1031 {
1032 assert(!nir_src_is_if(src));
1033
1034 /* Because it is not an if, the tag is 0, therefore we do not need to mask */
1035 return (nir_instr *)(src->_parent);
1036 }
1037
1038 static inline struct nir_if *
1039 nir_src_parent_if(const nir_src *src)
1040 {
1041 assert(nir_src_is_if(src));
1042
1043 /* Because it is an if, the tag is 1, so we need to mask */
1044 return (struct nir_if *)(src->_parent & NIR_SRC_PARENT_MASK);
1045 }
1046
1047 static inline void
1048 _nir_src_set_parent(nir_src *src, void *parent, bool is_if)
1049 {
1050 uintptr_t ptr = (uintptr_t) parent;
1051 assert((ptr & ~NIR_SRC_PARENT_MASK) == 0 && "pointer must be aligned");
1052
1053 if (is_if)
1054 ptr |= NIR_SRC_PARENT_IS_IF;
1055
1056 src->_parent = ptr;
1057 }
1058
1059 static inline void
1060 nir_src_set_parent_instr(nir_src *src, nir_instr *parent_instr)
1061 {
1062 _nir_src_set_parent(src, parent_instr, false);
1063 }
1064
1065 static inline void
1066 nir_src_set_parent_if(nir_src *src, struct nir_if *parent_if)
1067 {
1068 _nir_src_set_parent(src, parent_if, true);
1069 }
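
/* Illustrative sketch (not part of this header): because _parent is a tagged
 * pointer, a use can be asked whether it feeds an if-condition or an
 * instruction source.
 *
 *    if (nir_src_is_if(src))
 *       handle_if_condition(nir_src_parent_if(src));   // hypothetical helper
 *    else
 *       handle_instr_src(nir_src_parent_instr(src));   // hypothetical helper
 */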
1070
1071 static inline nir_src
1072 nir_src_init(void)
1073 {
1074 nir_src src = { 0 };
1075 return src;
1076 }
1077
1078 #define NIR_SRC_INIT nir_src_init()
1079
1080 #define nir_foreach_use_including_if(src, reg_or_ssa_def) \
1081 list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link)
1082
1083 #define nir_foreach_use_including_if_safe(src, reg_or_ssa_def) \
1084 list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->uses, use_link)
1085
1086 #define nir_foreach_use(src, reg_or_ssa_def) \
1087 nir_foreach_use_including_if(src, reg_or_ssa_def) \
1088 if (!nir_src_is_if(src))
1089
1090 #define nir_foreach_use_safe(src, reg_or_ssa_def) \
1091 nir_foreach_use_including_if_safe(src, reg_or_ssa_def) \
1092 if (!nir_src_is_if(src))
1093
1094 #define nir_foreach_if_use(src, reg_or_ssa_def) \
1095 nir_foreach_use_including_if(src, reg_or_ssa_def) \
1096 if (nir_src_is_if(src))
1097
1098 #define nir_foreach_if_use_safe(src, reg_or_ssa_def) \
1099 nir_foreach_use_including_if_safe(src, reg_or_ssa_def) \
1100 if (nir_src_is_if(src))
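
/* Illustrative sketch (not part of this header): counting the instruction
 * uses of a def, excluding uses as an if-condition.
 *
 *    unsigned num_uses = 0;
 *    nir_foreach_use(src, def)
 *       num_uses++;
 */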
1101
1102 static inline bool
1103 nir_def_used_by_if(const nir_def *def)
1104 {
1105 nir_foreach_if_use(_, def)
1106 return true;
1107
1108 return false;
1109 }
1110
1111 static inline nir_src
1112 nir_src_for_ssa(nir_def *def)
1113 {
1114 nir_src src = NIR_SRC_INIT;
1115
1116 src.ssa = def;
1117
1118 return src;
1119 }
1120
1121 static inline unsigned
1122 nir_src_bit_size(nir_src src)
1123 {
1124 return src.ssa->bit_size;
1125 }
1126
1127 static inline unsigned
1128 nir_src_num_components(nir_src src)
1129 {
1130 return src.ssa->num_components;
1131 }
1132
1133 static inline bool
1134 nir_src_is_const(nir_src src)
1135 {
1136 return src.ssa->parent_instr->type == nir_instr_type_load_const;
1137 }
1138
1139 static inline bool
1140 nir_src_is_undef(nir_src src)
1141 {
1142 return src.ssa->parent_instr->type == nir_instr_type_undef;
1143 }
1144
1145 static inline bool
1146 nir_src_is_divergent(nir_src src)
1147 {
1148 return src.ssa->divergent;
1149 }
1150
1151 /* Are all components the same, ie. .xxxx */
1152 static inline bool
1153 nir_is_same_comp_swizzle(uint8_t *swiz, unsigned nr_comp)
1154 {
1155 for (unsigned i = 1; i < nr_comp; i++)
1156 if (swiz[i] != swiz[0])
1157 return false;
1158 return true;
1159 }
1160
1161 /* Are all components sequential, ie. .yzw */
1162 static inline bool
1163 nir_is_sequential_comp_swizzle(uint8_t *swiz, unsigned nr_comp)
1164 {
1165 for (unsigned i = 1; i < nr_comp; i++)
1166 if (swiz[i] != (swiz[0] + i))
1167 return false;
1168 return true;
1169 }
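
/* Illustrative sketch (not part of this header):
 *
 *    uint8_t xxxx[] = { 0, 0, 0, 0 };   // .xxxx
 *    uint8_t yzw[]  = { 1, 2, 3 };      // .yzw
 *    assert(nir_is_same_comp_swizzle(xxxx, 4));
 *    assert(nir_is_sequential_comp_swizzle(yzw, 3));
 */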
1170
1171 /***/
1172 typedef struct nir_alu_src {
1173 /** Base source */
1174 nir_src src;
1175
1176 /**
1177 * For each input component, says which component of the register it is
1178 * chosen from.
1179 *
1180 * Note that which elements of the swizzle are used and which are ignored
1181 * are based on the write mask for most opcodes - for example, a statement
1182 * like "foo.xzw = bar.zyx" would have a writemask of 1101b and a swizzle
1183 * of {2, 1, x, 0} where x means "don't care."
1184 */
1185 uint8_t swizzle[NIR_MAX_VEC_COMPONENTS];
1186 } nir_alu_src;
1187
1188 /** NIR sized and unsized types
1189 *
1190 * The values in this enum are carefully chosen so that the sized type is
1191 * just the unsized type OR the number of bits.
1192 */
1193 /* clang-format off */
1194 typedef enum ENUM_PACKED {
1195 nir_type_invalid = 0, /* Not a valid type */
1196 nir_type_int = 2,
1197 nir_type_uint = 4,
1198 nir_type_bool = 6,
1199 nir_type_float = 128,
1200 nir_type_bool1 = 1 | nir_type_bool,
1201 nir_type_bool8 = 8 | nir_type_bool,
1202 nir_type_bool16 = 16 | nir_type_bool,
1203 nir_type_bool32 = 32 | nir_type_bool,
1204 nir_type_int1 = 1 | nir_type_int,
1205 nir_type_int8 = 8 | nir_type_int,
1206 nir_type_int16 = 16 | nir_type_int,
1207 nir_type_int32 = 32 | nir_type_int,
1208 nir_type_int64 = 64 | nir_type_int,
1209 nir_type_uint1 = 1 | nir_type_uint,
1210 nir_type_uint8 = 8 | nir_type_uint,
1211 nir_type_uint16 = 16 | nir_type_uint,
1212 nir_type_uint32 = 32 | nir_type_uint,
1213 nir_type_uint64 = 64 | nir_type_uint,
1214 nir_type_float16 = 16 | nir_type_float,
1215 nir_type_float32 = 32 | nir_type_float,
1216 nir_type_float64 = 64 | nir_type_float,
1217 } nir_alu_type;
1218 /* clang-format on */
1219
1220 #define NIR_ALU_TYPE_SIZE_MASK 0x79
1221 #define NIR_ALU_TYPE_BASE_TYPE_MASK 0x86
1222
1223 static inline unsigned
1224 nir_alu_type_get_type_size(nir_alu_type type)
1225 {
1226 return type & NIR_ALU_TYPE_SIZE_MASK;
1227 }
1228
1229 static inline nir_alu_type
1230 nir_alu_type_get_base_type(nir_alu_type type)
1231 {
1232 return (nir_alu_type)(type & NIR_ALU_TYPE_BASE_TYPE_MASK);
1233 }
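
/* Illustrative sketch (not part of this header): a sized type is simply the
 * unsized base type OR'd with the bit width, so it decomposes as
 *
 *    assert(nir_alu_type_get_base_type(nir_type_float32) == nir_type_float);
 *    assert(nir_alu_type_get_type_size(nir_type_float32) == 32);
 */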
1234
1235 nir_alu_type
1236 nir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type);
1237
1238 static inline nir_alu_type
1239 nir_get_nir_type_for_glsl_type(const struct glsl_type *type)
1240 {
1241 return nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(type));
1242 }
1243
1244 enum glsl_base_type
1245 nir_get_glsl_base_type_for_nir_type(nir_alu_type base_type);
1246
1247 nir_op nir_type_conversion_op(nir_alu_type src, nir_alu_type dst,
1248 nir_rounding_mode rnd);
1249
1250 /**
1251 * Atomic intrinsics perform different operations depending on the value of
1252 * their atomic_op constant index. nir_atomic_op defines the operations.
1253 */
1254 typedef enum {
1255 nir_atomic_op_iadd,
1256 nir_atomic_op_imin,
1257 nir_atomic_op_umin,
1258 nir_atomic_op_imax,
1259 nir_atomic_op_umax,
1260 nir_atomic_op_iand,
1261 nir_atomic_op_ior,
1262 nir_atomic_op_ixor,
1263 nir_atomic_op_xchg,
1264 nir_atomic_op_fadd,
1265 nir_atomic_op_fmin,
1266 nir_atomic_op_fmax,
1267 nir_atomic_op_cmpxchg,
1268 nir_atomic_op_fcmpxchg,
1269 nir_atomic_op_inc_wrap,
1270 nir_atomic_op_dec_wrap,
1271 } nir_atomic_op;
1272
1273 static inline nir_alu_type
1274 nir_atomic_op_type(nir_atomic_op op)
1275 {
1276 switch (op) {
1277 case nir_atomic_op_imin:
1278 case nir_atomic_op_imax:
1279 return nir_type_int;
1280
1281 case nir_atomic_op_fadd:
1282 case nir_atomic_op_fmin:
1283 case nir_atomic_op_fmax:
1284 case nir_atomic_op_fcmpxchg:
1285 return nir_type_float;
1286
1287 case nir_atomic_op_iadd:
1288 case nir_atomic_op_iand:
1289 case nir_atomic_op_ior:
1290 case nir_atomic_op_ixor:
1291 case nir_atomic_op_xchg:
1292 case nir_atomic_op_cmpxchg:
1293 case nir_atomic_op_umin:
1294 case nir_atomic_op_umax:
1295 case nir_atomic_op_inc_wrap:
1296 case nir_atomic_op_dec_wrap:
1297 return nir_type_uint;
1298 }
1299
1300 unreachable("Invalid nir_atomic_op");
1301 }
1302
1303 /** Returns nir_op_vec<num_components> or nir_op_mov if num_components == 1
1304 *
1305 * This is subtly different from nir_op_is_vec() which returns false for
1306 * nir_op_mov. Returning nir_op_mov from nir_op_vec() when num_components == 1
1307 * makes sense under the assumption that the num_components of the resulting
1308 * nir_def will be the same as what is passed in here, because a single-component
1309 * mov is effectively a vec1. However, if alu->def.num_components > 1, nir_op_mov
1310 * has different semantics from nir_op_vec*, so code which detects "is this
1311 * a vec?" typically needs to handle nir_op_mov separately from nir_op_vecN.
1312 *
1313 * In the unlikely case where you can handle nir_op_vecN and nir_op_mov
1314 * together, use nir_op_is_vec_or_mov().
1315 */
1316 nir_op
1317 nir_op_vec(unsigned num_components);
1318
1319 /** Returns true if this op is one of nir_op_vec*
1320 *
1321 * Returns false for nir_op_mov. See nir_op_vec() for more details.
1322 */
1323 bool
1324 nir_op_is_vec(nir_op op);
1325
1326 static inline bool
1327 nir_op_is_vec_or_mov(nir_op op)
1328 {
1329 return op == nir_op_mov || nir_op_is_vec(op);
1330 }
1331
1332 static inline bool
1333 nir_is_float_control_signed_zero_preserve(unsigned execution_mode, unsigned bit_size)
1334 {
1335 return (16 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP16) ||
1336 (32 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP32) ||
1337 (64 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_PRESERVE_FP64);
1338 }
1339
1340 static inline bool
1341 nir_is_float_control_inf_preserve(unsigned execution_mode, unsigned bit_size)
1342 {
1343 return (16 == bit_size && execution_mode & FLOAT_CONTROLS_INF_PRESERVE_FP16) ||
1344 (32 == bit_size && execution_mode & FLOAT_CONTROLS_INF_PRESERVE_FP32) ||
1345 (64 == bit_size && execution_mode & FLOAT_CONTROLS_INF_PRESERVE_FP64);
1346 }
1347
1348 static inline bool
1349 nir_is_float_control_nan_preserve(unsigned execution_mode, unsigned bit_size)
1350 {
1351 return (16 == bit_size && execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP16) ||
1352 (32 == bit_size && execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP32) ||
1353 (64 == bit_size && execution_mode & FLOAT_CONTROLS_NAN_PRESERVE_FP64);
1354 }
1355
1356 static inline bool
1357 nir_is_float_control_signed_zero_inf_nan_preserve(unsigned execution_mode, unsigned bit_size)
1358 {
1359 return (16 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16) ||
1360 (32 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32) ||
1361 (64 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64);
1362 }
1363
1364 static inline bool
1365 nir_is_denorm_flush_to_zero(unsigned execution_mode, unsigned bit_size)
1366 {
1367 return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16) ||
1368 (32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) ||
1369 (64 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64);
1370 }
1371
1372 static inline bool
1373 nir_is_denorm_preserve(unsigned execution_mode, unsigned bit_size)
1374 {
1375 return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP16) ||
1376 (32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP32) ||
1377 (64 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP64);
1378 }
1379
1380 static inline bool
1381 nir_is_rounding_mode_rtne(unsigned execution_mode, unsigned bit_size)
1382 {
1383 return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) ||
1384 (32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) ||
1385 (64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64);
1386 }
1387
1388 static inline bool
1389 nir_is_rounding_mode_rtz(unsigned execution_mode, unsigned bit_size)
1390 {
1391 return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) ||
1392 (32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) ||
1393 (64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64);
1394 }
1395
1396 static inline bool
1397 nir_has_any_rounding_mode_rtz(unsigned execution_mode)
1398 {
1399 return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) ||
1400 (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) ||
1401 (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64);
1402 }
1403
1404 static inline bool
1405 nir_has_any_rounding_mode_rtne(unsigned execution_mode)
1406 {
1407 return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) ||
1408 (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) ||
1409 (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64);
1410 }
1411
1412 static inline nir_rounding_mode
1413 nir_get_rounding_mode_from_float_controls(unsigned execution_mode,
1414 nir_alu_type type)
1415 {
1416 if (nir_alu_type_get_base_type(type) != nir_type_float)
1417 return nir_rounding_mode_undef;
1418
1419 unsigned bit_size = nir_alu_type_get_type_size(type);
1420
1421 if (nir_is_rounding_mode_rtz(execution_mode, bit_size))
1422 return nir_rounding_mode_rtz;
1423 if (nir_is_rounding_mode_rtne(execution_mode, bit_size))
1424 return nir_rounding_mode_rtne;
1425 return nir_rounding_mode_undef;
1426 }
1427
1428 static inline bool
1429 nir_has_any_rounding_mode_enabled(unsigned execution_mode)
1430 {
1431 bool result =
1432 nir_has_any_rounding_mode_rtne(execution_mode) ||
1433 nir_has_any_rounding_mode_rtz(execution_mode);
1434 return result;
1435 }
1436
1437 typedef enum {
1438 /**
1439 * Operation where the first two sources are commutative.
1440 *
1441 * For 2-source operations, this is just mathematical commutativity. Some
1442 * 3-source operations, like ffma, are only commutative in the first two
1443 * sources.
1444 */
1445 NIR_OP_IS_2SRC_COMMUTATIVE = (1 << 0),
1446
1447 /**
1448 * Operation is associative
1449 */
1450 NIR_OP_IS_ASSOCIATIVE = (1 << 1),
1451
1452 /**
1453 * Operation where src[0] is used to select src[1] on true or src[2] on false.
1454 * src[0] may be Boolean, or it may be another type used in an implicit
1455 * comparison.
1456 */
1457 NIR_OP_IS_SELECTION = (1 << 2),
1458
1459 /**
1460 * Operation where a screen-space derivative is taken of src[0]. Must not be
1461 * moved into non-uniform control flow.
1462 */
1463 NIR_OP_IS_DERIVATIVE = (1 << 3),
1464 } nir_op_algebraic_property;
1465
1466 /* vec16 is the widest ALU op in NIR, making the max number of inputs of ALU
1467 * instructions the same as NIR_MAX_VEC_COMPONENTS.
1468 */
1469 #define NIR_ALU_MAX_INPUTS NIR_MAX_VEC_COMPONENTS
1470
1471 /***/
1472 typedef struct nir_op_info {
1473 /** Name of the NIR ALU opcode */
1474 const char *name;
1475
1476 /** Number of inputs (sources) */
1477 uint8_t num_inputs;
1478
1479 /**
1480 * The number of components in the output
1481 *
1482 * If non-zero, this is the size of the output and input sizes are
1483 * explicitly given; swizzle and writemask are still in effect, but if
1484 * the output component is masked out, then the input component may
1485 * still be in use.
1486 *
1487 * If zero, the opcode acts in the standard, per-component manner; the
1488 * operation is performed on each component (except the ones that are
1489 * masked out) with the input being taken from the input swizzle for
1490 * that component.
1491 *
1492 * The size of some of the inputs may be given (i.e. non-zero) even
1493 * though output_size is zero; in that case, the inputs with a zero
1494 * size act per-component, while the inputs with non-zero size don't.
1495 */
1496 uint8_t output_size;
1497
1498 /**
1499 * The type of vector that the instruction outputs. Note that the
1500 * saturate modifier is only allowed on outputs with the float type.
1501 */
1502 nir_alu_type output_type;
1503
1504 /**
1505 * The number of components in each input
1506 *
1507 * See nir_op_infos::output_size for more detail about the relationship
1508 * between input and output sizes.
1509 */
1510 uint8_t input_sizes[NIR_ALU_MAX_INPUTS];
1511
1512 /**
1513 * The type of vector that each input takes.
1514 */
1515 nir_alu_type input_types[NIR_ALU_MAX_INPUTS];
1516
1517 /** Algebraic properties of this opcode */
1518 nir_op_algebraic_property algebraic_properties;
1519
1520 /** Whether this represents a numeric conversion opcode */
1521 bool is_conversion;
1522 } nir_op_info;
1523
1524 /** Metadata for each nir_op, indexed by opcode */
1525 extern const nir_op_info nir_op_infos[nir_num_opcodes];
1526
1527 static inline bool
1528 nir_op_is_selection(nir_op op)
1529 {
1530 return (nir_op_infos[op].algebraic_properties & NIR_OP_IS_SELECTION) != 0;
1531 }
1532
1533 static inline bool
1534 nir_op_is_derivative(nir_op op)
1535 {
1536 return (nir_op_infos[op].algebraic_properties & NIR_OP_IS_DERIVATIVE) != 0;
1537 }
1538
1539 /***/
1540 typedef struct nir_alu_instr {
1541 /** Base instruction */
1542 nir_instr instr;
1543
1544 /** Opcode */
1545 nir_op op;
1546
1547 /** Indicates that this ALU instruction generates an exact value
1548 *
1549 * This is kind of a mixture of GLSL "precise" and "invariant" and not
1550 * really equivalent to either. This indicates that the value generated by
1551 * this operation is high-precision and any code transformations that touch
1552 * it must ensure that the resulting value is bit-for-bit identical to the
1553 * original.
1554 */
1555 bool exact : 1;
1556
1557 /**
1558 * Indicates that this instruction does not cause signed integer wrapping
1559 * to occur, in the form of overflow or underflow.
1560 */
1561 bool no_signed_wrap : 1;
1562
1563 /**
1564 * Indicates that this instruction does not cause unsigned integer wrapping
1565 * to occur, in the form of overflow or underflow.
1566 */
1567 bool no_unsigned_wrap : 1;
1568
1569 /** Destination */
1570 nir_def def;
1571
1572 /** Sources
1573 *
1574 * The size of the array is given by :c:member:`nir_op_info.num_inputs`.
1575 */
1576 nir_alu_src src[];
1577 } nir_alu_instr;
1578
1579 void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src);
1580
1581 nir_component_mask_t
1582 nir_alu_instr_src_read_mask(const nir_alu_instr *instr, unsigned src);
1583 /**
1584 * Get the number of channels used for a source
1585 */
1586 unsigned
1587 nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src);
1588
1589 /* is this source channel used? */
1590 static inline bool
1591 nir_alu_instr_channel_used(const nir_alu_instr *instr, unsigned src,
1592 unsigned channel)
1593 {
1594 return channel < nir_ssa_alu_instr_src_components(instr, src);
1595 }
1596
1597 bool
1598 nir_alu_instr_is_comparison(const nir_alu_instr *instr);
1599
1600 bool nir_const_value_negative_equal(nir_const_value c1, nir_const_value c2,
1601 nir_alu_type full_type);
1602
1603 bool nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2,
1604 unsigned src1, unsigned src2);
1605
1606 bool nir_alu_srcs_negative_equal(const nir_alu_instr *alu1,
1607 const nir_alu_instr *alu2,
1608 unsigned src1, unsigned src2);
1609
1610 bool nir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, unsigned srcn);
1611
1612 typedef enum {
1613 nir_deref_type_var,
1614 nir_deref_type_array,
1615 nir_deref_type_array_wildcard,
1616 nir_deref_type_ptr_as_array,
1617 nir_deref_type_struct,
1618 nir_deref_type_cast,
1619 } nir_deref_type;
1620
1621 typedef struct {
1622 nir_instr instr;
1623
1624 /** The type of this deref instruction */
1625 nir_deref_type deref_type;
1626
1627 /** Bitmask of the modes that the underlying variable might be in
1628 *
1629 * For OpenCL-style generic pointers, we may not know exactly what mode it
1630 * is at any given point in time in the compile process. This bitfield
1631 * contains the set of modes which it MAY be.
1632 *
1633 * Generally, this field should not be accessed directly. Use one of the
1634 * nir_deref_mode_ helpers instead.
1635 */
1636 nir_variable_mode modes;
1637
1638 /** The dereferenced type of the resulting pointer value */
1639 const struct glsl_type *type;
1640
1641 union {
1642 /** Variable being dereferenced if deref_type is a deref_var */
1643 nir_variable *var;
1644
1645 /** Parent deref if deref_type is not deref_var */
1646 nir_src parent;
1647 };
1648
1649 /** Additional deref parameters */
1650 union {
1651 struct {
1652 nir_src index;
1653 bool in_bounds;
1654 } arr;
1655
1656 struct {
1657 unsigned index;
1658 } strct;
1659
1660 struct {
1661 unsigned ptr_stride;
1662 unsigned align_mul;
1663 unsigned align_offset;
1664 } cast;
1665 };
1666
1667 /** Destination to store the resulting "pointer" */
1668 nir_def def;
1669 } nir_deref_instr;
1670
1671 /**
1672 * Returns true if the cast is trivial, i.e. the source and destination types
1673 * are the same.
1674 */
1675 bool nir_deref_cast_is_trivial(nir_deref_instr *cast);
1676
1677 /** Returns true if deref might have one of the given modes
1678 *
1679 * For multi-mode derefs, this returns true if any of the possible modes of
1680 * the deref matches any of the specified modes. This function returning true
1681 * does NOT mean that the deref definitely has one of those modes. It simply
1682 * means that, with the best information we have at the time, it might.
1683 */
1684 static inline bool
1685 nir_deref_mode_may_be(const nir_deref_instr *deref, nir_variable_mode modes)
1686 {
1687 assert(!(modes & ~nir_var_all));
1688 assert(deref->modes != 0);
1689 return deref->modes & modes;
1690 }
1691
1692 /** Returns true if deref must have one of the given modes
1693 *
1694 * For multi-mode derefs, this returns true if NIR can prove that the given
1695 * deref has one of the specified modes. This function returning false does
1696 * NOT mean that deref doesn't have one of the given modes. It very well may
1697 * have one of those modes, we just don't have enough information to prove
1698 * that it does for sure.
1699 */
1700 static inline bool
1701 nir_deref_mode_must_be(const nir_deref_instr *deref, nir_variable_mode modes)
1702 {
1703 assert(!(modes & ~nir_var_all));
1704 assert(deref->modes != 0);
1705 return !(deref->modes & ~modes);
1706 }
1707
1708 /** Returns true if deref has the given mode
1709 *
1710 * This returns true if the deref has exactly the mode specified. If the
1711 * deref may have that mode but may also have a different mode (i.e. modes has
1712 * multiple bits set), this will assert-fail.
1713 *
1714 * If you're confused about which nir_deref_mode_ helper to use, use this one
1715 * or nir_deref_mode_is_one_of below.
1716 */
1717 static inline bool
1718 nir_deref_mode_is(const nir_deref_instr *deref, nir_variable_mode mode)
1719 {
1720 assert(util_bitcount(mode) == 1 && (mode & nir_var_all));
1721 assert(deref->modes != 0);
1722
1723    /* This is only for "simple" cases, so if modes might interact with this
1724 * deref then the deref has to have a single mode.
1725 */
1726 if (nir_deref_mode_may_be(deref, mode)) {
1727 assert(util_bitcount(deref->modes) == 1);
1728 assert(deref->modes == mode);
1729 }
1730
1731 return deref->modes == mode;
1732 }
1733
1734 /** Returns true if deref has one of the given modes
1735 *
1736 * This returns true if the deref has exactly one possible mode and that mode
1737 * is one of the modes specified. If the deref may have one of those modes
1738 * but may also have a different mode (i.e. modes has multiple bits set), this
1739 * will assert-fail.
1740 */
1741 static inline bool
1742 nir_deref_mode_is_one_of(const nir_deref_instr *deref, nir_variable_mode modes)
1743 {
1744    /* This is only for "simple" cases, so if modes might interact with this
1745 * deref then the deref has to have a single mode.
1746 */
1747 if (nir_deref_mode_may_be(deref, modes)) {
1748 assert(util_bitcount(deref->modes) == 1);
1749 assert(nir_deref_mode_must_be(deref, modes));
1750 }
1751
1752 return nir_deref_mode_may_be(deref, modes);
1753 }
1754
1755 /** Returns true if deref's possible modes lie in the given set of modes
1756 *
1757 * This returns true if the deref's modes lie in the given set of modes. If
1758 * the deref's modes overlap with the specified modes but aren't entirely
1759 * contained in the specified set of modes, this will assert-fail. In
1760 * particular, if this is used in a generic pointers scenario, the specified
1761  * modes must contain either all or none of the possible generic pointer modes.
1762 *
1763 * This is intended mostly for mass-lowering of derefs which might have
1764 * generic pointers.
1765 */
1766 static inline bool
1767 nir_deref_mode_is_in_set(const nir_deref_instr *deref, nir_variable_mode modes)
1768 {
1769 if (nir_deref_mode_may_be(deref, modes))
1770 assert(nir_deref_mode_must_be(deref, modes));
1771
1772 return nir_deref_mode_may_be(deref, modes);
1773 }
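
/* Illustrative example (assumes a pass looking at a load_deref intrinsic):
 * when only a single mode is expected, prefer nir_deref_mode_is(); the
 * _may_be/_must_be variants are for code that has to cope with OpenCL-style
 * generic pointers:
 *
 *    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
 *    if (nir_deref_mode_is(deref, nir_var_mem_shared)) {
 *       // definitely shared memory; safe to lower accordingly
 *    } else if (nir_deref_mode_may_be(deref, nir_var_mem_global)) {
 *       // might be global memory; keep the conservative path
 *    }
 */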
1774
1775 static inline nir_deref_instr *nir_src_as_deref(nir_src src);
1776
1777 static inline nir_deref_instr *
1778 nir_deref_instr_parent(const nir_deref_instr *instr)
1779 {
1780 if (instr->deref_type == nir_deref_type_var)
1781 return NULL;
1782 else
1783 return nir_src_as_deref(instr->parent);
1784 }
1785
1786 static inline nir_variable *
1787 nir_deref_instr_get_variable(const nir_deref_instr *instr)
1788 {
1789 while (instr->deref_type != nir_deref_type_var) {
1790 if (instr->deref_type == nir_deref_type_cast)
1791 return NULL;
1792
1793 instr = nir_deref_instr_parent(instr);
1794 }
1795
1796 return instr->var;
1797 }
1798
1799 bool nir_deref_instr_has_indirect(nir_deref_instr *instr);
1800 bool nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr);
1801
1802 typedef enum {
1803 nir_deref_instr_has_complex_use_allow_memcpy_src = (1 << 0),
1804 nir_deref_instr_has_complex_use_allow_memcpy_dst = (1 << 1),
1805 nir_deref_instr_has_complex_use_allow_atomics = (1 << 2),
1806 } nir_deref_instr_has_complex_use_options;
1807
1808 bool nir_deref_instr_has_complex_use(nir_deref_instr *instr,
1809 nir_deref_instr_has_complex_use_options opts);
1810
1811 bool nir_deref_instr_remove_if_unused(nir_deref_instr *instr);
1812
1813 unsigned nir_deref_instr_array_stride(nir_deref_instr *instr);
1814
1815 typedef struct {
1816 nir_instr instr;
1817
1818 struct nir_function *callee;
1819
1820 unsigned num_params;
1821 nir_src params[];
1822 } nir_call_instr;
1823
1824 #include "nir_intrinsics.h"
1825
1826 #define NIR_INTRINSIC_MAX_CONST_INDEX 8
1827
1828 /** Represents an intrinsic
1829 *
1830 * An intrinsic is an instruction type for handling things that are
1831 * more-or-less regular operations but don't just consume and produce SSA
1832 * values like ALU operations do. Intrinsics are not for things that have
1833 * special semantic meaning such as phi nodes and parallel copies.
1834 * Examples of intrinsics include variable load/store operations, system
1835 * value loads, and the like. Even though texturing more-or-less falls
1836 * under this category, texturing is its own instruction type because
1837 * trying to represent texturing with intrinsics would lead to a
1838 * combinatorial explosion of intrinsic opcodes.
1839 *
1840 * By having a single instruction type for handling a lot of different
1841 * cases, optimization passes can look for intrinsics and, for the most
1842 * part, completely ignore them. Each intrinsic type also has a few
1843 * possible flags that govern whether or not they can be reordered or
1844 * eliminated. That way passes like dead code elimination can still work
1845  * on intrinsics without understanding the meaning of each.
1846 *
1847 * Each intrinsic has some number of constant indices, some number of
1848 * variables, and some number of sources. What these sources, variables,
1849 * and indices mean depends on the intrinsic and is documented with the
1850 * intrinsic declaration in nir_intrinsics.h. Intrinsics and texture
1851 * instructions are the only types of instruction that can operate on
1852 * variables.
1853 */
1854 typedef struct {
1855 nir_instr instr;
1856
1857 nir_intrinsic_op intrinsic;
1858
1859 nir_def def;
1860
1861 /** number of components if this is a vectorized intrinsic
1862 *
1863 * Similarly to ALU operations, some intrinsics are vectorized.
1864 * An intrinsic is vectorized if nir_intrinsic_infos.dest_components == 0.
1865 * For vectorized intrinsics, the num_components field specifies the
1866 * number of destination components and the number of source components
1867 * for all sources with nir_intrinsic_infos.src_components[i] == 0.
1868 */
1869 uint8_t num_components;
1870
1871 int const_index[NIR_INTRINSIC_MAX_CONST_INDEX];
1872
1873 nir_src src[];
1874 } nir_intrinsic_instr;
1875
1876 static inline nir_variable *
1877 nir_intrinsic_get_var(const nir_intrinsic_instr *intrin, unsigned i)
1878 {
1879 return nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[i]));
1880 }
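
/* Illustrative example (not part of this header): a pass matching a specific
 * intrinsic typically filters by instruction type first, then by opcode:
 *
 *    nir_foreach_instr(instr, block) {
 *       if (instr->type != nir_instr_type_intrinsic)
 *          continue;
 *       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
 *       if (intrin->intrinsic == nir_intrinsic_load_deref) {
 *          nir_variable *var = nir_intrinsic_get_var(intrin, 0);
 *          // ... inspect or rewrite the load ...
 *       }
 *    }
 */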
1881
1882 typedef enum {
1883 /* Memory ordering. */
1884 NIR_MEMORY_ACQUIRE = 1 << 0,
1885 NIR_MEMORY_RELEASE = 1 << 1,
1886 NIR_MEMORY_ACQ_REL = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE,
1887
1888 /* Memory visibility operations. */
1889 NIR_MEMORY_MAKE_AVAILABLE = 1 << 2,
1890 NIR_MEMORY_MAKE_VISIBLE = 1 << 3,
1891 } nir_memory_semantics;
1892
1893 /**
1894 * NIR intrinsics semantic flags
1895 *
1896  * Information about what the compiler can do with the intrinsic.
1897 *
1898 * :c:member:`nir_intrinsic_info.flags`
1899 */
1900 typedef enum {
1901 /**
1902    * Whether the intrinsic can be safely eliminated if none of its output
1903    * values are being used.
1904 */
1905 NIR_INTRINSIC_CAN_ELIMINATE = (1 << 0),
1906
1907 /**
1908 * Whether the intrinsic can be reordered with respect to any other
1909 * intrinsic, i.e. whether the only reordering dependencies of the
1910 * intrinsic are due to the register reads/writes.
1911 */
1912 NIR_INTRINSIC_CAN_REORDER = (1 << 1),
1913 } nir_intrinsic_semantic_flag;
1914
1915 /**
1916  * Maximum valid value for a NIR align_mul (in intrinsics or derefs).
1917 *
1918 * Offsets can be signed, so this is the largest power of two in int32_t.
1919 */
1920 #define NIR_ALIGN_MUL_MAX 0x40000000
1921
1922 typedef struct nir_io_semantics {
1923 unsigned location : 7; /* gl_vert_attrib, gl_varying_slot, or gl_frag_result */
1924 unsigned num_slots : 6; /* max 32, may be pessimistic with const indexing */
1925 unsigned dual_source_blend_index : 1;
1926 unsigned fb_fetch_output : 1; /* for GL_KHR_blend_equation_advanced */
1927 unsigned gs_streams : 8; /* xxyyzzww: 2-bit stream index for each component */
1928 unsigned medium_precision : 1; /* GLSL mediump qualifier */
1929 unsigned per_view : 1;
1930 unsigned high_16bits : 1; /* whether accessing low or high half of the slot */
1931 unsigned invariant : 1; /* The variable has the invariant flag set */
1932 unsigned high_dvec2 : 1; /* whether accessing the high half of dvec3/dvec4 */
1933 /* CLIP_DISTn, LAYER, VIEWPORT, and TESS_LEVEL_* have up to 3 uses:
1934 * - an output consumed by the next stage
1935 * - a system value output affecting fixed-func hardware, e.g. the clipper
1936 * - a transform feedback output written to memory
1937 * The following fields disable the first two. Transform feedback is disabled
1938 * by transform feedback info.
1939 */
1940 unsigned no_varying : 1; /* whether this output isn't consumed by the next stage */
1941 unsigned no_sysval_output : 1; /* whether this system value output has no
1942 effect due to current pipeline states */
1943 unsigned _pad : 2;
1944 } nir_io_semantics;
1945
1946 /* Transform feedback info for 2 outputs. nir_intrinsic_store_output contains
1947 * this structure twice to support up to 4 outputs. The structure is limited
1948 * to 32 bits because it's stored in nir_intrinsic_instr::const_index[].
1949 */
1950 typedef struct nir_io_xfb {
1951 struct {
1952 /* start_component is equal to the index of out[]; add 2 for io_xfb2 */
1953 /* start_component is not relative to nir_intrinsic_component */
1954 /* get the stream index from nir_io_semantics */
1955 uint8_t num_components : 4; /* max 4; if this is 0, xfb is disabled */
1956 uint8_t buffer : 4; /* buffer index, max 3 */
1957 uint8_t offset; /* transform feedback buffer offset in dwords,
1958 max (1K - 4) bytes */
1959 } out[2];
1960 } nir_io_xfb;
1961
1962 unsigned
1963 nir_instr_xfb_write_mask(nir_intrinsic_instr *instr);
1964
1965 #define NIR_INTRINSIC_MAX_INPUTS 11
1966
1967 typedef struct {
1968 const char *name;
1969
1970 /** number of register/SSA inputs */
1971 uint8_t num_srcs;
1972
1973 /** number of components of each input register
1974 *
1975 * If this value is 0, the number of components is given by the
1976 * num_components field of nir_intrinsic_instr. If this value is -1, the
1977 * intrinsic consumes however many components are provided and it is not
1978 * validated at all.
1979 */
1980 int8_t src_components[NIR_INTRINSIC_MAX_INPUTS];
1981
1982 bool has_dest;
1983
1984 /** number of components of the output register
1985 *
1986 * If this value is 0, the number of components is given by the
1987 * num_components field of nir_intrinsic_instr.
1988 */
1989 uint8_t dest_components;
1990
1991 /** bitfield of legal bit sizes */
1992 uint8_t dest_bit_sizes;
1993
1994 /** source which the destination bit size must match
1995 *
1996 * Some intrinsics, such as subgroup intrinsics, are data manipulation
1997 * intrinsics and they have similar bit-size rules to ALU ops. This enables
1998 * validation to validate a bit more and enables auto-generated builder code
1999 * to properly determine destination bit sizes automatically.
2000 */
2001 int8_t bit_size_src;
2002
2003 /** the number of constant indices used by the intrinsic */
2004 uint8_t num_indices;
2005
2006 /** list of indices */
2007 uint8_t indices[NIR_INTRINSIC_MAX_CONST_INDEX];
2008
2009 /** indicates the usage of intr->const_index[n] */
2010 uint8_t index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS];
2011
2012 /** semantic flags for calls to this intrinsic */
2013 nir_intrinsic_semantic_flag flags;
2014 } nir_intrinsic_info;
2015
2016 extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics];
2017
2018 unsigned
2019 nir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn);
2020
2021 unsigned
2022 nir_intrinsic_dest_components(nir_intrinsic_instr *intr);
2023
2024 nir_alu_type
2025 nir_intrinsic_instr_src_type(const nir_intrinsic_instr *intrin, unsigned src);
2026
2027 nir_alu_type
2028 nir_intrinsic_instr_dest_type(const nir_intrinsic_instr *intrin);
2029
2030 /**
2031 * Helper to copy const_index[] from src to dst, without assuming they
2032 * match in order.
2033 */
2034 void nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src);
2035
2036 #include "nir_intrinsics_indices.h"
2037
2038 static inline void
2039 nir_intrinsic_set_align(nir_intrinsic_instr *intrin,
2040 unsigned align_mul, unsigned align_offset)
2041 {
2042 assert(util_is_power_of_two_nonzero(align_mul));
2043 assert(align_offset < align_mul);
2044 nir_intrinsic_set_align_mul(intrin, align_mul);
2045 nir_intrinsic_set_align_offset(intrin, align_offset);
2046 }
2047
2048 /** Returns a simple alignment for an align_mul/offset pair
2049 *
2050 * This helper converts from the full mul+offset alignment scheme used by
2051 * most NIR intrinsics to a simple alignment. The returned value is the
2052 * largest power of two which divides both align_mul and align_offset.
2053 * For any offset X which satisfies the complex alignment described by
2054 * align_mul/offset, X % align == 0.
2055 */
2056 static inline uint32_t
2057 nir_combined_align(uint32_t align_mul, uint32_t align_offset)
2058 {
2059 assert(util_is_power_of_two_nonzero(align_mul));
2060 assert(align_offset < align_mul);
2061 return align_offset ? 1 << (ffs(align_offset) - 1) : align_mul;
2062 }
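
/* Worked example: align_mul=16, align_offset=8 means the offset has the form
 * 16*k + 8, so the largest power of two guaranteed to divide it is 8 and
 * nir_combined_align(16, 8) == 8. With align_offset=0 the offset is a
 * multiple of align_mul itself, so nir_combined_align(16, 0) == 16.
 */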
2063
2064 /** Returns a simple alignment for a load/store intrinsic offset
2065 *
2066 * Instead of the full mul+offset alignment scheme provided by the ALIGN_MUL
2067 * and ALIGN_OFFSET parameters, this helper takes both into account and
2068 * provides a single simple alignment parameter. The offset X is guaranteed
2069 * to satisfy X % align == 0.
2070 */
2071 static inline unsigned
2072 nir_intrinsic_align(const nir_intrinsic_instr *intrin)
2073 {
2074 return nir_combined_align(nir_intrinsic_align_mul(intrin),
2075 nir_intrinsic_align_offset(intrin));
2076 }
2077
2078 static inline bool
2079 nir_intrinsic_has_align(const nir_intrinsic_instr *intrin)
2080 {
2081 return nir_intrinsic_has_align_mul(intrin) &&
2082 nir_intrinsic_has_align_offset(intrin);
2083 }
2084
2085 unsigned
2086 nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr);
2087
2088 /* Converts an image_deref_* intrinsic into an image_* one */
2089 void nir_rewrite_image_intrinsic(nir_intrinsic_instr *instr,
2090 nir_def *handle, bool bindless);
2091
2092 /* Determine if an intrinsic can be arbitrarily reordered and eliminated. */
2093 static inline bool
2094 nir_intrinsic_can_reorder(nir_intrinsic_instr *instr)
2095 {
2096 if (nir_intrinsic_has_access(instr) &&
2097 nir_intrinsic_access(instr) & ACCESS_VOLATILE)
2098 return false;
2099
2100 if (instr->intrinsic == nir_intrinsic_load_deref) {
2101 nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
2102 return nir_deref_mode_is_in_set(deref, nir_var_read_only_modes) ||
2103 (nir_intrinsic_access(instr) & ACCESS_CAN_REORDER);
2104 } else if (instr->intrinsic == nir_intrinsic_load_ssbo ||
2105 instr->intrinsic == nir_intrinsic_bindless_image_load ||
2106 instr->intrinsic == nir_intrinsic_image_deref_load ||
2107 instr->intrinsic == nir_intrinsic_image_load ||
2108 instr->intrinsic == nir_intrinsic_ald_nv ||
2109 instr->intrinsic == nir_intrinsic_load_sysval_nv) {
2110 return nir_intrinsic_access(instr) & ACCESS_CAN_REORDER;
2111 } else {
2112 const nir_intrinsic_info *info =
2113 &nir_intrinsic_infos[instr->intrinsic];
2114 return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
2115 (info->flags & NIR_INTRINSIC_CAN_REORDER);
2116 }
2117 }
2118
2119 bool nir_intrinsic_writes_external_memory(const nir_intrinsic_instr *instr);
2120
2121 static inline bool
2122 nir_intrinsic_is_ray_query(nir_intrinsic_op intrinsic)
2123 {
2124 switch (intrinsic) {
2125 case nir_intrinsic_rq_confirm_intersection:
2126 case nir_intrinsic_rq_generate_intersection:
2127 case nir_intrinsic_rq_initialize:
2128 case nir_intrinsic_rq_load:
2129 case nir_intrinsic_rq_proceed:
2130 case nir_intrinsic_rq_terminate:
2131 return true;
2132 default:
2133 return false;
2134 }
2135 }
2136
2137 /** Texture instruction source type */
2138 typedef enum nir_tex_src_type {
2139 /** Texture coordinate
2140 *
2141 * Must have :c:member:`nir_tex_instr.coord_components` components.
2142 */
2143 nir_tex_src_coord,
2144
2145 /** Projector
2146 *
2147 * The texture coordinate (except for the array component, if any) is
2148 * divided by this value before LOD computation and sampling.
2149 *
2150 * Must be a float scalar.
2151 */
2152 nir_tex_src_projector,
2153
2154 /** Shadow comparator
2155 *
2156 * For shadow sampling, the fetched texel values are compared against the
2157 * shadow comparator using the compare op specified by the sampler object
2158 * and converted to 1.0 if the comparison succeeds and 0.0 if it fails.
2159 * Interpolation happens after this conversion so the actual result may be
2160 * anywhere in the range [0.0, 1.0].
2161 *
2162 * Only valid if :c:member:`nir_tex_instr.is_shadow` and must be a float
2163 * scalar.
2164 */
2165 nir_tex_src_comparator,
2166
2167 /** Coordinate offset
2168 *
2169 * An integer value that is added to the texel address before sampling.
2170 * This is only allowed with operations that take an explicit LOD as it is
2171 * applied in integer texel space after LOD selection and not normalized
2172 * coordinate space.
2173 */
2174 nir_tex_src_offset,
2175
2176 /** LOD bias
2177 *
2178 * This value is added to the computed LOD before mip-mapping.
2179 */
2180 nir_tex_src_bias,
2181
2182 /** Explicit LOD */
2183 nir_tex_src_lod,
2184
2185 /** Min LOD
2186 *
2187 * The computed LOD is clamped to be at least as large as min_lod before
2188 * mip-mapping.
2189 */
2190 nir_tex_src_min_lod,
2191
2192 /** MSAA sample index */
2193 nir_tex_src_ms_index,
2194
2195 /** Intel-specific MSAA compression data */
2196 nir_tex_src_ms_mcs_intel,
2197
2198 /** Explicit horizontal (X-major) coordinate derivative */
2199 nir_tex_src_ddx,
2200
2201 /** Explicit vertical (Y-major) coordinate derivative */
2202 nir_tex_src_ddy,
2203
2204 /** Texture variable dereference */
2205 nir_tex_src_texture_deref,
2206
2207 /** Sampler variable dereference */
2208 nir_tex_src_sampler_deref,
2209
2210 /** Texture index offset
2211 *
2212 * This is added to :c:member:`nir_tex_instr.texture_index`. Unless
2213 * :c:member:`nir_tex_instr.texture_non_uniform` is set, this is guaranteed
2214 * to be dynamically uniform.
2215 */
2216 nir_tex_src_texture_offset,
2217
2218 /** Dynamically uniform sampler index offset
2219 *
2220 * This is added to :c:member:`nir_tex_instr.sampler_index`. Unless
2221 * :c:member:`nir_tex_instr.sampler_non_uniform` is set, this is guaranteed to be
2222 * dynamically uniform. This should not be present until GLSL ES 3.20, GLSL
2223    * 4.00, or ARB_gpu_shader5, because the ES 3.10 and GL 3.30 specs said of samplers
2224 * "When aggregated into arrays within a shader, samplers can only be indexed
2225 * with a constant integral expression."
2226 */
2227 nir_tex_src_sampler_offset,
2228
2229 /** Bindless texture handle
2230 *
2231 * This is, unfortunately, a bit overloaded at the moment. There are
2232 * generally two types of bindless handles:
2233 *
2234 * 1. For GL_ARB_bindless bindless handles. These are part of the
2235 * GL/Gallium-level API and are always a 64-bit integer.
2236 *
2237 * 2. HW-specific handles. GL_ARB_bindless handles may be lowered to
2238 * these. Also, these are used by many Vulkan drivers to implement
2239 * descriptor sets, especially for UPDATE_AFTER_BIND descriptors.
2240    *    The details of hardware handles (bit size, format, etc.) are
2241 * HW-specific.
2242 *
2243 * Because of this overloading and the resulting ambiguity, we currently
2244 * don't validate anything for these.
2245 */
2246 nir_tex_src_texture_handle,
2247
2248 /** Bindless sampler handle
2249 *
2250    * See nir_tex_src_texture_handle.
2251 */
2252 nir_tex_src_sampler_handle,
2253
2254 /** Plane index for multi-plane YCbCr textures */
2255 nir_tex_src_plane,
2256
2257 /**
2258 * Backend-specific vec4 tex src argument.
2259 *
2260 * Can be used to have NIR optimization (copy propagation, lower_vec_to_regs)
2261 * apply to the packing of the tex srcs. This lowering must only happen
2262 * after nir_lower_tex().
2263 *
2264 * The nir_tex_instr_src_type() of this argument is float, so no lowering
2265 * will happen if nir_lower_int_to_float is used.
2266 */
2267 nir_tex_src_backend1,
2268
2269 /** Second backend-specific vec4 tex src argument, see nir_tex_src_backend1. */
2270 nir_tex_src_backend2,
2271
2272 nir_num_tex_src_types
2273 } nir_tex_src_type;
2274
2275 /** A texture instruction source */
2276 typedef struct nir_tex_src {
2277 /** Base source */
2278 nir_src src;
2279
2280 /** Type of this source */
2281 nir_tex_src_type src_type;
2282 } nir_tex_src;
2283
2284 /** Texture instruction opcode */
2285 typedef enum nir_texop {
2286 /** Regular texture look-up */
2287 nir_texop_tex,
2288 /** Texture look-up with LOD bias */
2289 nir_texop_txb,
2290 /** Texture look-up with explicit LOD */
2291 nir_texop_txl,
2292 /** Texture look-up with partial derivatives */
2293 nir_texop_txd,
2294 /** Texel fetch with explicit LOD */
2295 nir_texop_txf,
2296 /** Multisample texture fetch */
2297 nir_texop_txf_ms,
2298 /** Multisample texture fetch from framebuffer */
2299 nir_texop_txf_ms_fb,
2300 /** Multisample compression value fetch */
2301 nir_texop_txf_ms_mcs_intel,
2302 /** Texture size */
2303 nir_texop_txs,
2304 /** Texture lod query */
2305 nir_texop_lod,
2306 /** Texture gather */
2307 nir_texop_tg4,
2308 /** Texture levels query */
2309 nir_texop_query_levels,
2310 /** Texture samples query */
2311 nir_texop_texture_samples,
2312 /** Query whether all samples are definitely identical. */
2313 nir_texop_samples_identical,
2314 /** Regular texture look-up, eligible for pre-dispatch */
2315 nir_texop_tex_prefetch,
2316 /** Multisample fragment color texture fetch */
2317 nir_texop_fragment_fetch_amd,
2318 /** Multisample fragment mask texture fetch */
2319 nir_texop_fragment_mask_fetch_amd,
2320 /** Returns a buffer or image descriptor. */
2321 nir_texop_descriptor_amd,
2322 /** Returns a sampler descriptor. */
2323 nir_texop_sampler_descriptor_amd,
2324 /** Returns the sampler's LOD bias */
2325 nir_texop_lod_bias_agx,
2326 /** Maps to TXQ.DIMENSION */
2327 nir_texop_hdr_dim_nv,
2328 /** Maps to TXQ.TEXTURE_TYPE */
2329 nir_texop_tex_type_nv,
2330 } nir_texop;
2331
2332 /** Represents a texture instruction */
2333 typedef struct nir_tex_instr {
2334 /** Base instruction */
2335 nir_instr instr;
2336
2337 /** Dimensionality of the texture operation
2338 *
2339 * This will typically match the dimensionality of the texture deref type
2340 * if a nir_tex_src_texture_deref is present. However, it may not if
2341 * texture lowering has occurred.
2342 */
2343 enum glsl_sampler_dim sampler_dim;
2344
2345 /** ALU type of the destination
2346 *
2347 * This is the canonical sampled type for this texture operation and may
2348 * not exactly match the sampled type of the deref type when a
2349 * nir_tex_src_texture_deref is present. For OpenCL, the sampled type of
2350 * the texture deref will be GLSL_TYPE_VOID and this is allowed to be
2351 * anything. With SPIR-V, the signedness of integer types is allowed to
2352 * differ. For all APIs, the bit size may differ if the driver has done
2353 * any sort of mediump or similar lowering since texture types always have
2354 * 32-bit sampled types.
2355 */
2356 nir_alu_type dest_type;
2357
2358 /** Texture opcode */
2359 nir_texop op;
2360
2361 /** Destination */
2362 nir_def def;
2363
2364 /** Array of sources
2365 *
2366 * This array has :c:member:`nir_tex_instr.num_srcs` elements
2367 */
2368 nir_tex_src *src;
2369
2370 /** Number of sources */
2371 unsigned num_srcs;
2372
2373 /** Number of components in the coordinate, if any */
2374 unsigned coord_components;
2375
2376 /** True if the texture instruction acts on an array texture */
2377 bool is_array;
2378
2379 /** True if the texture instruction performs a shadow comparison
2380 *
2381 * If this is true, the texture instruction must have a
2382 * nir_tex_src_comparator.
2383 */
2384 bool is_shadow;
2385
2386 /**
2387 * If is_shadow is true, whether this is the old-style shadow that outputs
2388 * 4 components or the new-style shadow that outputs 1 component.
2389 */
2390 bool is_new_style_shadow;
2391
2392 /**
2393 * True if this texture instruction should return a sparse residency code.
2394 * The code is in the last component of the result.
2395 */
2396 bool is_sparse;
2397
2398 /** nir_texop_tg4 component selector
2399 *
2400 * This determines which RGBA component is gathered.
2401 */
2402 unsigned component : 2;
2403
2404 /** Validation needs to know this for gradient component count */
2405 unsigned array_is_lowered_cube : 1;
2406
2407 /** True if this tg4 instruction has an implicit LOD or LOD bias, instead of using level 0 */
2408 unsigned is_gather_implicit_lod : 1;
2409
2410 /** Gather offsets */
2411 int8_t tg4_offsets[4][2];
2412
2413 /** True if the texture index or handle is not dynamically uniform */
2414 bool texture_non_uniform;
2415
2416 /** True if the sampler index or handle is not dynamically uniform.
2417 *
2418 * This may be set when VK_EXT_descriptor_indexing is supported and the
2419 * appropriate capability is enabled.
2420 *
2421 * This should always be false in GLSL (GLSL ES 3.20 says "When aggregated
2422 * into arrays within a shader, opaque types can only be indexed with a
2423 * dynamically uniform integral expression", and GLSL 4.60 says "When
2424 * aggregated into arrays within a shader, [texture, sampler, and
2425 * samplerShadow] types can only be indexed with a dynamically uniform
2426 * expression, or texture lookup will result in undefined values.").
2427 */
2428 bool sampler_non_uniform;
2429
2430 /** The texture index
2431 *
2432 * If this texture instruction has a nir_tex_src_texture_offset source,
2433 * then the texture index is given by texture_index + texture_offset.
2434 */
2435 unsigned texture_index;
2436
2437 /** The sampler index
2438 *
2439 * The following operations do not require a sampler and, as such, this
2440 * field should be ignored:
2441 *
2442 * - nir_texop_txf
2443 * - nir_texop_txf_ms
2444 * - nir_texop_txs
2445 * - nir_texop_query_levels
2446 * - nir_texop_texture_samples
2447 * - nir_texop_samples_identical
2448 *
2449 * If this texture instruction has a nir_tex_src_sampler_offset source,
2450 * then the sampler index is given by sampler_index + sampler_offset.
2451 */
2452 unsigned sampler_index;
2453
2454 /* Back-end specific flags, intended to be used in combination with
2455 * nir_tex_src_backend1/2 to provide additional hw-specific information
2456 * to the back-end compiler.
2457 */
2458 uint32_t backend_flags;
2459 } nir_tex_instr;
2460
2461 /**
2462 * Returns true if the texture operation requires a sampler as a general rule
2463 *
2464  * Note that the specific hw/driver backend could still require a sampler
2465 * object/configuration packet in any case, for some other reason.
2466 *
2467 * See also :c:member:`nir_tex_instr.sampler_index`.
2468 */
2469 bool nir_tex_instr_need_sampler(const nir_tex_instr *instr);
2470
2471 /** Returns the number of components returned by this nir_tex_instr
2472 *
2473 * Useful for code building texture instructions when you don't want to think
2474 * about how many components a particular texture op returns. This does not
2475 * include the sparse residency code.
2476 */
2477 unsigned
2478 nir_tex_instr_result_size(const nir_tex_instr *instr);
2479
2480 /**
2481 * Returns the destination size of this nir_tex_instr including the sparse
2482 * residency code, if any.
2483 */
2484 static inline unsigned
2485 nir_tex_instr_dest_size(const nir_tex_instr *instr)
2486 {
2487 /* One more component is needed for the residency code. */
2488 return nir_tex_instr_result_size(instr) + instr->is_sparse;
2489 }
2490
2491 /**
2492 * Returns true if this texture operation queries something about the texture
2493 * rather than actually sampling it.
2494 */
2495 bool
2496 nir_tex_instr_is_query(const nir_tex_instr *instr);
2497
2498 /** Returns true if this texture instruction does implicit derivatives
2499 *
2500 * This is important as there are extra control-flow rules around derivatives
2501 * and texture instructions which perform them implicitly.
2502 */
2503 bool
2504 nir_tex_instr_has_implicit_derivative(const nir_tex_instr *instr);
2505
2506 /** Returns the ALU type of the given texture instruction source */
2507 nir_alu_type
2508 nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src);
2509
2510 /**
2511 * Returns the number of components required by the given texture instruction
2512 * source
2513 */
2514 unsigned
2515 nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src);
2516
2517 /**
2518 * Returns the index of the texture instruction source with the given
2519 * nir_tex_src_type or -1 if no such source exists.
2520 */
2521 static inline int
2522 nir_tex_instr_src_index(const nir_tex_instr *instr, nir_tex_src_type type)
2523 {
2524 for (unsigned i = 0; i < instr->num_srcs; i++)
2525 if (instr->src[i].src_type == type)
2526 return (int)i;
2527
2528 return -1;
2529 }
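
/* Illustrative example: fetching the coordinate source of a texture
 * instruction, if present:
 *
 *    int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
 *    if (coord_idx >= 0) {
 *       nir_def *coord = tex->src[coord_idx].src.ssa;
 *       // ... coord has tex->coord_components components ...
 *    }
 */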
2530
2531 /** Adds a source to a texture instruction */
2532 void nir_tex_instr_add_src(nir_tex_instr *tex,
2533 nir_tex_src_type src_type,
2534 nir_def *src);
2535
2536 /** Removes a source from a texture instruction */
2537 void nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx);
2538
2539 bool nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex);
2540
2541 typedef struct {
2542 nir_instr instr;
2543
2544 nir_def def;
2545
2546 nir_const_value value[];
2547 } nir_load_const_instr;
2548
2549 typedef enum {
2550 /** Return from a function
2551 *
2552 * This instruction is a classic function return. It jumps to
2553 * nir_function_impl::end_block. No return value is provided in this
2554 * instruction. Instead, the function is expected to write any return
2555 * data to a deref passed in from the caller.
2556 */
2557 nir_jump_return,
2558
2559 /** Immediately exit the current shader
2560 *
2561 * This instruction is roughly the equivalent of C's "exit()" in that it
2562 * immediately terminates the current shader invocation. From a CFG
2563 * perspective, it looks like a jump to nir_function_impl::end_block but
2564 * it actually jumps to the end block of the shader entrypoint. A halt
2565 * instruction in the shader entrypoint itself is semantically identical
2566 * to a return.
2567 *
2568 * For shaders with built-in I/O, any outputs written prior to a halt
2569 * instruction remain written and any outputs not written prior to the
2570 * halt have undefined values. It does NOT cause an implicit discard of
2571    * written results. If one wants to discard results in a fragment shader,
2572 * for instance, a discard or demote intrinsic is required.
2573 */
2574 nir_jump_halt,
2575
2576 /** Break out of the inner-most loop
2577 *
2578 * This has the same semantics as C's "break" statement.
2579 */
2580 nir_jump_break,
2581
2582 /** Jump back to the top of the inner-most loop
2583 *
2584 * This has the same semantics as C's "continue" statement assuming that a
2585 * NIR loop is implemented as "while (1) { body }".
2586 */
2587 nir_jump_continue,
2588
2589 /** Jumps for unstructured CFG.
2590 *
2591 * As within an unstructured CFG we can't rely on block ordering we need to
2592 * place explicit jumps at the end of every block.
2593 */
2594 nir_jump_goto,
2595 nir_jump_goto_if,
2596 } nir_jump_type;
2597
2598 typedef struct {
2599 nir_instr instr;
2600 nir_jump_type type;
2601 nir_src condition;
2602 struct nir_block *target;
2603 struct nir_block *else_target;
2604 } nir_jump_instr;
2605
2606 /* creates a new SSA variable in an undefined state */
2607
2608 typedef struct {
2609 nir_instr instr;
2610 nir_def def;
2611 } nir_undef_instr;
2612
2613 typedef struct {
2614 struct exec_node node;
2615
2616 /* The predecessor block corresponding to this source */
2617 struct nir_block *pred;
2618
2619 nir_src src;
2620 } nir_phi_src;
2621
2622 #define nir_foreach_phi_src(phi_src, phi) \
2623 foreach_list_typed(nir_phi_src, phi_src, node, &(phi)->srcs)
2624 #define nir_foreach_phi_src_safe(phi_src, phi) \
2625 foreach_list_typed_safe(nir_phi_src, phi_src, node, &(phi)->srcs)
2626
2627 typedef struct {
2628 nir_instr instr;
2629
2630 /** list of nir_phi_src */
2631 struct exec_list srcs;
2632
2633 nir_def def;
2634 } nir_phi_instr;
2635
2636 static inline nir_phi_src *
2637 nir_phi_get_src_from_block(nir_phi_instr *phi, struct nir_block *block)
2638 {
2639 nir_foreach_phi_src(src, phi) {
2640 if (src->pred == block)
2641 return src;
2642 }
2643
2644 assert(!"Block is not a predecessor of phi.");
2645 return NULL;
2646 }
2647
2648 typedef struct {
2649 struct exec_node node;
2650 bool src_is_reg;
2651 bool dest_is_reg;
2652 nir_src src;
2653 union {
2654 nir_def def;
2655 nir_src reg;
2656 } dest;
2657 } nir_parallel_copy_entry;
2658
2659 #define nir_foreach_parallel_copy_entry(entry, pcopy) \
2660 foreach_list_typed(nir_parallel_copy_entry, entry, node, &(pcopy)->entries)
2661
2662 typedef struct {
2663 nir_instr instr;
2664
2665 /* A list of nir_parallel_copy_entrys. The sources of all of the
2666 * entries are copied to the corresponding destinations "in parallel".
2667 * In other words, if we have two entries: a -> b and b -> a, the values
2668 * get swapped.
2669 */
2670 struct exec_list entries;
2671 } nir_parallel_copy_instr;
2672
2673 NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr,
2674 type, nir_instr_type_alu)
2675 NIR_DEFINE_CAST(nir_instr_as_deref, nir_instr, nir_deref_instr, instr,
2676 type, nir_instr_type_deref)
2677 NIR_DEFINE_CAST(nir_instr_as_call, nir_instr, nir_call_instr, instr,
2678 type, nir_instr_type_call)
2679 NIR_DEFINE_CAST(nir_instr_as_jump, nir_instr, nir_jump_instr, instr,
2680 type, nir_instr_type_jump)
2681 NIR_DEFINE_CAST(nir_instr_as_tex, nir_instr, nir_tex_instr, instr,
2682 type, nir_instr_type_tex)
2683 NIR_DEFINE_CAST(nir_instr_as_intrinsic, nir_instr, nir_intrinsic_instr, instr,
2684 type, nir_instr_type_intrinsic)
2685 NIR_DEFINE_CAST(nir_instr_as_load_const, nir_instr, nir_load_const_instr, instr,
2686 type, nir_instr_type_load_const)
2687 NIR_DEFINE_CAST(nir_instr_as_undef, nir_instr, nir_undef_instr, instr,
2688 type, nir_instr_type_undef)
2689 NIR_DEFINE_CAST(nir_instr_as_phi, nir_instr, nir_phi_instr, instr,
2690 type, nir_instr_type_phi)
2691 NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr,
2692 nir_parallel_copy_instr, instr,
2693 type, nir_instr_type_parallel_copy)
2694
2695 #define NIR_DEFINE_SRC_AS_CONST(type, suffix) \
2696 static inline type \
2697 nir_src_comp_as_##suffix(nir_src src, unsigned comp) \
2698 { \
2699 assert(nir_src_is_const(src)); \
2700 nir_load_const_instr *load = \
2701 nir_instr_as_load_const(src.ssa->parent_instr); \
2702 assert(comp < load->def.num_components); \
2703 return nir_const_value_as_##suffix(load->value[comp], \
2704 load->def.bit_size); \
2705 } \
2706 \
2707 static inline type \
2708 nir_src_as_##suffix(nir_src src) \
2709 { \
2710 assert(nir_src_num_components(src) == 1); \
2711 return nir_src_comp_as_##suffix(src, 0); \
2712 }
2713
2714 NIR_DEFINE_SRC_AS_CONST(int64_t, int)
2715 NIR_DEFINE_SRC_AS_CONST(uint64_t, uint)
2716 NIR_DEFINE_SRC_AS_CONST(bool, bool)
2717 NIR_DEFINE_SRC_AS_CONST(double, float)
2718
2719 #undef NIR_DEFINE_SRC_AS_CONST
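
/* Illustrative example: the macros above define nir_src_as_int/uint/bool/float
 * (and the per-component nir_src_comp_as_* variants). A pass that only handles
 * constant offsets might do:
 *
 *    if (nir_src_is_const(intrin->src[1])) {
 *       uint64_t offset = nir_src_as_uint(intrin->src[1]);
 *       // ... specialize on the known offset ...
 *    }
 */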
2720
2721 typedef struct {
2722 nir_def *def;
2723 unsigned comp;
2724 } nir_scalar;
2725
2726 static inline bool
2727 nir_scalar_is_const(nir_scalar s)
2728 {
2729 return s.def->parent_instr->type == nir_instr_type_load_const;
2730 }
2731
2732 static inline bool
2733 nir_scalar_is_undef(nir_scalar s)
2734 {
2735 return s.def->parent_instr->type == nir_instr_type_undef;
2736 }
2737
2738 static inline nir_const_value
2739 nir_scalar_as_const_value(nir_scalar s)
2740 {
2741 assert(s.comp < s.def->num_components);
2742 nir_load_const_instr *load = nir_instr_as_load_const(s.def->parent_instr);
2743 return load->value[s.comp];
2744 }
2745
2746 #define NIR_DEFINE_SCALAR_AS_CONST(type, suffix) \
2747 static inline type \
2748 nir_scalar_as_##suffix(nir_scalar s) \
2749 { \
2750 return nir_const_value_as_##suffix( \
2751 nir_scalar_as_const_value(s), s.def->bit_size); \
2752 }
2753
2754 NIR_DEFINE_SCALAR_AS_CONST(int64_t, int)
2755 NIR_DEFINE_SCALAR_AS_CONST(uint64_t, uint)
2756 NIR_DEFINE_SCALAR_AS_CONST(bool, bool)
2757 NIR_DEFINE_SCALAR_AS_CONST(double, float)
2758
2759 #undef NIR_DEFINE_SCALAR_AS_CONST
2760
2761 static inline bool
2762 nir_scalar_is_alu(nir_scalar s)
2763 {
2764 return s.def->parent_instr->type == nir_instr_type_alu;
2765 }
2766
2767 static inline nir_op
2768 nir_scalar_alu_op(nir_scalar s)
2769 {
2770 return nir_instr_as_alu(s.def->parent_instr)->op;
2771 }
2772
2773 static inline bool
2774 nir_scalar_is_intrinsic(nir_scalar s)
2775 {
2776 return s.def->parent_instr->type == nir_instr_type_intrinsic;
2777 }
2778
2779 static inline nir_intrinsic_op
2780 nir_scalar_intrinsic_op(nir_scalar s)
2781 {
2782 return nir_instr_as_intrinsic(s.def->parent_instr)->intrinsic;
2783 }
2784
2785 static inline nir_scalar
2786 nir_scalar_chase_alu_src(nir_scalar s, unsigned alu_src_idx)
2787 {
2788 nir_scalar out = { NULL, 0 };
2789
2790 nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
2791 assert(alu_src_idx < nir_op_infos[alu->op].num_inputs);
2792
2793 /* Our component must be written */
2794 assert(s.comp < s.def->num_components);
2795
2796 out.def = alu->src[alu_src_idx].src.ssa;
2797
2798 if (nir_op_infos[alu->op].input_sizes[alu_src_idx] == 0) {
2799 /* The ALU src is unsized so the source component follows the
2800 * destination component.
2801 */
2802 out.comp = alu->src[alu_src_idx].swizzle[s.comp];
2803 } else {
2804 /* This is a sized source so all source components work together to
2805 * produce all the destination components. Since we need to return a
2806 * scalar, this only works if the source is a scalar.
2807 */
2808 assert(nir_op_infos[alu->op].input_sizes[alu_src_idx] == 1);
2809 out.comp = alu->src[alu_src_idx].swizzle[0];
2810 }
2811 assert(out.comp < out.def->num_components);
2812
2813 return out;
2814 }
2815
2816 nir_scalar nir_scalar_chase_movs(nir_scalar s);
2817
2818 static inline nir_scalar
2819 nir_get_scalar(nir_def *def, unsigned channel)
2820 {
2821 nir_scalar s = { def, channel };
2822 return s;
2823 }
2824
2825 /** Returns a nir_scalar where we've followed the bit-exact mov/vec use chain to the original definition */
2826 static inline nir_scalar
2827 nir_scalar_resolved(nir_def *def, unsigned channel)
2828 {
2829 return nir_scalar_chase_movs(nir_get_scalar(def, channel));
2830 }
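
/* Illustrative example: checking whether one component of a value is a known
 * constant, looking through any intervening mov/vec instructions:
 *
 *    nir_scalar s = nir_scalar_resolved(alu->src[0].src.ssa,
 *                                       alu->src[0].swizzle[0]);
 *    if (nir_scalar_is_const(s)) {
 *       uint64_t v = nir_scalar_as_uint(s);
 *       // ... fold using the constant value ...
 *    }
 */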
2831
2832 static inline bool
2833 nir_scalar_equal(nir_scalar s1, nir_scalar s2)
2834 {
2835 return s1.def == s2.def && s1.comp == s2.comp;
2836 }
2837
2838 static inline uint64_t
2839 nir_alu_src_as_uint(nir_alu_src src)
2840 {
2841 nir_scalar scalar = nir_get_scalar(src.src.ssa, src.swizzle[0]);
2842 return nir_scalar_as_uint(scalar);
2843 }
2844
2845 typedef struct {
2846 bool success;
2847
2848 nir_variable *var;
2849 unsigned desc_set;
2850 unsigned binding;
2851 unsigned num_indices;
2852 nir_src indices[4];
2853 bool read_first_invocation;
2854 } nir_binding;
2855
2856 nir_binding nir_chase_binding(nir_src rsrc);
2857 nir_variable *nir_get_binding_variable(struct nir_shader *shader, nir_binding binding);
2858
2859 /*
2860 * Control flow
2861 *
2862 * Control flow consists of a tree of control flow nodes, which include
2863 * if-statements and loops. The leaves of the tree are basic blocks, lists of
2864 * instructions that always run start-to-finish. Each basic block also keeps
2865 * track of its successors (blocks which may run immediately after the current
2866 * block) and predecessors (blocks which could have run immediately before the
2867  * current block). Each function also has a start block and an end block (which
2868  * is always empty) that all return statements point to. Together, all the
2869 * blocks with their predecessors and successors make up the control flow
2870 * graph (CFG) of the function. There are helpers that modify the tree of
2871 * control flow nodes while modifying the CFG appropriately; these should be
2872 * used instead of modifying the tree directly.
2873 */
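
/* Illustrative sketch: structured control flow such as
 *
 *    if (cond) { A; } else { B; }
 *    while (1) { C; }
 *
 * becomes a nir_cf_node_if whose then_list/else_list each hold a nir_block
 * (containing A and B), followed by a nir_cf_node_loop whose body holds the
 * block containing C. The basic blocks at the leaves, linked through their
 * successors/predecessors, form the CFG used by analyses such as dominance.
 */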
2874
2875 typedef enum {
2876 nir_cf_node_block,
2877 nir_cf_node_if,
2878 nir_cf_node_loop,
2879 nir_cf_node_function
2880 } nir_cf_node_type;
2881
2882 typedef struct nir_cf_node {
2883 struct exec_node node;
2884 nir_cf_node_type type;
2885 struct nir_cf_node *parent;
2886 } nir_cf_node;
2887
2888 typedef struct nir_block {
2889 nir_cf_node cf_node;
2890
2891 /** list of nir_instr */
2892 struct exec_list instr_list;
2893
2894 /** generic block index; generated by nir_index_blocks */
2895 unsigned index;
2896
2897 /*
2898 * Each block can only have up to 2 successors, so we put them in a simple
2899 * array - no need for anything more complicated.
2900 */
2901 struct nir_block *successors[2];
2902
2903 /* Set of nir_block predecessors in the CFG */
2904 struct set *predecessors;
2905
2906 /*
2907 * this node's immediate dominator in the dominance tree - set to NULL for
2908 * the start block.
2909 */
2910 struct nir_block *imm_dom;
2911
2912 /* This node's children in the dominance tree */
2913 unsigned num_dom_children;
2914 struct nir_block **dom_children;
2915
2916 /* Set of nir_blocks on the dominance frontier of this block */
2917 struct set *dom_frontier;
2918
2919 /*
2920 * These two indices have the property that dom_{pre,post}_index for each
2921 * child of this block in the dominance tree will always be between
2922 * dom_pre_index and dom_post_index for this block, which makes testing if
2923 * a given block is dominated by another block an O(1) operation.
2924 */
2925 uint32_t dom_pre_index, dom_post_index;
2926
2927 /**
2928 * Value just before the first nir_instr->index in the block, but after
2929    * the end_ip of any predecessor block.
2930 */
2931 uint32_t start_ip;
2932 /**
2933 * Value just after the last nir_instr->index in the block, but before the
2934 * start_ip of any successor block.
2935 */
2936 uint32_t end_ip;
2937
2938 /* SSA def live in and out for this block; used for liveness analysis.
2939 * Indexed by ssa_def->index
2940 */
2941 BITSET_WORD *live_in;
2942 BITSET_WORD *live_out;
2943 } nir_block;
2944
2945 static inline bool
2946 nir_block_is_reachable(nir_block *b)
2947 {
2948 /* See also nir_block_dominates */
2949 return b->dom_post_index != 0;
2950 }
2951
2952 static inline nir_instr *
2953 nir_block_first_instr(nir_block *block)
2954 {
2955 struct exec_node *head = exec_list_get_head(&block->instr_list);
2956 return exec_node_data(nir_instr, head, node);
2957 }
2958
2959 static inline nir_instr *
2960 nir_block_last_instr(nir_block *block)
2961 {
2962 struct exec_node *tail = exec_list_get_tail(&block->instr_list);
2963 return exec_node_data(nir_instr, tail, node);
2964 }
2965
2966 static inline bool
2967 nir_block_ends_in_jump(nir_block *block)
2968 {
2969 return !exec_list_is_empty(&block->instr_list) &&
2970 nir_block_last_instr(block)->type == nir_instr_type_jump;
2971 }
2972
2973 static inline bool
2974 nir_block_ends_in_return_or_halt(nir_block *block)
2975 {
2976 if (exec_list_is_empty(&block->instr_list))
2977 return false;
2978
2979 nir_instr *instr = nir_block_last_instr(block);
2980 if (instr->type != nir_instr_type_jump)
2981 return false;
2982
2983 nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
2984 return jump_instr->type == nir_jump_return ||
2985 jump_instr->type == nir_jump_halt;
2986 }
2987
2988 static inline bool
2989 nir_block_ends_in_break(nir_block *block)
2990 {
2991 if (exec_list_is_empty(&block->instr_list))
2992 return false;
2993
2994 nir_instr *instr = nir_block_last_instr(block);
2995 return instr->type == nir_instr_type_jump &&
2996 nir_instr_as_jump(instr)->type == nir_jump_break;
2997 }
2998
2999 #define nir_foreach_instr(instr, block) \
3000 foreach_list_typed(nir_instr, instr, node, &(block)->instr_list)
3001 #define nir_foreach_instr_reverse(instr, block) \
3002 foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list)
3003 #define nir_foreach_instr_safe(instr, block) \
3004 foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list)
3005 #define nir_foreach_instr_reverse_safe(instr, block) \
3006 foreach_list_typed_reverse_safe(nir_instr, instr, node, &(block)->instr_list)
3007
3008 /* Phis come first in the block */
3009 static inline nir_phi_instr *
3010 nir_first_phi_in_block(nir_block *block)
3011 {
3012 nir_foreach_instr(instr, block) {
3013 if (instr->type == nir_instr_type_phi)
3014 return nir_instr_as_phi(instr);
3015 else
3016 return NULL;
3017 }
3018
3019 return NULL;
3020 }
3021
3022 static inline nir_phi_instr *
3023 nir_next_phi(nir_phi_instr *phi)
3024 {
3025 nir_instr *next = nir_instr_next(&phi->instr);
3026
3027 if (next && next->type == nir_instr_type_phi)
3028 return nir_instr_as_phi(next);
3029 else
3030 return NULL;
3031 }
3032
3033 #define nir_foreach_phi(instr, block) \
3034 for (nir_phi_instr *instr = nir_first_phi_in_block(block); instr != NULL; \
3035 instr = nir_next_phi(instr))
3036
3037 #define nir_foreach_phi_safe(instr, block) \
3038 for (nir_phi_instr *instr = nir_first_phi_in_block(block), \
3039 *__next = instr ? nir_next_phi(instr) : NULL; \
3040 instr != NULL; \
3041 instr = __next, __next = instr ? nir_next_phi(instr) : NULL)
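
/* Illustrative example: iterating the phis at the top of a block and their
 * per-predecessor sources:
 *
 *    nir_foreach_phi(phi, block) {
 *       nir_foreach_phi_src(src, phi) {
 *          // src->pred is the predecessor block, src->src the incoming value
 *       }
 *    }
 */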
3042
3043 static inline nir_phi_instr *
3044 nir_block_last_phi_instr(nir_block *block)
3045 {
3046 nir_phi_instr *last_phi = NULL;
3047 nir_foreach_phi(instr, block)
3048 last_phi = instr;
3049
3050 return last_phi;
3051 }
3052
3053 typedef enum {
3054 nir_selection_control_none = 0x0,
3055
3056 /**
3057 * Defined by SPIR-V spec 3.22 "Selection Control".
3058 * The application prefers to remove control flow.
3059 */
3060 nir_selection_control_flatten = 0x1,
3061
3062 /**
3063 * Defined by SPIR-V spec 3.22 "Selection Control".
3064 * The application prefers to keep control flow.
3065 */
3066 nir_selection_control_dont_flatten = 0x2,
3067
3068 /**
3069 * May be applied by the compiler stack when it knows
3070 * that a branch is divergent, and:
3071 * - either both the if and else are always taken
3072 * - the if or else is empty and the other is always taken
3073 */
3074 nir_selection_control_divergent_always_taken = 0x3,
3075 } nir_selection_control;
3076
3077 typedef struct nir_if {
3078 nir_cf_node cf_node;
3079 nir_src condition;
3080 nir_selection_control control;
3081
3082 /** list of nir_cf_node */
3083 struct exec_list then_list;
3084
3085 /** list of nir_cf_node */
3086 struct exec_list else_list;
3087 } nir_if;
3088
3089 typedef struct {
3090 nir_if *nif;
3091
3092 /** Instruction that generates nif::condition. */
3093 nir_instr *conditional_instr;
3094
3095 /** Block within ::nif that has the break instruction. */
3096 nir_block *break_block;
3097
3098 /** Last block for the then- or else-path that does not contain the break. */
3099 nir_block *continue_from_block;
3100
3101 /** True when ::break_block is in the else-path of ::nif. */
3102 bool continue_from_then;
3103 bool induction_rhs;
3104
3105    /* This is true if the terminator's exact trip count is unknown. For
3106 * example:
3107 *
3108 * for (int i = 0; i < imin(x, 4); i++)
3109 * ...
3110 *
3111    * Here loop analysis would have set a max_trip_count of 4; however, we don't
3112 * know for sure that this is the exact trip count.
3113 */
3114 bool exact_trip_count_unknown;
3115
3116 struct list_head loop_terminator_link;
3117 } nir_loop_terminator;
3118
3119 typedef struct {
3120 /* Induction variable. */
3121 nir_def *def;
3122
3123 /* Init statement with only uniform. */
3124 nir_src *init_src;
3125
3126 /* Update statement with only uniform. */
3127 nir_alu_src *update_src;
3128 } nir_loop_induction_variable;
3129
3130 typedef struct {
3131 /* Estimated cost (in number of instructions) of the loop */
3132 unsigned instr_cost;
3133
3134 /* Contains fp64 ops that will be lowered */
3135 bool has_soft_fp64;
3136
3137 /* Guessed trip count based on array indexing */
3138 unsigned guessed_trip_count;
3139
3140 /* Maximum number of times the loop is run (if known) */
3141 unsigned max_trip_count;
3142
3143 /* Do we know the exact number of times the loop will be run */
3144 bool exact_trip_count_known;
3145
3146 /* Unroll the loop regardless of its size */
3147 bool force_unroll;
3148
3149 /* Does the loop contain complex loop terminators, continues or other
3150 * complex behaviours? If this is true we can't rely on
3151 * loop_terminator_list to be complete or accurate.
3152 */
3153 bool complex_loop;
3154
3155 nir_loop_terminator *limiting_terminator;
3156
3157 /* A list of loop_terminators terminating this loop. */
3158 struct list_head loop_terminator_list;
3159
3160 /* array of induction variables for this loop */
3161 nir_loop_induction_variable *induction_vars;
3162 unsigned num_induction_vars;
3163 } nir_loop_info;
3164
3165 typedef enum {
3166 nir_loop_control_none = 0x0,
3167 nir_loop_control_unroll = 0x1,
3168 nir_loop_control_dont_unroll = 0x2,
3169 } nir_loop_control;
3170
3171 typedef struct {
3172 nir_cf_node cf_node;
3173
3174 /** list of nir_cf_node */
3175 struct exec_list body;
3176
3177 /** (optional) list of nir_cf_node */
3178 struct exec_list continue_list;
3179
3180 nir_loop_info *info;
3181 nir_loop_control control;
3182 bool partially_unrolled;
3183 bool divergent;
3184 } nir_loop;
3185
3186 /**
3187  * Various bits of metadata that may be created or required by
3188 * optimization and analysis passes
3189 */
3190 typedef enum {
3191 nir_metadata_none = 0x0,
3192
3193 /** Indicates that nir_block::index values are valid.
3194 *
3195 * The start block has index 0 and they increase through a natural walk of
3196 * the CFG. nir_function_impl::num_blocks is the number of blocks and
3197 * every block index is in the range [0, nir_function_impl::num_blocks].
3198 *
3199 * A pass can preserve this metadata type if it doesn't touch the CFG.
3200 */
3201 nir_metadata_block_index = 0x1,
3202
3203 /** Indicates that block dominance information is valid
3204 *
3205 * This includes:
3206 *
3207 * - nir_block::num_dom_children
3208 * - nir_block::dom_children
3209 * - nir_block::dom_frontier
3210 * - nir_block::dom_pre_index
3211 * - nir_block::dom_post_index
3212 *
3213 * A pass can preserve this metadata type if it doesn't touch the CFG.
3214 */
3215 nir_metadata_dominance = 0x2,
3216
3217 /** Indicates that SSA def data-flow liveness information is valid
3218 *
3219 * This includes:
3220 *
3221 * - nir_block::live_in
3222 * - nir_block::live_out
3223 *
3224 * A pass can preserve this metadata type if it never adds or removes any
3225 * SSA defs or uses of SSA defs (most passes shouldn't preserve this
3226 * metadata type).
3227 */
3228 nir_metadata_live_defs = 0x4,
3229
3230 /** A dummy metadata value to track when a pass forgot to call
3231 * nir_metadata_preserve.
3232 *
3233 * A pass should always clear this value even if it doesn't make any
3234 * progress to indicate that it thought about preserving metadata.
3235 */
3236 nir_metadata_not_properly_reset = 0x8,
3237
3238 /** Indicates that loop analysis information is valid.
3239 *
3240 * This includes everything pointed to by nir_loop::info.
3241 *
3242 * A pass can preserve this metadata type if it is guaranteed to not affect
3243 * any loop metadata. However, since loop metadata includes things like
3244 * loop counts which depend on arithmetic in the loop, this is very hard to
3245 * determine. Most passes shouldn't preserve this metadata type.
3246 */
3247 nir_metadata_loop_analysis = 0x10,
3248
3249 /** Indicates that nir_instr::index values are valid.
3250 *
3251 * The start instruction has index 0 and they increase through a natural
3252  * walk of instructions in blocks in the CFG. The indices may have holes
3253 * after passes such as DCE.
3254 *
3255 * A pass can preserve this metadata type if it never adds or moves any
3256 * instructions (most passes shouldn't preserve this metadata type), but
3257 * can preserve it if it only removes instructions.
3258 */
3259 nir_metadata_instr_index = 0x20,
3260
3261 /** All metadata
3262 *
3263 * This includes all nir_metadata flags except not_properly_reset. Passes
3264 * which do not change the shader in any way should call
3265 *
3266 * nir_metadata_preserve(impl, nir_metadata_all);
3267 */
3268 nir_metadata_all = ~nir_metadata_not_properly_reset,
3269 } nir_metadata;
3270 MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(nir_metadata)
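
/* Illustrative example: the usual idiom at the end of an optimization pass,
 * assuming a bool "progress" and a pass that does not touch the CFG:
 *
 *    if (progress) {
 *       nir_metadata_preserve(impl, nir_metadata_block_index |
 *                                   nir_metadata_dominance);
 *    } else {
 *       nir_metadata_preserve(impl, nir_metadata_all);
 *    }
 */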
3271
3272 typedef struct {
3273 nir_cf_node cf_node;
3274
3275 /** pointer to the function of which this is an implementation */
3276 struct nir_function *function;
3277
3278 /**
3279 * For entrypoints, a pointer to a nir_function_impl which runs before
3280 * it, once per draw or dispatch, communicating via store_preamble and
3281 * load_preamble intrinsics. If NULL then there is no preamble.
3282 */
3283 struct nir_function *preamble;
3284
3285 /** list of nir_cf_node */
3286 struct exec_list body;
3287
3288 nir_block *end_block;
3289
3290 /** list for all local variables in the function */
3291 struct exec_list locals;
3292
3293 /** next available SSA value index */
3294 unsigned ssa_alloc;
3295
3296 /* total number of basic blocks, only valid when block_index_dirty = false */
3297 unsigned num_blocks;
3298
3299 /** True if this nir_function_impl uses structured control-flow
3300 *
3301 * Structured nir_function_impls have different validation rules.
3302 */
3303 bool structured;
3304
3305 nir_metadata valid_metadata;
3306 } nir_function_impl;
3307
3308 #define nir_foreach_function_temp_variable(var, impl) \
3309 foreach_list_typed(nir_variable, var, node, &(impl)->locals)
3310
3311 #define nir_foreach_function_temp_variable_safe(var, impl) \
3312 foreach_list_typed_safe(nir_variable, var, node, &(impl)->locals)
3313
3314 ATTRIBUTE_RETURNS_NONNULL static inline nir_block *
3315 nir_start_block(nir_function_impl *impl)
3316 {
3317 return (nir_block *)impl->body.head_sentinel.next;
3318 }
3319
3320 ATTRIBUTE_RETURNS_NONNULL static inline nir_block *
3321 nir_impl_last_block(nir_function_impl *impl)
3322 {
3323 return (nir_block *)impl->body.tail_sentinel.prev;
3324 }
3325
3326 static inline nir_cf_node *
3327 nir_cf_node_next(nir_cf_node *node)
3328 {
3329 struct exec_node *next = exec_node_get_next(&node->node);
3330 if (exec_node_is_tail_sentinel(next))
3331 return NULL;
3332 else
3333 return exec_node_data(nir_cf_node, next, node);
3334 }
3335
3336 static inline nir_cf_node *
3337 nir_cf_node_prev(nir_cf_node *node)
3338 {
3339 struct exec_node *prev = exec_node_get_prev(&node->node);
3340 if (exec_node_is_head_sentinel(prev))
3341 return NULL;
3342 else
3343 return exec_node_data(nir_cf_node, prev, node);
3344 }
3345
3346 static inline bool
3347 nir_cf_node_is_first(const nir_cf_node *node)
3348 {
3349 return exec_node_is_head_sentinel(node->node.prev);
3350 }
3351
3352 static inline bool
3353 nir_cf_node_is_last(const nir_cf_node *node)
3354 {
3355 return exec_node_is_tail_sentinel(node->node.next);
3356 }
3357
3358 NIR_DEFINE_CAST(nir_cf_node_as_block, nir_cf_node, nir_block, cf_node,
3359 type, nir_cf_node_block)
3360 NIR_DEFINE_CAST(nir_cf_node_as_if, nir_cf_node, nir_if, cf_node,
3361 type, nir_cf_node_if)
3362 NIR_DEFINE_CAST(nir_cf_node_as_loop, nir_cf_node, nir_loop, cf_node,
3363 type, nir_cf_node_loop)
3364 NIR_DEFINE_CAST(nir_cf_node_as_function, nir_cf_node,
3365 nir_function_impl, cf_node, type, nir_cf_node_function)
3366
3367 static inline nir_block *
3368 nir_if_first_then_block(nir_if *if_stmt)
3369 {
3370 struct exec_node *head = exec_list_get_head(&if_stmt->then_list);
3371 return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
3372 }
3373
3374 static inline nir_block *
3375 nir_if_last_then_block(nir_if *if_stmt)
3376 {
3377 struct exec_node *tail = exec_list_get_tail(&if_stmt->then_list);
3378 return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node));
3379 }
3380
3381 static inline nir_block *
3382 nir_if_first_else_block(nir_if *if_stmt)
3383 {
3384 struct exec_node *head = exec_list_get_head(&if_stmt->else_list);
3385 return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
3386 }
3387
3388 static inline nir_block *
3389 nir_if_last_else_block(nir_if *if_stmt)
3390 {
3391 struct exec_node *tail = exec_list_get_tail(&if_stmt->else_list);
3392 return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node));
3393 }
3394
3395 static inline nir_block *
3396 nir_loop_first_block(nir_loop *loop)
3397 {
3398 struct exec_node *head = exec_list_get_head(&loop->body);
3399 return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
3400 }
3401
3402 static inline nir_block *
3403 nir_loop_last_block(nir_loop *loop)
3404 {
3405 struct exec_node *tail = exec_list_get_tail(&loop->body);
3406 return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node));
3407 }
3408
3409 static inline bool
3410 nir_loop_has_continue_construct(const nir_loop *loop)
3411 {
3412 return !exec_list_is_empty(&loop->continue_list);
3413 }
3414
3415 static inline nir_block *
3416 nir_loop_first_continue_block(nir_loop *loop)
3417 {
3418 assert(nir_loop_has_continue_construct(loop));
3419 struct exec_node *head = exec_list_get_head(&loop->continue_list);
3420 return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
3421 }
3422
3423 static inline nir_block *
3424 nir_loop_last_continue_block(nir_loop *loop)
3425 {
3426 assert(nir_loop_has_continue_construct(loop));
3427 struct exec_node *tail = exec_list_get_tail(&loop->continue_list);
3428 return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node));
3429 }
3430
3431 /**
3432 * Return the target block of a nir_jump_continue statement
3433 */
3434 static inline nir_block *
3435 nir_loop_continue_target(nir_loop *loop)
3436 {
3437 if (nir_loop_has_continue_construct(loop))
3438 return nir_loop_first_continue_block(loop);
3439 else
3440 return nir_loop_first_block(loop);
3441 }
3442
3443 /**
3444 * Return true if this list of cf_nodes contains a single empty block.
3445 */
3446 static inline bool
3447 nir_cf_list_is_empty_block(struct exec_list *cf_list)
3448 {
3449 if (exec_list_is_singular(cf_list)) {
3450 struct exec_node *head = exec_list_get_head(cf_list);
3451 nir_block *block =
3452 nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
3453 return exec_list_is_empty(&block->instr_list);
3454 }
3455 return false;
3456 }
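
/* Example: these helpers let passes inspect structured control flow without
 * poking at exec_list internals. A small sketch (illustrative only) that
 * checks whether an if statement has an empty else branch and, if so, returns
 * the block that follows the whole if (in NIR, the CF node after an if is
 * always a block):
 *
 *    static nir_block *
 *    block_after_trivial_if(nir_if *if_stmt)
 *    {
 *       if (!nir_cf_list_is_empty_block(&if_stmt->else_list))
 *          return NULL;
 *
 *       return nir_cf_node_as_block(nir_cf_node_next(&if_stmt->cf_node));
 *    }
 */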
3457
3458 typedef struct {
3459 uint8_t num_components;
3460 uint8_t bit_size;
3461 } nir_parameter;
3462
3463 typedef struct nir_function {
3464 struct exec_node node;
3465
3466 const char *name;
3467 struct nir_shader *shader;
3468
3469 unsigned num_params;
3470 nir_parameter *params;
3471
3472 /** The implementation of this function.
3473 *
3474 * If the function is only declared and not implemented, this is NULL.
3475 *
3476 * Unless setting to NULL or NIR_SERIALIZE_FUNC_HAS_IMPL, set with
3477 * nir_function_set_impl to maintain IR invariants.
3478 */
3479 nir_function_impl *impl;
3480
3481 bool is_entrypoint;
3482 /* from SPIR-V linkage, only for libraries */
3483 bool is_exported;
3484 bool is_preamble;
3485 /* from SPIR-V function control */
3486 bool should_inline;
3487 bool dont_inline; /* from SPIR-V */
3488 } nir_function;
3489
3490 typedef enum {
3491 nir_lower_imul64 = (1 << 0),
3492 nir_lower_isign64 = (1 << 1),
3493 /** Lower all int64 modulus and division opcodes */
3494 nir_lower_divmod64 = (1 << 2),
3495 /** Lower all 64-bit umul_high and imul_high opcodes */
3496 nir_lower_imul_high64 = (1 << 3),
3497 nir_lower_bcsel64 = (1 << 4),
3498 nir_lower_icmp64 = (1 << 5),
3499 nir_lower_iadd64 = (1 << 6),
3500 nir_lower_iabs64 = (1 << 7),
3501 nir_lower_ineg64 = (1 << 8),
3502 nir_lower_logic64 = (1 << 9),
3503 nir_lower_minmax64 = (1 << 10),
3504 nir_lower_shift64 = (1 << 11),
3505 nir_lower_imul_2x32_64 = (1 << 12),
3506 nir_lower_extract64 = (1 << 13),
3507 nir_lower_ufind_msb64 = (1 << 14),
3508 nir_lower_bit_count64 = (1 << 15),
3509 nir_lower_subgroup_shuffle64 = (1 << 16),
3510 nir_lower_scan_reduce_bitwise64 = (1 << 17),
3511 nir_lower_scan_reduce_iadd64 = (1 << 18),
3512 nir_lower_vote_ieq64 = (1 << 19),
3513 nir_lower_usub_sat64 = (1 << 20),
3514 nir_lower_iadd_sat64 = (1 << 21),
3515 nir_lower_find_lsb64 = (1 << 22),
3516 nir_lower_conv64 = (1 << 23),
3517 } nir_lower_int64_options;
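
/* Example: drivers request int64 lowering by OR-ing these flags into
 * nir_shader_compiler_options::lower_int64_options (declared further down).
 * A hypothetical backend with no native 64-bit multiply or divide might set:
 *
 *    .lower_int64_options = nir_lower_imul64 |
 *                           nir_lower_imul_high64 |
 *                           nir_lower_divmod64,
 */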
3518
3519 typedef enum {
3520 nir_lower_drcp = (1 << 0),
3521 nir_lower_dsqrt = (1 << 1),
3522 nir_lower_drsq = (1 << 2),
3523 nir_lower_dtrunc = (1 << 3),
3524 nir_lower_dfloor = (1 << 4),
3525 nir_lower_dceil = (1 << 5),
3526 nir_lower_dfract = (1 << 6),
3527 nir_lower_dround_even = (1 << 7),
3528 nir_lower_dmod = (1 << 8),
3529 nir_lower_dsub = (1 << 9),
3530 nir_lower_ddiv = (1 << 10),
3531 nir_lower_dsign = (1 << 11),
3532 nir_lower_dminmax = (1 << 12),
3533 nir_lower_dsat = (1 << 13),
3534 nir_lower_fp64_full_software = (1 << 14),
3535 } nir_lower_doubles_options;
3536
3537 typedef enum {
3538 nir_divergence_single_prim_per_subgroup = (1 << 0),
3539 nir_divergence_single_patch_per_tcs_subgroup = (1 << 1),
3540 nir_divergence_single_patch_per_tes_subgroup = (1 << 2),
3541 nir_divergence_view_index_uniform = (1 << 3),
3542 nir_divergence_single_frag_shading_rate_per_subgroup = (1 << 4),
3543 nir_divergence_multiple_workgroup_per_compute_subgroup = (1 << 5),
3544 nir_divergence_shader_record_ptr_uniform = (1 << 6),
3545 } nir_divergence_options;
3546
3547 typedef enum {
3548 /**
3549 * Whether a fragment shader can interpolate the same input multiple times
3550 * with different modes (smooth, noperspective) and locations (pixel,
3551 * centroid, sample, at_offset, at_sample), excluding the flat mode.
3552 *
3553 * This matches AMD GPU flexibility and limitations and is a superset of
3554 * the GL4 requirement that each input can be interpolated at its specified
3555 * location, and then also as centroid, at_offset, and at_sample.
3556 */
3557 nir_io_has_flexible_input_interpolation_except_flat = BITFIELD_BIT(0),
3558
3559 /* Options affecting the GLSL compiler are below. */
3560
3561 /**
3562 * Lower load_deref/store_deref to load_input/store_output/etc. intrinsics.
3563 * This only affects GLSL compilation.
3564 */
3565 nir_io_glsl_lower_derefs = BITFIELD_BIT(16),
3566 } nir_io_options;
3567
3568 /** An instruction filtering callback
3569 *
3570 * Returns true if the instruction should be processed and false otherwise.
3571 */
3572 typedef bool (*nir_instr_filter_cb)(const nir_instr *, const void *);
3573
3574 /** A vectorization width callback
3575 *
3576 * Returns the maximum vectorization width per instruction, or 0 if the
3577 * instruction must not be modified.
3578 *
3579 * The vectorization width must be a power of 2.
3580 */
3581 typedef uint8_t (*nir_vectorize_cb)(const nir_instr *, const void *);
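
/* Example: minimal sketches of the two callback types above; both are
 * illustrative and not tied to any particular backend. The filter accepts
 * only ALU instructions, and the width callback allows vec2 for ALU
 * instructions while leaving everything else untouched (0 = don't modify).
 *
 *    static bool
 *    only_alu(const nir_instr *instr, const void *data)
 *    {
 *       return instr->type == nir_instr_type_alu;
 *    }
 *
 *    static uint8_t
 *    vec2_for_alu(const nir_instr *instr, const void *data)
 *    {
 *       return instr->type == nir_instr_type_alu ? 2 : 0;
 *    }
 */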
3582
3583 typedef struct nir_shader_compiler_options {
3584 bool lower_fdiv;
3585 bool lower_ffma16;
3586 bool lower_ffma32;
3587 bool lower_ffma64;
3588 bool fuse_ffma16;
3589 bool fuse_ffma32;
3590 bool fuse_ffma64;
3591 bool lower_flrp16;
3592 bool lower_flrp32;
3593 /** Lowers flrp when it does not support doubles */
3594 bool lower_flrp64;
3595 bool lower_fpow;
3596 bool lower_fsat;
3597 bool lower_fsqrt;
3598 bool lower_sincos;
3599 bool lower_fmod;
3600 /** Lowers ibitfield_extract/ubitfield_extract. */
3601 bool lower_bitfield_extract;
3602 /** Lowers bitfield_insert. */
3603 bool lower_bitfield_insert;
3604 /** Lowers bitfield_reverse to shifts. */
3605 bool lower_bitfield_reverse;
3606 /** Lowers bit_count to shifts. */
3607 bool lower_bit_count;
3608 /** Lowers ifind_msb. */
3609 bool lower_ifind_msb;
3610 /** Lowers ufind_msb. */
3611 bool lower_ufind_msb;
3612 /** Lowers find_lsb to ufind_msb and logic ops */
3613 bool lower_find_lsb;
3614 bool lower_uadd_carry;
3615 bool lower_usub_borrow;
3616 /** Lowers imul_high/umul_high to 16-bit multiplies and carry operations. */
3617 bool lower_mul_high;
3618 /** lowers fneg to fmul(x, -1.0). Driver must call nir_opt_algebraic_late() */
3619 bool lower_fneg;
3620 /** lowers ineg to isub. Driver must call nir_opt_algebraic_late(). */
3621 bool lower_ineg;
3622 /** lowers fisnormal to alu ops. */
3623 bool lower_fisnormal;
3624
3625 /* lower {slt,sge,seq,sne} to {flt,fge,feq,fneu} + b2f: */
3626 bool lower_scmp;
3627
3628 /* lower [bf]all_equalN / [bf]any_nequalN (e.g. fany_nequal4 to sne+fdot4+fsat) */
3629 bool lower_vector_cmp;
3630
3631 /** enable rules to avoid bit ops */
3632 bool lower_bitops;
3633
3634 /** enables rules to lower isign to imin+imax */
3635 bool lower_isign;
3636
3637 /** enables rules to lower fsign to fsub and flt */
3638 bool lower_fsign;
3639
3640 /** enables rules to lower iabs to ineg+imax */
3641 bool lower_iabs;
3642
3643 /** enable rules that avoid generating umax from signed integer ops */
3644 bool lower_umax;
3645
3646 /** enable rules that avoid generating umin from signed integer ops */
3647 bool lower_umin;
3648
3649 /* lower fdph to fdot4 */
3650 bool lower_fdph;
3651
3652 /** lower fdot to fmul and fsum/fadd. */
3653 bool lower_fdot;
3654
3655 /* Does the native fdot instruction replicate its result for four
3656 * components? If so, then opt_algebraic_late will turn all fdotN
3657 * instructions into fdotN_replicated instructions.
3658 */
3659 bool fdot_replicates;
3660
3661 /** lowers ffloor to fsub+ffract: */
3662 bool lower_ffloor;
3663
3664 /** lowers ffract to fsub+ffloor: */
3665 bool lower_ffract;
3666
3667 /** lowers fceil to fneg+ffloor+fneg: */
3668 bool lower_fceil;
3669
3670 bool lower_ftrunc;
3671
3672 /** Lowers fround_even to ffract+feq+csel.
3673 *
3674 * Not correct in that it doesn't correctly handle the "_even" part of the
3675 * rounding, but good enough for DX9 array indexing handling on DX9-class
3676 * hardware.
3677 */
3678 bool lower_fround_even;
3679
3680 bool lower_ldexp;
3681
3682 bool lower_pack_half_2x16;
3683 bool lower_pack_unorm_2x16;
3684 bool lower_pack_snorm_2x16;
3685 bool lower_pack_unorm_4x8;
3686 bool lower_pack_snorm_4x8;
3687 bool lower_pack_64_2x32;
3688 bool lower_pack_64_4x16;
3689 bool lower_pack_32_2x16;
3690 bool lower_pack_64_2x32_split;
3691 bool lower_pack_32_2x16_split;
3692 bool lower_unpack_half_2x16;
3693 bool lower_unpack_unorm_2x16;
3694 bool lower_unpack_snorm_2x16;
3695 bool lower_unpack_unorm_4x8;
3696 bool lower_unpack_snorm_4x8;
3697 bool lower_unpack_64_2x32_split;
3698 bool lower_unpack_32_2x16_split;
3699
3700 bool lower_pack_split;
3701
3702 bool lower_extract_byte;
3703 bool lower_extract_word;
3704 bool lower_insert_byte;
3705 bool lower_insert_word;
3706
3707 bool lower_all_io_to_temps;
3708 bool lower_all_io_to_elements;
3709
3710 /* Indicates that the driver only has zero-based vertex id */
3711 bool vertex_id_zero_based;
3712
3713 /**
3714 * If enabled, gl_BaseVertex will be lowered as:
3715 * is_indexed_draw (~0/0) & firstvertex
3716 */
3717 bool lower_base_vertex;
3718
3719 /**
3720 * If enabled, gl_HelperInvocation will be lowered as:
3721 *
3722 * !((1 << sample_id) & sample_mask_in)
3723 *
3724 * This depends on hw implementation details that may not hold for all
3725 * hw. In particular, it assumes that the FS is only executed for
3726 * covered samples or for helper invocations. So, do not blindly
3727 * enable this option.
3728 *
3729 * Note: See also issue #22 in ARB_shader_image_load_store
3730 */
3731 bool lower_helper_invocation;
3732
3733 /**
3734 * Convert gl_SampleMaskIn to gl_HelperInvocation as follows:
3735 *
3736 * gl_SampleMaskIn == 0 ---> gl_HelperInvocation
3737 * gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
3738 */
3739 bool optimize_sample_mask_in;
3740
3741 /**
3742 * Optimize boolean reductions of quad broadcasts. This should only be enabled if
3743 * nir_intrinsic_reduce supports INCLUDE_HELPERS.
3744 */
3745 bool optimize_quad_vote_to_reduce;
3746
3747 bool lower_cs_local_index_to_id;
3748 bool lower_cs_local_id_to_index;
3749
3750 /* Prevents lowering global_invocation_id to be in terms of workgroup_id */
3751 bool has_cs_global_id;
3752
3753 bool lower_device_index_to_zero;
3754
3755 /* Set if nir_lower_pntc_ytransform() should invert gl_PointCoord.
3756 * Either when the framebuffer is flipped or GL_POINT_SPRITE_COORD_ORIGIN
3757 * is GL_LOWER_LEFT.
3758 */
3759 bool lower_wpos_pntc;
3760
3761 /**
3762 * Set if nir_op_[iu]hadd and nir_op_[iu]rhadd instructions should be
3763 * lowered to simple arithmetic.
3764 *
3765 * If this flag is set, the lowering will be applied to all bit-sizes of
3766 * these instructions.
3767 *
3768 * :c:member:`lower_hadd64`
3769 */
3770 bool lower_hadd;
3771
3772 /**
3773 * Set if only 64-bit nir_op_[iu]hadd and nir_op_[iu]rhadd instructions
3774 * should be lowered to simple arithmetic.
3775 *
3776 * If this flag is set, the lowering will be applied to only 64-bit
3777 * versions of these instructions.
3778 *
3779 * :c:member:`lower_hadd`
3780 */
3781 bool lower_hadd64;
3782
3783 /**
3784 * Set if nir_op_uadd_sat should be lowered to simple arithmetic.
3785 *
3786 * If this flag is set, the lowering will be applied to all bit-sizes of
3787 * these instructions.
3788 */
3789 bool lower_uadd_sat;
3790
3791 /**
3792 * Set if nir_op_usub_sat should be lowered to simple arithmetic.
3793 *
3794 * If this flag is set, the lowering will be applied to all bit-sizes of
3795 * these instructions.
3796 */
3797 bool lower_usub_sat;
3798
3799 /**
3800 * Set if nir_op_iadd_sat and nir_op_isub_sat should be lowered to simple
3801 * arithmetic.
3802 *
3803 * If this flag is set, the lowering will be applied to all bit-sizes of
3804 * these instructions.
3805 */
3806 bool lower_iadd_sat;
3807
3808 /**
3809 * Set if imul_32x16 and umul_32x16 should be lowered to simple
3810 * arithmetic.
3811 */
3812 bool lower_mul_32x16;
3813
3814 /**
3815 * Should IO be re-vectorized? Some scalar ISAs still operate on vec4's
3816 * for IO purposes and would prefer loads/stores be vectorized.
3817 */
3818 bool vectorize_io;
3819 bool vectorize_tess_levels;
3820 bool lower_to_scalar;
3821 nir_instr_filter_cb lower_to_scalar_filter;
3822
3823 /**
3824 * Disables potentially harmful algebraic transformations for architectures
3825 * with SIMD-within-a-register semantics.
3826 *
3827 * Note, to actually vectorize 16bit instructions, use nir_opt_vectorize()
3828 * with a suitable callback function.
3829 */
3830 bool vectorize_vec2_16bit;
3831
3832 /**
3833 * Should the linker unify inputs_read/outputs_written between adjacent
3834 * shader stages which are linked into a single program?
3835 */
3836 bool unify_interfaces;
3837
3838 /**
3839 * Should nir_lower_io() create load_interpolated_input intrinsics?
3840 *
3841 * If not, it generates regular load_input intrinsics and interpolation
3842 * information must be inferred from the list of input nir_variables.
3843 */
3844 bool use_interpolated_input_intrinsics;
3845
3846 /**
3847 * Whether nir_lower_io() will lower interpolateAt functions to
3848 * load_interpolated_input intrinsics.
3849 *
3850 * Unlike use_interpolated_input_intrinsics this will only lower these
3851 * functions and leave input load intrinsics untouched.
3852 */
3853 bool lower_interpolate_at;
3854
3855 /* Lowers when 32x32->64 bit multiplication is not supported */
3856 bool lower_mul_2x32_64;
3857
3858 /* Indicates that urol and uror are supported */
3859 bool has_rotate8;
3860 bool has_rotate16;
3861 bool has_rotate32;
3862
3863 /** Backend supports ternary addition */
3864 bool has_iadd3;
3865
3866 /**
3867 * Backend supports imul24, and would like to use it (when possible)
3868 * for address/offset calculation. If true, driver should call
3869 * nir_lower_amul(). (If not set, amul will automatically be lowered
3870 * to imul.)
3871 */
3872 bool has_imul24;
3873
3874 /** Backend supports umul24; if not set, umul24 will automatically be lowered
3875 * to imul with masked inputs */
3876 bool has_umul24;
3877
3878 /** Backend supports 32-bit imad */
3879 bool has_imad32;
3880
3881 /** Backend supports umad24; if not set, umad24 will automatically be lowered
3882 * to imul with masked inputs and iadd */
3883 bool has_umad24;
3884
3885 /* Backend supports fused compare against zero and csel */
3886 bool has_fused_comp_and_csel;
3887
3888 /** Backend supports fsub; if not set, fsub will automatically be lowered to
3889 * fadd(x, fneg(y)). If true, driver should call nir_opt_algebraic_late(). */
3890 bool has_fsub;
3891
3892 /** Backend supports isub; if not set, isub will automatically be lowered to
3893 * iadd(x, ineg(y)). If true, driver should call nir_opt_algebraic_late(). */
3894 bool has_isub;
3895
3896 /** Backend supports pack_32_4x8 or pack_32_4x8_split. */
3897 bool has_pack_32_4x8;
3898
3899 /** Backend supports nir_load_texture_scale and prefers it over txs for nir
3900 * lowerings. */
3901 bool has_texture_scaling;
3902
3903 /** Backend supports sdot_4x8_iadd. */
3904 bool has_sdot_4x8;
3905
3906 /** Backend supports udot_4x8_uadd. */
3907 bool has_udot_4x8;
3908
3909 /** Backend supports sudot_4x8_iadd. */
3910 bool has_sudot_4x8;
3911
3912 /** Backend supports sdot_4x8_iadd_sat. */
3913 bool has_sdot_4x8_sat;
3914
3915 /** Backend supports udot_4x8_uadd_sat. */
3916 bool has_udot_4x8_sat;
3917
3918 /** Backend supports sudot_4x8_iadd_sat. */
3919 bool has_sudot_4x8_sat;
3920
3921 /** Backend supports sdot_2x16 and udot_2x16 opcodes. */
3922 bool has_dot_2x16;
3923
3924 /** Backend supports fmulz (and ffmaz if lower_ffma32=false) */
3925 bool has_fmulz;
3926
3927 /**
3928 * Backend supports fmulz (and ffmaz if lower_ffma32=false) but only if
3929 * FLOAT_CONTROLS_DENORM_PRESERVE_FP32 is not set
3930 */
3931 bool has_fmulz_no_denorms;
3932
3933 /** Backend supports 32bit ufind_msb_rev and ifind_msb_rev. */
3934 bool has_find_msb_rev;
3935
3936 /** Backend supports pack_half_2x16_rtz_split. */
3937 bool has_pack_half_2x16_rtz;
3938
3939 /** Backend supports bitz/bitnz. */
3940 bool has_bit_test;
3941
3942 /** Backend supports ubfe/ibfe. */
3943 bool has_bfe;
3944
3945 /** Backend supports bfm. */
3946 bool has_bfm;
3947
3948 /** Backend supports bfi. */
3949 bool has_bfi;
3950
3951 /** Backend supports bitfield_select. */
3952 bool has_bitfield_select;
3953
3954 /** Backend supports uclz. */
3955 bool has_uclz;
3956
3957 /** Backend supports msad_u4x8. */
3958 bool has_msad;
3959
3960 /**
3961 * Is this the Intel vec4 backend?
3962 *
3963 * Used to inhibit algebraic optimizations that are known to be harmful on
3964 * the Intel vec4 backend. This is generally applicable to any
3965 * optimization that might cause more immediate values to be used in
3966 * 3-source (e.g., ffma and flrp) instructions.
3967 */
3968 bool intel_vec4;
3969
3970 /**
3971 * For most Intel GPUs, all ternary operations such as FMA and BFE cannot
3972 * have immediates, so two to three instructions may eventually be needed.
3973 */
3974 bool avoid_ternary_with_two_constants;
3975
3976 /** Whether 8-bit ALU is supported. */
3977 bool support_8bit_alu;
3978
3979 /** Whether 16-bit ALU is supported. */
3980 bool support_16bit_alu;
3981
3982 unsigned max_unroll_iterations;
3983 unsigned max_unroll_iterations_aggressive;
3984 unsigned max_unroll_iterations_fp64;
3985
3986 bool lower_uniforms_to_ubo;
3987
3988 /* If the precision is ignored, backends that don't handle
3989 * different precisions when passing data between stages and use
3990 * vectorized IO can pack more varyings when linking. */
3991 bool linker_ignore_precision;
3992
3993 /* Specifies if indirect sampler array access will trigger forced loop
3994 * unrolling.
3995 */
3996 bool force_indirect_unrolling_sampler;
3997
3998 /* Some older drivers don't support GLSL versions with the concept of flat
3999 * varyings and also don't support integers. This setting helps us avoid
4000 * marking varyings as flat and potentially having them changed to ints via
4001 * varying packing.
4002 */
4003 bool no_integers;
4004
4005 /**
4006 * Specifies which type of indirectly accessed variables should force
4007 * loop unrolling.
4008 */
4009 nir_variable_mode force_indirect_unrolling;
4010
4011 bool driver_functions;
4012
4013 nir_lower_int64_options lower_int64_options;
4014 nir_lower_doubles_options lower_doubles_options;
4015 nir_divergence_options divergence_analysis_options;
4016
4017 /**
4018 * The masks of shader stages that support indirect indexing with
4019 * load_input and store_output intrinsics. It's used by
4020 * nir_lower_io_passes.
4021 */
4022 uint8_t support_indirect_inputs;
4023 uint8_t support_indirect_outputs;
4024
4025 /**
4026 * Remove a varying that is loaded from a uniform and let the fragment
4027 * shader load the uniform directly. GPUs that pass varyings through
4028 * memory clearly benefit. GPUs that pass varyings through on-chip
4029 * resources may not: it saves on-chip resources but can increase memory
4030 * pressure when fragment work far exceeds vertex work, so leave it off there.
4031 */
4032 bool lower_varying_from_uniform;
4033
4034 /** store the variable offset into the intrinsic range_base instead
4035 * of adding it to the image index.
4036 */
4037 bool lower_image_offset_to_range_base;
4038
4039 /** store the variable offset into the intrinsic range_base instead
4040 * of adding it to the atomic source
4041 */
4042 bool lower_atomic_offset_to_range_base;
4043
4044 /** Don't convert medium-precision casts (e.g. f2fmp) into concrete
4045 * type casts (e.g. f2f16).
4046 */
4047 bool preserve_mediump;
4048
4049 /** lowers fquantize2f16 to alu ops. */
4050 bool lower_fquantize2f16;
4051
4052 /** Lower f2f16 to f2f16_rtz when execution mode is not rtne. */
4053 bool force_f2f16_rtz;
4054
4055 /** Lower VARYING_SLOT_LAYER in FS to SYSTEM_VALUE_LAYER_ID. */
4056 bool lower_layer_fs_input_to_sysval;
4057
4058 /** Options determining lowering and behavior of inputs and outputs. */
4059 nir_io_options io_options;
4060
4061 /** Driver callback where drivers can define how to lower mediump.
4062 * Used by nir_lower_io_passes.
4063 */
4064 void (*lower_mediump_io)(struct nir_shader *nir);
4065 } nir_shader_compiler_options;
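
/* Example: drivers typically keep one static copy of these options and point
 * every shader they create at it (see nir_shader::options below). The field
 * values here are purely illustrative for a hypothetical scalar backend:
 *
 *    static const nir_shader_compiler_options hypothetical_options = {
 *       .lower_fdiv = true,
 *       .lower_fpow = true,
 *       .lower_to_scalar = true,
 *       .max_unroll_iterations = 32,
 *    };
 */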
4066
4067 typedef struct nir_shader {
4068 gc_ctx *gctx;
4069
4070 /** list of uniforms (nir_variable) */
4071 struct exec_list variables;
4072
4073 /** Set of driver-specific options for the shader.
4074 *
4075 * The memory for the options is expected to be kept in a single static
4076 * copy by the driver.
4077 */
4078 const struct nir_shader_compiler_options *options;
4079
4080 /** Various bits of compile-time information about a given shader */
4081 struct shader_info info;
4082
4083 /** list of nir_function */
4084 struct exec_list functions;
4085
4086 /**
4087 * The size of the variable space for load_input_*, load_uniform_*, etc.
4088 * intrinsics. This is in back-end specific units which is likely one of
4089 * bytes, dwords, or vec4s depending on context and back-end.
4090 */
4091 unsigned num_inputs, num_uniforms, num_outputs;
4092
4093 /** Size in bytes of required implicitly bound global memory */
4094 unsigned global_mem_size;
4095
4096 /** Size in bytes of required scratch space */
4097 unsigned scratch_size;
4098
4099 /** Constant data associated with this shader.
4100 *
4101 * Constant data is loaded through load_constant intrinsics (as compared to
4102 * the NIR load_const instructions which have the constant value inlined
4103 * into them). This is usually generated by nir_opt_large_constants (so
4104 * shaders don't have to load_const into a temporary array when they want
4105 * to indirect on a const array).
4106 */
4107 void *constant_data;
4108 /** Size of the constant data associated with the shader, in bytes */
4109 unsigned constant_data_size;
4110
4111 struct nir_xfb_info *xfb_info;
4112
4113 unsigned printf_info_count;
4114 u_printf_info *printf_info;
4115 } nir_shader;
4116
4117 #define nir_foreach_function(func, shader) \
4118 foreach_list_typed(nir_function, func, node, &(shader)->functions)
4119
4120 #define nir_foreach_function_safe(func, shader) \
4121 foreach_list_typed_safe(nir_function, func, node, &(shader)->functions)
4122
4123 static inline nir_function *
4124 nir_foreach_function_with_impl_first(const nir_shader *shader)
4125 {
4126 foreach_list_typed(nir_function, func, node, &shader->functions) {
4127 if (func->impl != NULL)
4128 return func;
4129 }
4130
4131 return NULL;
4132 }
4133
4134 static inline nir_function_impl *
4135 nir_foreach_function_with_impl_next(nir_function **it)
4136 {
4137 foreach_list_typed_from(nir_function, func, node, _, (*it)->node.next) {
4138 if (func->impl != NULL) {
4139 *it = func;
4140 return func->impl;
4141 }
4142 }
4143
4144 return NULL;
4145 }
4146
4147 #define nir_foreach_function_with_impl(it, impl_it, shader) \
4148 for (nir_function *it = nir_foreach_function_with_impl_first(shader); \
4149 it != NULL; \
4150 it = NULL) \
4151 \
4152 for (nir_function_impl *impl_it = it->impl; \
4153 impl_it != NULL; \
4154 impl_it = nir_foreach_function_with_impl_next(&it))
4155
4156 /* Equivalent to
4157 *
4158 * nir_foreach_function(func, shader) {
4159 * if (func->impl != NULL) {
4160 * ...
4161 * }
4162 * }
4163 *
4164 * Carefully written to ensure break/continue work in the user code.
4165 */
4166
4167 #define nir_foreach_function_impl(it, shader) \
4168 nir_foreach_function_with_impl(_func_##it, it, shader)
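
/* Example: the usual way to run per-impl work over a whole shader, skipping
 * declared-but-unimplemented functions:
 *
 *    nir_foreach_function_impl(impl, shader) {
 *       ... process impl ...
 *    }
 */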
4169
4170 static inline nir_function_impl *
4171 nir_shader_get_entrypoint(const nir_shader *shader)
4172 {
4173 nir_function *func = NULL;
4174
4175 nir_foreach_function(function, shader) {
4176 assert(func == NULL);
4177 if (function->is_entrypoint) {
4178 func = function;
4179 #ifndef NDEBUG
4180 break;
4181 #endif
4182 }
4183 }
4184
4185 if (!func)
4186 return NULL;
4187
4188 assert(func->num_params == 0);
4189 assert(func->impl);
4190 return func->impl;
4191 }
4192
4193 static inline nir_function *
4194 nir_shader_get_function_for_name(const nir_shader *shader, const char *name)
4195 {
4196 nir_foreach_function(func, shader) {
4197 if (strcmp(func->name, name) == 0)
4198 return func;
4199 }
4200
4201 return NULL;
4202 }
4203
4204 /*
4205 * After all functions are forcibly inlined, these passes remove redundant
4206 * functions from a shader and library respectively.
4207 */
4208 void nir_remove_non_entrypoints(nir_shader *shader);
4209 void nir_remove_non_exported(nir_shader *shader);
4210
4211 nir_shader *nir_shader_create(void *mem_ctx,
4212 gl_shader_stage stage,
4213 const nir_shader_compiler_options *options,
4214 shader_info *si);
4215
4216 /** Adds a variable to the appropriate list in nir_shader */
4217 void nir_shader_add_variable(nir_shader *shader, nir_variable *var);
4218
4219 static inline void
4220 nir_function_impl_add_variable(nir_function_impl *impl, nir_variable *var)
4221 {
4222 assert(var->data.mode == nir_var_function_temp);
4223 exec_list_push_tail(&impl->locals, &var->node);
4224 }
4225
4226 /** creates a variable, sets a few defaults, and adds it to the list */
4227 nir_variable *nir_variable_create(nir_shader *shader,
4228 nir_variable_mode mode,
4229 const struct glsl_type *type,
4230 const char *name);
4231 /** creates a local variable and adds it to the list */
4232 nir_variable *nir_local_variable_create(nir_function_impl *impl,
4233 const struct glsl_type *type,
4234 const char *name);
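
/* Example: creating a function-local scratch variable. glsl_uint_type() comes
 * from compiler/glsl_types.h (already included); the name is only used for
 * debug printing.
 *
 *    nir_variable *tmp =
 *       nir_local_variable_create(impl, glsl_uint_type(), "tmp");
 */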
4235
4236 /** Creates a uniform builtin state variable. */
4237 nir_variable *
4238 nir_state_variable_create(nir_shader *shader,
4239 const struct glsl_type *type,
4240 const char *name,
4241 const gl_state_index16 tokens[STATE_LENGTH]);
4242
4243 /* Gets the variable for the given mode and location, creating it (with the given
4244 * type) if necessary.
4245 */
4246 nir_variable *
4247 nir_get_variable_with_location(nir_shader *shader, nir_variable_mode mode, int location,
4248 const struct glsl_type *type);
4249
4250 /* Creates a variable for the given mode and location.
4251 */
4252 nir_variable *
4253 nir_create_variable_with_location(nir_shader *shader, nir_variable_mode mode, int location,
4254 const struct glsl_type *type);
4255
4256 nir_variable *nir_find_variable_with_location(nir_shader *shader,
4257 nir_variable_mode mode,
4258 unsigned location);
4259
4260 nir_variable *nir_find_variable_with_driver_location(nir_shader *shader,
4261 nir_variable_mode mode,
4262 unsigned location);
4263
4264 nir_variable *nir_find_state_variable(nir_shader *s,
4265 gl_state_index16 tokens[STATE_LENGTH]);
4266
4267 nir_variable *nir_find_sampler_variable_with_tex_index(nir_shader *shader,
4268 unsigned texture_index);
4269
4270 void nir_sort_variables_with_modes(nir_shader *shader,
4271 int (*compar)(const nir_variable *,
4272 const nir_variable *),
4273 nir_variable_mode modes);
4274
4275 /** creates a function and adds it to the shader's list of functions */
4276 nir_function *nir_function_create(nir_shader *shader, const char *name);
4277
4278 static inline void
4279 nir_function_set_impl(nir_function *func, nir_function_impl *impl)
4280 {
4281 func->impl = impl;
4282 impl->function = func;
4283 }
4284
4285 nir_function_impl *nir_function_impl_create(nir_function *func);
4286 /** creates a function_impl that isn't tied to any particular function */
4287 nir_function_impl *nir_function_impl_create_bare(nir_shader *shader);
4288
4289 nir_block *nir_block_create(nir_shader *shader);
4290 nir_if *nir_if_create(nir_shader *shader);
4291 nir_loop *nir_loop_create(nir_shader *shader);
4292
4293 nir_function_impl *nir_cf_node_get_function(nir_cf_node *node);
4294
4295 /** requests that the given pieces of metadata be generated */
4296 void nir_metadata_require(nir_function_impl *impl, nir_metadata required, ...);
4297 /** dirties all but the preserved metadata */
4298 void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved);
4299 /** Preserves all metadata for the given shader */
4300 void nir_shader_preserve_all_metadata(nir_shader *shader);
4301
4302 /** creates an instruction with default swizzle/writemask/etc. with NULL registers */
4303 nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op);
4304
4305 nir_deref_instr *nir_deref_instr_create(nir_shader *shader,
4306 nir_deref_type deref_type);
4307
4308 nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type);
4309
4310 nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader,
4311 unsigned num_components,
4312 unsigned bit_size);
4313
4314 nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader,
4315 nir_intrinsic_op op);
4316
4317 nir_call_instr *nir_call_instr_create(nir_shader *shader,
4318 nir_function *callee);
4319
4320 /** Creates a NIR texture instruction */
4321 nir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs);
4322
4323 nir_phi_instr *nir_phi_instr_create(nir_shader *shader);
4324 nir_phi_src *nir_phi_instr_add_src(nir_phi_instr *instr,
4325 nir_block *pred, nir_def *src);
4326
4327 nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader);
4328
4329 nir_undef_instr *nir_undef_instr_create(nir_shader *shader,
4330 unsigned num_components,
4331 unsigned bit_size);
4332
4333 nir_const_value nir_alu_binop_identity(nir_op binop, unsigned bit_size);
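
/* Example: the _create helpers only allocate an instruction; it still has to
 * be inserted with the cursor API below. A sketch that materializes a 32-bit
 * constant at the top of an impl:
 *
 *    nir_load_const_instr *c = nir_load_const_instr_create(shader, 1, 32);
 *    c->value[0].u32 = 42;
 *    nir_instr_insert(nir_before_impl(impl), &c->instr);
 */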
4334
4335 /**
4336 * NIR Cursors and Instruction Insertion API
4337 * @{
4338 *
4339 * A tiny struct representing a point to insert/extract instructions or
4340 * control flow nodes. Helps reduce the combinatorial explosion of possible
4341 * points to insert/extract.
4342 *
4343 * \sa nir_control_flow.h
4344 */
4345 typedef enum {
4346 nir_cursor_before_block,
4347 nir_cursor_after_block,
4348 nir_cursor_before_instr,
4349 nir_cursor_after_instr,
4350 } nir_cursor_option;
4351
4352 typedef struct {
4353 nir_cursor_option option;
4354 union {
4355 nir_block *block;
4356 nir_instr *instr;
4357 };
4358 } nir_cursor;
4359
4360 static inline nir_block *
4361 nir_cursor_current_block(nir_cursor cursor)
4362 {
4363 if (cursor.option == nir_cursor_before_instr ||
4364 cursor.option == nir_cursor_after_instr) {
4365 return cursor.instr->block;
4366 } else {
4367 return cursor.block;
4368 }
4369 }
4370
4371 bool nir_cursors_equal(nir_cursor a, nir_cursor b);
4372
4373 static inline nir_cursor
4374 nir_before_block(nir_block *block)
4375 {
4376 nir_cursor cursor;
4377 cursor.option = nir_cursor_before_block;
4378 cursor.block = block;
4379 return cursor;
4380 }
4381
4382 static inline nir_cursor
4383 nir_after_block(nir_block *block)
4384 {
4385 nir_cursor cursor;
4386 cursor.option = nir_cursor_after_block;
4387 cursor.block = block;
4388 return cursor;
4389 }
4390
4391 static inline nir_cursor
4392 nir_before_instr(nir_instr *instr)
4393 {
4394 nir_cursor cursor;
4395 cursor.option = nir_cursor_before_instr;
4396 cursor.instr = instr;
4397 return cursor;
4398 }
4399
4400 static inline nir_cursor
4401 nir_after_instr(nir_instr *instr)
4402 {
4403 nir_cursor cursor;
4404 cursor.option = nir_cursor_after_instr;
4405 cursor.instr = instr;
4406 return cursor;
4407 }
4408
4409 static inline nir_cursor
4410 nir_before_block_after_phis(nir_block *block)
4411 {
4412 nir_phi_instr *last_phi = nir_block_last_phi_instr(block);
4413 if (last_phi)
4414 return nir_after_instr(&last_phi->instr);
4415 else
4416 return nir_before_block(block);
4417 }
4418
4419 static inline nir_cursor
4420 nir_after_block_before_jump(nir_block *block)
4421 {
4422 nir_instr *last_instr = nir_block_last_instr(block);
4423 if (last_instr && last_instr->type == nir_instr_type_jump) {
4424 return nir_before_instr(last_instr);
4425 } else {
4426 return nir_after_block(block);
4427 }
4428 }
4429
4430 static inline nir_cursor
4431 nir_before_src(nir_src *src)
4432 {
4433 if (nir_src_is_if(src)) {
4434 nir_block *prev_block =
4435 nir_cf_node_as_block(nir_cf_node_prev(&nir_src_parent_if(src)->cf_node));
4436 return nir_after_block(prev_block);
4437 } else if (nir_src_parent_instr(src)->type == nir_instr_type_phi) {
4438 #ifndef NDEBUG
4439 nir_phi_instr *cond_phi = nir_instr_as_phi(nir_src_parent_instr(src));
4440 bool found = false;
4441 nir_foreach_phi_src(phi_src, cond_phi) {
4442 if (phi_src->src.ssa == src->ssa) {
4443 found = true;
4444 break;
4445 }
4446 }
4447 assert(found);
4448 #endif
4449 /* The list_entry() macro is a generic container-of macro; it just happens
4450 * to have a more specific name.
4451 */
4452 nir_phi_src *phi_src = list_entry(src, nir_phi_src, src);
4453 return nir_after_block_before_jump(phi_src->pred);
4454 } else {
4455 return nir_before_instr(nir_src_parent_instr(src));
4456 }
4457 }
4458
4459 static inline nir_cursor
4460 nir_before_cf_node(nir_cf_node *node)
4461 {
4462 if (node->type == nir_cf_node_block)
4463 return nir_before_block(nir_cf_node_as_block(node));
4464
4465 return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node)));
4466 }
4467
4468 static inline nir_cursor
4469 nir_after_cf_node(nir_cf_node *node)
4470 {
4471 if (node->type == nir_cf_node_block)
4472 return nir_after_block(nir_cf_node_as_block(node));
4473
4474 return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node)));
4475 }
4476
4477 static inline nir_cursor
4478 nir_after_phis(nir_block *block)
4479 {
4480 nir_foreach_instr(instr, block) {
4481 if (instr->type != nir_instr_type_phi)
4482 return nir_before_instr(instr);
4483 }
4484 return nir_after_block(block);
4485 }
4486
4487 static inline nir_cursor
4488 nir_after_instr_and_phis(nir_instr *instr)
4489 {
4490 if (instr->type == nir_instr_type_phi)
4491 return nir_after_phis(instr->block);
4492 else
4493 return nir_after_instr(instr);
4494 }
4495
4496 static inline nir_cursor
4497 nir_after_cf_node_and_phis(nir_cf_node *node)
4498 {
4499 if (node->type == nir_cf_node_block)
4500 return nir_after_block(nir_cf_node_as_block(node));
4501
4502 nir_block *block = nir_cf_node_as_block(nir_cf_node_next(node));
4503
4504 return nir_after_phis(block);
4505 }
4506
4507 static inline nir_cursor
4508 nir_before_cf_list(struct exec_list *cf_list)
4509 {
4510 nir_cf_node *first_node = exec_node_data(nir_cf_node,
4511 exec_list_get_head(cf_list), node);
4512 return nir_before_cf_node(first_node);
4513 }
4514
4515 static inline nir_cursor
4516 nir_after_cf_list(struct exec_list *cf_list)
4517 {
4518 nir_cf_node *last_node = exec_node_data(nir_cf_node,
4519 exec_list_get_tail(cf_list), node);
4520 return nir_after_cf_node(last_node);
4521 }
4522
4523 static inline nir_cursor
4524 nir_before_impl(nir_function_impl *impl)
4525 {
4526 return nir_before_cf_list(&impl->body);
4527 }
4528
4529 static inline nir_cursor
4530 nir_after_impl(nir_function_impl *impl)
4531 {
4532 return nir_after_cf_list(&impl->body);
4533 }
4534
4535 /**
4536 * Insert a NIR instruction at the given cursor.
4537 *
4538 * Note: This does not update the cursor.
4539 */
4540 void nir_instr_insert(nir_cursor cursor, nir_instr *instr);
4541
4542 bool nir_instr_move(nir_cursor cursor, nir_instr *instr);
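
/* Example: cursors compose with the constructors above, so "insert a copy of
 * an instruction right after the original" is just (nir_instr_clone is
 * declared further down in this header):
 *
 *    nir_instr *copy = nir_instr_clone(shader, orig);
 *    nir_instr_insert(nir_after_instr(orig), copy);
 */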
4543
4544 static inline void
4545 nir_instr_insert_before(nir_instr *instr, nir_instr *before)
4546 {
4547 nir_instr_insert(nir_before_instr(instr), before);
4548 }
4549
4550 static inline void
4551 nir_instr_insert_after(nir_instr *instr, nir_instr *after)
4552 {
4553 nir_instr_insert(nir_after_instr(instr), after);
4554 }
4555
4556 static inline void
4557 nir_instr_insert_before_block(nir_block *block, nir_instr *before)
4558 {
4559 nir_instr_insert(nir_before_block(block), before);
4560 }
4561
4562 static inline void
4563 nir_instr_insert_after_block(nir_block *block, nir_instr *after)
4564 {
4565 nir_instr_insert(nir_after_block(block), after);
4566 }
4567
4568 static inline void
4569 nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before)
4570 {
4571 nir_instr_insert(nir_before_cf_node(node), before);
4572 }
4573
4574 static inline void
4575 nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after)
4576 {
4577 nir_instr_insert(nir_after_cf_node(node), after);
4578 }
4579
4580 static inline void
4581 nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before)
4582 {
4583 nir_instr_insert(nir_before_cf_list(list), before);
4584 }
4585
4586 static inline void
4587 nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after)
4588 {
4589 nir_instr_insert(nir_after_cf_list(list), after);
4590 }
4591
4592 void nir_instr_remove_v(nir_instr *instr);
4593 void nir_instr_free(nir_instr *instr);
4594 void nir_instr_free_list(struct exec_list *list);
4595
4596 static inline nir_cursor
4597 nir_instr_remove(nir_instr *instr)
4598 {
4599 nir_cursor cursor;
4600 nir_instr *prev = nir_instr_prev(instr);
4601 if (prev) {
4602 cursor = nir_after_instr(prev);
4603 } else {
4604 cursor = nir_before_block(instr->block);
4605 }
4606 nir_instr_remove_v(instr);
4607 return cursor;
4608 }
4609
4610 nir_cursor nir_instr_free_and_dce(nir_instr *instr);
4611
4612 /** @} */
4613
4614 nir_def *nir_instr_def(nir_instr *instr);
4615
4616 typedef bool (*nir_foreach_def_cb)(nir_def *def, void *state);
4617 typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state);
4618 static inline bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state);
4619 bool nir_foreach_phi_src_leaving_block(nir_block *instr,
4620 nir_foreach_src_cb cb,
4621 void *state);
4622
4623 nir_const_value *nir_src_as_const_value(nir_src src);
4624
4625 #define NIR_SRC_AS_(name, c_type, type_enum, cast_macro) \
4626 static inline c_type * \
4627 nir_src_as_##name(nir_src src) \
4628 { \
4629 return src.ssa->parent_instr->type == type_enum \
4630 ? cast_macro(src.ssa->parent_instr) \
4631 : NULL; \
4632 }
4633
4634 NIR_SRC_AS_(alu_instr, nir_alu_instr, nir_instr_type_alu, nir_instr_as_alu)
4635 NIR_SRC_AS_(intrinsic, nir_intrinsic_instr,
4636 nir_instr_type_intrinsic, nir_instr_as_intrinsic)
4637 NIR_SRC_AS_(deref, nir_deref_instr, nir_instr_type_deref, nir_instr_as_deref)
4638
4639 bool nir_src_is_always_uniform(nir_src src);
4640 bool nir_srcs_equal(nir_src src1, nir_src src2);
4641 bool nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2);
4642
4643 static inline void
4644 nir_src_rewrite(nir_src *src, nir_def *new_ssa)
4645 {
4646 assert(src->ssa);
4647 assert(nir_src_is_if(src) ? (nir_src_parent_if(src) != NULL) : (nir_src_parent_instr(src) != NULL));
4648 list_del(&src->use_link);
4649 src->ssa = new_ssa;
4650 list_addtail(&src->use_link, &new_ssa->uses);
4651 }
4652
4653 /** Initialize a nir_src
4654 *
4655 * This is almost never the helper you want to use. This helper assumes that
4656 * the source is uninitialized garbage and blasts over it without doing any
4657 * tear-down of the existing source, such as removing it from uses lists.
4658 * Using this helper on a source that currently exists in any uses list will
4659 * result in linked list corruption. It also assumes that the instruction is
4660 * currently live in the IR and adds the source to the uses list for the given
4661 * nir_def as part of setup.
4662 *
4663 * This is pretty much only useful for adding sources to extant instructions
4664 * or manipulating parallel copy instructions as part of out-of-SSA.
4665 *
4666 * When in doubt, use nir_src_rewrite() instead.
4667 */
4668 void nir_instr_init_src(nir_instr *instr, nir_src *src, nir_def *def);
4669
4670 /** Clear a nir_src
4671 *
4672 * This helper clears a nir_src by removing it from any uses lists and
4673 * resetting its contents to NIR_SRC_INIT. This is typically used as a
4674 * precursor to removing the source from the instruction by adjusting a
4675 * num_srcs parameter somewhere or overwriting it with nir_instr_move_src().
4676 */
4677 void nir_instr_clear_src(nir_instr *instr, nir_src *src);
4678
4679 void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src);
4680
4681 void nir_def_init(nir_instr *instr, nir_def *def,
4682 unsigned num_components, unsigned bit_size);
4683 static inline void
4684 nir_def_init_for_type(nir_instr *instr, nir_def *def,
4685 const struct glsl_type *type)
4686 {
4687 assert(glsl_type_is_vector_or_scalar(type));
4688 nir_def_init(instr, def, glsl_get_components(type),
4689 glsl_get_bit_size(type));
4690 }
4691 void nir_def_rewrite_uses(nir_def *def, nir_def *new_ssa);
4692 void nir_def_rewrite_uses_src(nir_def *def, nir_src new_src);
4693 void nir_def_rewrite_uses_after(nir_def *def, nir_def *new_ssa,
4694 nir_instr *after_me);
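
/* Example: the standard way to replace an instruction's result is to rewrite
 * all uses to point at the new def and then drop the old instruction. A
 * minimal sketch, assuming old_instr produced old_def and new_def has already
 * been built:
 *
 *    nir_def_rewrite_uses(old_def, new_def);
 *    nir_instr_remove(old_instr);
 */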
4695
4696 nir_component_mask_t nir_src_components_read(const nir_src *src);
4697 nir_component_mask_t nir_def_components_read(const nir_def *def);
4698 bool nir_def_all_uses_are_fsat(const nir_def *def);
4699
4700 static inline bool
4701 nir_def_is_unused(nir_def *ssa)
4702 {
4703 return list_is_empty(&ssa->uses);
4704 }
4705
4706 /** Returns the next block, disregarding structure
4707 *
4708 * The ordering is deterministic but has no guarantees beyond that. In
4709 * particular, it is not guaranteed to be dominance-preserving.
4710 */
4711 nir_block *nir_block_unstructured_next(nir_block *block);
4712 nir_block *nir_unstructured_start_block(nir_function_impl *impl);
4713
4714 #define nir_foreach_block_unstructured(block, impl) \
4715 for (nir_block *block = nir_unstructured_start_block(impl); block != NULL; \
4716 block = nir_block_unstructured_next(block))
4717
4718 #define nir_foreach_block_unstructured_safe(block, impl) \
4719 for (nir_block *block = nir_unstructured_start_block(impl), \
4720 *next = nir_block_unstructured_next(block); \
4721 block != NULL; \
4722 block = next, next = nir_block_unstructured_next(block))
4723
4724 /*
4725 * finds the next basic block in source-code order, returns NULL if there is
4726 * none
4727 */
4728
4729 nir_block *nir_block_cf_tree_next(nir_block *block);
4730
4731 /* Performs the opposite of nir_block_cf_tree_next() */
4732
4733 nir_block *nir_block_cf_tree_prev(nir_block *block);
4734
4735 /* Gets the first block in a CF node in source-code order */
4736
4737 nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node);
4738
4739 /* Gets the last block in a CF node in source-code order */
4740
4741 nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node);
4742
4743 /* Gets the next block after a CF node in source-code order */
4744
4745 nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node);
4746
4747 /* Gets the block before a CF node in source-code order */
4748
4749 nir_block *nir_cf_node_cf_tree_prev(nir_cf_node *node);
4750
4751 /* Macros for loops that visit blocks in source-code order */
4752
4753 #define nir_foreach_block(block, impl) \
4754 for (nir_block *block = nir_start_block(impl); block != NULL; \
4755 block = nir_block_cf_tree_next(block))
4756
4757 #define nir_foreach_block_safe(block, impl) \
4758 for (nir_block *block = nir_start_block(impl), \
4759 *next = nir_block_cf_tree_next(block); \
4760 block != NULL; \
4761 block = next, next = nir_block_cf_tree_next(block))
4762
4763 #define nir_foreach_block_reverse(block, impl) \
4764 for (nir_block *block = nir_impl_last_block(impl); block != NULL; \
4765 block = nir_block_cf_tree_prev(block))
4766
4767 #define nir_foreach_block_reverse_safe(block, impl) \
4768 for (nir_block *block = nir_impl_last_block(impl), \
4769 *prev = nir_block_cf_tree_prev(block); \
4770 block != NULL; \
4771 block = prev, prev = nir_block_cf_tree_prev(block))
4772
4773 #define nir_foreach_block_in_cf_node(block, node) \
4774 for (nir_block *block = nir_cf_node_cf_tree_first(node); \
4775 block != nir_cf_node_cf_tree_next(node); \
4776 block = nir_block_cf_tree_next(block))
4777
4778 #define nir_foreach_block_in_cf_node_reverse(block, node) \
4779 for (nir_block *block = nir_cf_node_cf_tree_last(node); \
4780 block != nir_cf_node_cf_tree_prev(node); \
4781 block = nir_block_cf_tree_prev(block))
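
/* Example: a per-impl walk usually combines these block macros with
 * nir_foreach_instr_safe (defined earlier in this header); the _safe variants
 * allow removing the current element while iterating. is_dead() is a
 * hypothetical predicate here.
 *
 *    nir_foreach_block(block, impl) {
 *       nir_foreach_instr_safe(instr, block) {
 *          if (is_dead(instr))
 *             nir_instr_remove(instr);
 *       }
 *    }
 */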
4782
4783 /* If the following CF node is an if, this function returns that if.
4784 * Otherwise, it returns NULL.
4785 */
4786 nir_if *nir_block_get_following_if(nir_block *block);
4787
4788 nir_loop *nir_block_get_following_loop(nir_block *block);
4789
4790 nir_block **nir_block_get_predecessors_sorted(const nir_block *block, void *mem_ctx);
4791
4792 void nir_index_ssa_defs(nir_function_impl *impl);
4793 unsigned nir_index_instrs(nir_function_impl *impl);
4794
4795 void nir_index_blocks(nir_function_impl *impl);
4796
4797 void nir_shader_clear_pass_flags(nir_shader *shader);
4798
4799 unsigned nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes);
4800 unsigned nir_function_impl_index_vars(nir_function_impl *impl);
4801
4802 void nir_print_shader(nir_shader *shader, FILE *fp);
4803 void nir_print_shader_annotated(nir_shader *shader, FILE *fp, struct hash_table *errors);
4804 void nir_print_instr(const nir_instr *instr, FILE *fp);
4805 void nir_print_deref(const nir_deref_instr *deref, FILE *fp);
4806 void nir_log_shader_annotated_tagged(enum mesa_log_level level, const char *tag, nir_shader *shader, struct hash_table *annotations);
4807 #define nir_log_shadere(s) nir_log_shader_annotated_tagged(MESA_LOG_ERROR, (MESA_LOG_TAG), (s), NULL)
4808 #define nir_log_shaderw(s) nir_log_shader_annotated_tagged(MESA_LOG_WARN, (MESA_LOG_TAG), (s), NULL)
4809 #define nir_log_shaderi(s) nir_log_shader_annotated_tagged(MESA_LOG_INFO, (MESA_LOG_TAG), (s), NULL)
4810 #define nir_log_shader_annotated(s, annotations) nir_log_shader_annotated_tagged(MESA_LOG_ERROR, (MESA_LOG_TAG), (s), annotations)
4811
4812 char *nir_shader_as_str(nir_shader *nir, void *mem_ctx);
4813 char *nir_shader_as_str_annotated(nir_shader *nir, struct hash_table *annotations, void *mem_ctx);
4814 char *nir_instr_as_str(const nir_instr *instr, void *mem_ctx);
4815
4816 /** Shallow clone of a single instruction. */
4817 nir_instr *nir_instr_clone(nir_shader *s, const nir_instr *orig);
4818
4819 /** Clone a single instruction, including a remap table to rewrite sources. */
4820 nir_instr *nir_instr_clone_deep(nir_shader *s, const nir_instr *orig,
4821 struct hash_table *remap_table);
4822
4823 /** Shallow clone of a single ALU instruction. */
4824 nir_alu_instr *nir_alu_instr_clone(nir_shader *s, const nir_alu_instr *orig);
4825
4826 nir_shader *nir_shader_clone(void *mem_ctx, const nir_shader *s);
4827 nir_function *nir_function_clone(nir_shader *ns, const nir_function *fxn);
4828 nir_function_impl *nir_function_impl_clone(nir_shader *shader,
4829 const nir_function_impl *fi);
4830 nir_constant *nir_constant_clone(const nir_constant *c, nir_variable *var);
4831 nir_variable *nir_variable_clone(const nir_variable *c, nir_shader *shader);
4832
4833 void nir_shader_replace(nir_shader *dest, nir_shader *src);
4834
4835 void nir_shader_serialize_deserialize(nir_shader *s);
4836
4837 #ifndef NDEBUG
4838 void nir_validate_shader(nir_shader *shader, const char *when);
4839 void nir_validate_ssa_dominance(nir_shader *shader, const char *when);
4840 void nir_metadata_set_validation_flag(nir_shader *shader);
4841 void nir_metadata_check_validation_flag(nir_shader *shader);
4842
4843 static inline bool
4844 should_skip_nir(const char *name)
4845 {
4846 static const char *list = NULL;
4847 if (!list) {
4848 /* Comma separated list of names to skip. */
4849 list = getenv("NIR_SKIP");
4850 if (!list)
4851 list = "";
4852 }
4853
4854 if (!list[0])
4855 return false;
4856
4857 return comma_separated_list_contains(list, name);
4858 }
4859
4860 static inline bool
4861 should_print_nir(nir_shader *shader)
4862 {
4863 if ((shader->info.internal && !NIR_DEBUG(PRINT_INTERNAL)) ||
4864 shader->info.stage < 0 ||
4865 shader->info.stage > MESA_SHADER_KERNEL)
4866 return false;
4867
4868 return unlikely(nir_debug_print_shader[shader->info.stage]);
4869 }
4870 #else
4871 static inline void
4872 nir_validate_shader(nir_shader *shader, const char *when)
4873 {
4874 (void)shader;
4875 (void)when;
4876 }
4877 static inline void
4878 nir_validate_ssa_dominance(nir_shader *shader, const char *when)
4879 {
4880 (void)shader;
4881 (void)when;
4882 }
4883 static inline void
4884 nir_metadata_set_validation_flag(nir_shader *shader)
4885 {
4886 (void)shader;
4887 }
4888 static inline void
4889 nir_metadata_check_validation_flag(nir_shader *shader)
4890 {
4891 (void)shader;
4892 }
4893 static inline bool
4894 should_skip_nir(UNUSED const char *pass_name)
4895 {
4896 return false;
4897 }
4898 static inline bool
4899 should_print_nir(UNUSED nir_shader *shader)
4900 {
4901 return false;
4902 }
4903 #endif /* NDEBUG */
4904
4905 #define _PASS(pass, nir, do_pass) \
4906 do { \
4907 if (should_skip_nir(#pass)) { \
4908 printf("skipping %s\n", #pass); \
4909 break; \
4910 } \
4911 do_pass if (NIR_DEBUG(CLONE)) \
4912 { \
4913 nir_shader *_clone = nir_shader_clone(ralloc_parent(nir), nir);\
4914 nir_shader_replace(nir, _clone); \
4915 } \
4916 if (NIR_DEBUG(SERIALIZE)) { \
4917 nir_shader_serialize_deserialize(nir); \
4918 } \
4919 } while (0)
4920
4921 #define NIR_PASS(progress, nir, pass, ...) _PASS(pass, nir, { \
4922 nir_metadata_set_validation_flag(nir); \
4923 if (should_print_nir(nir)) \
4924 printf("%s\n", #pass); \
4925 if (pass(nir, ##__VA_ARGS__)) { \
4926 nir_validate_shader(nir, "after " #pass " in " __FILE__); \
4927 UNUSED bool _; \
4928 progress = true; \
4929 if (should_print_nir(nir)) \
4930 nir_print_shader(nir, stdout); \
4931 nir_metadata_check_validation_flag(nir); \
4932 } \
4933 })
4934
4935 #define NIR_PASS_V(nir, pass, ...) _PASS(pass, nir, { \
4936 if (should_print_nir(nir)) \
4937 printf("%s\n", #pass); \
4938 pass(nir, ##__VA_ARGS__); \
4939 nir_validate_shader(nir, "after " #pass " in " __FILE__); \
4940 if (should_print_nir(nir)) \
4941 nir_print_shader(nir, stdout); \
4942 })
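
/* Example: the usual driver-side optimization loop built from NIR_PASS. The
 * pass list is illustrative; any pass that returns a progress bool fits.
 *
 *    bool progress;
 *    do {
 *       progress = false;
 *       NIR_PASS(progress, nir, nir_copy_prop);
 *       NIR_PASS(progress, nir, nir_opt_dce);
 *       NIR_PASS(progress, nir, nir_opt_cse);
 *    } while (progress);
 */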
4943
4944 #define _NIR_LOOP_PASS(progress, idempotent, skip, nir, pass, ...) \
4945 do { \
4946 bool nir_loop_pass_progress = false; \
4947 if (!_mesa_set_search(skip, (void (*)())&pass)) \
4948 NIR_PASS(nir_loop_pass_progress, nir, pass, ##__VA_ARGS__); \
4949 if (nir_loop_pass_progress) \
4950 _mesa_set_clear(skip, NULL); \
4951 if (idempotent || !nir_loop_pass_progress) \
4952 _mesa_set_add(skip, (void (*)())&pass); \
4953 UNUSED bool _ = false; \
4954 progress |= nir_loop_pass_progress; \
4955 } while (0)
4956
4957 /* Helper to skip a pass if no different passes have made progress since it was
4958 * previously run. Note that two passes are considered the same if they have
4959 * the same function pointer, even if they used different options.
4960 *
4961 * The usage of this is mostly identical to NIR_PASS. "skip" is a "struct set *"
4962 * (created by _mesa_pointer_set_create) which the macro uses to keep track of
4963 * already run passes.
4964 *
4965 * Example:
4966 * bool progress = true;
4967 * struct set *skip = _mesa_pointer_set_create(NULL);
4968 * while (progress) {
4969 * progress = false;
4970 * NIR_LOOP_PASS(progress, skip, nir, pass1);
4971 * NIR_LOOP_PASS_NOT_IDEMPOTENT(progress, skip, nir, nir_opt_algebraic);
4972 * NIR_LOOP_PASS(progress, skip, nir, pass2);
4973 * ...
4974 * }
4975 * _mesa_set_destroy(skip, NULL);
4976 *
4977 * You shouldn't mix usage of this with the NIR_PASS set of helpers, without
4978 * using a new "skip" in-between.
4979 */
4980 #define NIR_LOOP_PASS(progress, skip, nir, pass, ...) \
4981 _NIR_LOOP_PASS(progress, true, skip, nir, pass, ##__VA_ARGS__)
4982
4983 /* Like NIR_LOOP_PASS, but use this for passes which may make further progress
4984 * when repeated.
4985 */
4986 #define NIR_LOOP_PASS_NOT_IDEMPOTENT(progress, skip, nir, pass, ...) \
4987 _NIR_LOOP_PASS(progress, false, skip, nir, pass, ##__VA_ARGS__)
4988
4989 #define NIR_SKIP(name) should_skip_nir(#name)
4990
4991 /** An instruction filtering callback with writemask
4992 *
4993 * Returns true if the instruction should be processed with the associated
4994 * writemask and false otherwise.
4995 */
4996 typedef bool (*nir_instr_writemask_filter_cb)(const nir_instr *,
4997 unsigned writemask, const void *);
4998
4999 /** A simple instruction lowering callback
5000 *
5001 * Many instruction lowering passes can be written as a simple function which
5002 * takes an instruction as its input and returns a sequence of instructions
5003 * that implement the consumed instruction. This function type represents
5004 * such a lowering function. When called, a function with this prototype
5005 * should either return NULL indicating that no lowering needs to be done or
5006 * emit a sequence of instructions using the provided builder (whose cursor
5007 * will already be placed after the instruction to be lowered) and return the
5008 * resulting nir_def.
5009 */
5010 typedef nir_def *(*nir_lower_instr_cb)(struct nir_builder *,
5011 nir_instr *, void *);
5012
5013 /**
5014 * Special return value for nir_lower_instr_cb when some progress occurred
5015 * (like changing an input to the instr) that didn't result in a replacement
5016 * SSA def being generated.
5017 */
5018 #define NIR_LOWER_INSTR_PROGRESS ((nir_def *)(uintptr_t)1)
5019
/**
 * Special return value for nir_lower_instr_cb indicating that progress
 * occurred and the current instruction should be removed, even though it
 * produces no output to replace (e.g. a store).
 */
#define NIR_LOWER_INSTR_PROGRESS_REPLACE ((nir_def *)(uintptr_t)2)
5027
5028 /** Iterate over all the instructions in a nir_function_impl and lower them
5029 * using the provided callbacks
5030 *
5031 * This function implements the guts of a standard lowering pass for you. It
5032 * iterates over all of the instructions in a nir_function_impl and calls the
5033 * filter callback on each one. If the filter callback returns true, it then
 * calls the lowering callback on the instruction. (Splitting it this way
5035 * allows us to avoid some save/restore work for instructions we know won't be
5036 * lowered.) If the instruction is dead after the lowering is complete, it
5037 * will be removed. If new instructions are added, the lowering callback will
5038 * also be called on them in case multiple lowerings are required.
5039 *
5040 * If the callback indicates that the original instruction is replaced (either
5041 * through a new SSA def or NIR_LOWER_INSTR_PROGRESS_REPLACE), then the
5042 * instruction is removed along with any now-dead SSA defs it used.
5043 *
5044 * The metadata for the nir_function_impl will also be updated. If any blocks
5045 * are added (they cannot be removed), dominance and block indices will be
5046 * invalidated.
5047 */
5048 bool nir_function_impl_lower_instructions(nir_function_impl *impl,
5049 nir_instr_filter_cb filter,
5050 nir_lower_instr_cb lower,
5051 void *cb_data);
5052 bool nir_shader_lower_instructions(nir_shader *shader,
5053 nir_instr_filter_cb filter,
5054 nir_lower_instr_cb lower,
5055 void *cb_data);
5056
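/* A minimal sketch of a filter/lower callback pair for
 * nir_shader_lower_instructions. The opcode and the replacement expression
 * are illustrative only (sin(x) rewritten as cos(x - pi/2) for hypothetical
 * hardware that only has a cosine instruction), and source swizzles are
 * ignored for brevity:
 *
 *    static bool
 *    is_fsin(const nir_instr *instr, const void *data)
 *    {
 *       return instr->type == nir_instr_type_alu &&
 *              nir_instr_as_alu((nir_instr *)instr)->op == nir_op_fsin;
 *    }
 *
 *    static nir_def *
 *    lower_fsin(struct nir_builder *b, nir_instr *instr, void *data)
 *    {
 *       nir_def *x = nir_instr_as_alu(instr)->src[0].src.ssa;
 *       return nir_fcos(b, nir_fadd_imm(b, x, -1.5707963267948966));
 *    }
 *
 *    bool progress =
 *       nir_shader_lower_instructions(shader, is_fsin, lower_fsin, NULL);
 */
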
5057 void nir_calc_dominance_impl(nir_function_impl *impl);
5058 void nir_calc_dominance(nir_shader *shader);
5059
5060 nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2);
5061 bool nir_block_dominates(nir_block *parent, nir_block *child);
5062 bool nir_block_is_unreachable(nir_block *block);
5063
5064 void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp);
5065 void nir_dump_dom_tree(nir_shader *shader, FILE *fp);
5066
5067 void nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp);
5068 void nir_dump_dom_frontier(nir_shader *shader, FILE *fp);
5069
5070 void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp);
5071 void nir_dump_cfg(nir_shader *shader, FILE *fp);
5072
5073 void nir_gs_count_vertices_and_primitives(const nir_shader *shader,
5074 int *out_vtxcnt,
5075 int *out_prmcnt,
5076 int *out_decomposed_prmcnt,
5077 unsigned num_streams);
5078
5079 typedef enum {
5080 nir_group_all,
5081 nir_group_same_resource_only,
5082 } nir_load_grouping;
5083
5084 void nir_group_loads(nir_shader *shader, nir_load_grouping grouping,
5085 unsigned max_distance);
5086
5087 bool nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes);
5088 bool nir_split_array_vars(nir_shader *shader, nir_variable_mode modes);
5089 bool nir_split_var_copies(nir_shader *shader);
5090 bool nir_split_per_member_structs(nir_shader *shader);
5091 bool nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes);
5092
5093 bool nir_lower_returns_impl(nir_function_impl *impl);
5094 bool nir_lower_returns(nir_shader *shader);
5095
5096 void nir_inline_function_impl(struct nir_builder *b,
5097 const nir_function_impl *impl,
5098 nir_def **params,
5099 struct hash_table *shader_var_remap);
5100 bool nir_inline_functions(nir_shader *shader);
5101 void nir_cleanup_functions(nir_shader *shader);
5102 bool nir_link_shader_functions(nir_shader *shader,
5103 const nir_shader *link_shader);
5104
5105 void nir_find_inlinable_uniforms(nir_shader *shader);
5106 void nir_inline_uniforms(nir_shader *shader, unsigned num_uniforms,
5107 const uint32_t *uniform_values,
5108 const uint16_t *uniform_dw_offsets);
5109 bool nir_collect_src_uniforms(const nir_src *src, int component,
5110 uint32_t *uni_offsets, uint8_t *num_offsets,
5111 unsigned max_num_bo, unsigned max_offset);
5112 void nir_add_inlinable_uniforms(const nir_src *cond, nir_loop_info *info,
5113 uint32_t *uni_offsets, uint8_t *num_offsets,
5114 unsigned max_num_bo, unsigned max_offset);
5115
5116 bool nir_propagate_invariant(nir_shader *shader, bool invariant_prim);
5117
5118 void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, nir_shader *shader);
5119 void nir_lower_deref_copy_instr(struct nir_builder *b,
5120 nir_intrinsic_instr *copy);
5121 bool nir_lower_var_copies(nir_shader *shader);
5122
5123 bool nir_opt_memcpy(nir_shader *shader);
5124 bool nir_lower_memcpy(nir_shader *shader);
5125
5126 void nir_fixup_deref_modes(nir_shader *shader);
5127 void nir_fixup_deref_types(nir_shader *shader);
5128
5129 bool nir_lower_global_vars_to_local(nir_shader *shader);
5130
5131 typedef enum {
5132 nir_lower_direct_array_deref_of_vec_load = (1 << 0),
5133 nir_lower_indirect_array_deref_of_vec_load = (1 << 1),
5134 nir_lower_direct_array_deref_of_vec_store = (1 << 2),
5135 nir_lower_indirect_array_deref_of_vec_store = (1 << 3),
5136 } nir_lower_array_deref_of_vec_options;
5137
5138 bool nir_lower_array_deref_of_vec(nir_shader *shader, nir_variable_mode modes,
5139 nir_lower_array_deref_of_vec_options options);
5140
5141 bool nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes,
5142 uint32_t max_lower_array_len);
5143
5144 bool nir_lower_indirect_var_derefs(nir_shader *shader,
5145 const struct set *vars);
5146
5147 bool nir_lower_locals_to_regs(nir_shader *shader, uint8_t bool_bitsize);
5148
5149 bool nir_lower_io_to_temporaries(nir_shader *shader,
5150 nir_function_impl *entrypoint,
5151 bool outputs, bool inputs);
5152
5153 bool nir_lower_vars_to_scratch(nir_shader *shader,
5154 nir_variable_mode modes,
5155 int size_threshold,
5156 glsl_type_size_align_func size_align);
5157
5158 void nir_lower_clip_halfz(nir_shader *shader);
5159
5160 void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint);
5161
5162 void nir_gather_types(nir_function_impl *impl,
5163 BITSET_WORD *float_types,
5164 BITSET_WORD *int_types);
5165
5166 void nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode,
5167 unsigned *size,
5168 int (*type_size)(const struct glsl_type *, bool));
5169
5170 /* Some helpers to do very simple linking */
5171 bool nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer);
5172 bool nir_remove_unused_io_vars(nir_shader *shader, nir_variable_mode mode,
5173 uint64_t *used_by_other_stage,
5174 uint64_t *used_by_other_stage_patches);
5175 void nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
5176 bool default_to_smooth_interp);
5177 void nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer);
5178 bool nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer);
5179 void nir_link_varying_precision(nir_shader *producer, nir_shader *consumer);
5180 nir_variable *nir_clone_uniform_variable(nir_shader *nir,
5181 nir_variable *uniform, bool spirv);
5182 nir_deref_instr *nir_clone_deref_instr(struct nir_builder *b,
5183 nir_variable *var,
5184 nir_deref_instr *deref);
5185
5186 bool nir_slot_is_sysval_output(gl_varying_slot slot,
5187 gl_shader_stage next_shader);
5188 bool nir_slot_is_varying(gl_varying_slot slot);
5189 bool nir_slot_is_sysval_output_and_varying(gl_varying_slot slot,
5190 gl_shader_stage next_shader);
5191 bool nir_remove_varying(nir_intrinsic_instr *intr, gl_shader_stage next_shader);
5192 bool nir_remove_sysval_output(nir_intrinsic_instr *intr);
5193
5194 bool nir_lower_amul(nir_shader *shader,
5195 int (*type_size)(const struct glsl_type *, bool));
5196
5197 bool nir_lower_ubo_vec4(nir_shader *shader);
5198
5199 void nir_sort_variables_by_location(nir_shader *shader, nir_variable_mode mode);
5200 void nir_assign_io_var_locations(nir_shader *shader,
5201 nir_variable_mode mode,
5202 unsigned *size,
5203 gl_shader_stage stage);
5204
5205 typedef struct {
5206 uint8_t num_linked_io_vars;
5207 uint8_t num_linked_patch_io_vars;
5208 } nir_linked_io_var_info;
5209
5210 nir_linked_io_var_info
5211 nir_assign_linked_io_var_locations(nir_shader *producer,
5212 nir_shader *consumer);
5213
5214 typedef enum {
5215 /* If set, this causes all 64-bit IO operations to be lowered on-the-fly
5216 * to 32-bit operations. This is only valid for nir_var_shader_in/out
5217 * modes.
5218 *
5219 * Note that this destroys dual-slot information i.e. whether an input
5220 * occupies the low or high half of dvec4. Instead, it adds an offset of 1
5221 * to the load (which is ambiguous) and expects driver locations of inputs
5222 * to be final, which prevents any further optimizations.
5223 *
5224 * TODO: remove this in favor of nir_lower_io_lower_64bit_to_32_new.
5225 */
5226 nir_lower_io_lower_64bit_to_32 = (1 << 0),
5227
   /* If set, this causes the subset of 64-bit IO operations involving floats
    * to be lowered on-the-fly to 32-bit operations. This is only valid for
    * nir_var_shader_in/out modes.
    */
5232 nir_lower_io_lower_64bit_float_to_32 = (1 << 1),
5233
5234 /* This causes all 64-bit IO operations to be lowered to 32-bit operations.
5235 * This is only valid for nir_var_shader_in/out modes.
5236 *
5237 * Only VS inputs: Dual slot information is preserved as nir_io_semantics::
5238 * high_dvec2 and gathered into shader_info::dual_slot_inputs, so that
5239 * the shader can be arbitrarily optimized and the low or high half of
5240 * dvec4 can be DCE'd independently without affecting the other half.
5241 */
5242 nir_lower_io_lower_64bit_to_32_new = (1 << 2),
5243 } nir_lower_io_options;
5244 bool nir_lower_io(nir_shader *shader,
5245 nir_variable_mode modes,
5246 int (*type_size)(const struct glsl_type *, bool),
5247 nir_lower_io_options);
5248
5249 bool nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes);
5250 bool nir_lower_color_inputs(nir_shader *nir);
5251 void nir_lower_io_passes(nir_shader *nir, bool renumber_vs_inputs);
5252 bool nir_io_add_intrinsic_xfb_info(nir_shader *nir);
5253
5254 bool
5255 nir_lower_vars_to_explicit_types(nir_shader *shader,
5256 nir_variable_mode modes,
5257 glsl_type_size_align_func type_info);
5258 void
5259 nir_gather_explicit_io_initializers(nir_shader *shader,
5260 void *dst, size_t dst_size,
5261 nir_variable_mode mode);
5262
5263 bool nir_lower_vec3_to_vec4(nir_shader *shader, nir_variable_mode modes);
5264
5265 typedef enum {
5266 /**
5267 * An address format which is a simple 32-bit global GPU address.
5268 */
5269 nir_address_format_32bit_global,
5270
5271 /**
5272 * An address format which is a simple 64-bit global GPU address.
5273 */
5274 nir_address_format_64bit_global,
5275
5276 /**
5277 * An address format which is a 64-bit global GPU address encoded as a
5278 * 2x32-bit vector.
5279 */
5280 nir_address_format_2x32bit_global,
5281
5282 /**
5283 * An address format which is a 64-bit global base address and a 32-bit
5284 * offset.
5285 *
5286 * This is identical to 64bit_bounded_global except that bounds checking
5287 * is not applied when lowering to global access. Even though the size is
5288 * never used for an actual bounds check, it needs to be valid so we can
5289 * lower deref_buffer_array_length properly.
5290 */
5291 nir_address_format_64bit_global_32bit_offset,
5292
5293 /**
5294 * An address format which is a bounds-checked 64-bit global GPU address.
5295 *
5296 * The address is comprised as a 32-bit vec4 where .xy are a uint64_t base
5297 * address stored with the low bits in .x and high bits in .y, .z is a
5298 * size, and .w is an offset. When the final I/O operation is lowered, .w
5299 * is checked against .z and the operation is predicated on the result.
5300 */
5301 nir_address_format_64bit_bounded_global,
5302
5303 /**
5304 * An address format which is comprised of a vec2 where the first
5305 * component is a buffer index and the second is an offset.
5306 */
5307 nir_address_format_32bit_index_offset,
5308
5309 /**
5310 * An address format which is a 64-bit value, where the high 32 bits
5311 * are a buffer index, and the low 32 bits are an offset.
5312 */
5313 nir_address_format_32bit_index_offset_pack64,
5314
5315 /**
5316 * An address format which is comprised of a vec3 where the first two
5317 * components specify the buffer and the third is an offset.
5318 */
5319 nir_address_format_vec2_index_32bit_offset,
5320
5321 /**
5322 * An address format which represents generic pointers with a 62-bit
5323 * pointer and a 2-bit enum in the top two bits. The top two bits have
5324 * the following meanings:
5325 *
5326 * - 0x0: Global memory
5327 * - 0x1: Shared memory
5328 * - 0x2: Scratch memory
5329 * - 0x3: Global memory
5330 *
5331 * The redundancy between 0x0 and 0x3 is because of Intel sign-extension of
5332 * addresses. Valid global memory addresses may naturally have either 0 or
5333 * ~0 as their high bits.
5334 *
5335 * Shared and scratch pointers are represented as 32-bit offsets with the
5336 * top 32 bits only being used for the enum. This allows us to avoid
5337 * 64-bit address calculations in a bunch of cases.
5338 */
5339 nir_address_format_62bit_generic,
5340
5341 /**
5342 * An address format which is a simple 32-bit offset.
5343 */
5344 nir_address_format_32bit_offset,
5345
5346 /**
5347 * An address format which is a simple 32-bit offset cast to 64-bit.
5348 */
5349 nir_address_format_32bit_offset_as_64bit,
5350
5351 /**
5352 * An address format representing a purely logical addressing model. In
5353 * this model, all deref chains must be complete from the dereference
5354 * operation to the variable. Cast derefs are not allowed. These
5355 * addresses will be 32-bit scalars but the format is immaterial because
5356 * you can always chase the chain.
5357 */
5358 nir_address_format_logical,
5359 } nir_address_format;
5360
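/* For illustration, a few of the formats above and the NIR values used to
 * represent them (derived from the descriptions in the enum):
 *
 *    nir_address_format_32bit_global            -> 1 x 32-bit component
 *    nir_address_format_64bit_global            -> 1 x 64-bit component
 *    nir_address_format_2x32bit_global          -> 2 x 32-bit components
 *    nir_address_format_64bit_bounded_global    -> 4 x 32-bit components
 *    nir_address_format_32bit_index_offset      -> 2 x 32-bit components
 *    nir_address_format_vec2_index_32bit_offset -> 3 x 32-bit components
 *    nir_address_format_32bit_offset            -> 1 x 32-bit component
 */
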
5361 unsigned
5362 nir_address_format_bit_size(nir_address_format addr_format);
5363
5364 unsigned
5365 nir_address_format_num_components(nir_address_format addr_format);
5366
5367 static inline const struct glsl_type *
nir_address_format_to_glsl_type(nir_address_format addr_format)
5369 {
5370 unsigned bit_size = nir_address_format_bit_size(addr_format);
5371 assert(bit_size == 32 || bit_size == 64);
5372 return glsl_vector_type(bit_size == 32 ? GLSL_TYPE_UINT : GLSL_TYPE_UINT64,
5373 nir_address_format_num_components(addr_format));
5374 }
5375
5376 const nir_const_value *nir_address_format_null_value(nir_address_format addr_format);
5377
5378 nir_def *nir_build_addr_iadd(struct nir_builder *b, nir_def *addr,
5379 nir_address_format addr_format,
5380 nir_variable_mode modes,
5381 nir_def *offset);
5382
5383 nir_def *nir_build_addr_iadd_imm(struct nir_builder *b, nir_def *addr,
5384 nir_address_format addr_format,
5385 nir_variable_mode modes,
5386 int64_t offset);
5387
5388 nir_def *nir_build_addr_ieq(struct nir_builder *b, nir_def *addr0, nir_def *addr1,
5389 nir_address_format addr_format);
5390
5391 nir_def *nir_build_addr_isub(struct nir_builder *b, nir_def *addr0, nir_def *addr1,
5392 nir_address_format addr_format);
5393
5394 nir_def *nir_explicit_io_address_from_deref(struct nir_builder *b,
5395 nir_deref_instr *deref,
5396 nir_def *base_addr,
5397 nir_address_format addr_format);
5398
5399 bool nir_get_explicit_deref_align(nir_deref_instr *deref,
5400 bool default_to_type_align,
5401 uint32_t *align_mul,
5402 uint32_t *align_offset);
5403
5404 void nir_lower_explicit_io_instr(struct nir_builder *b,
5405 nir_intrinsic_instr *io_instr,
5406 nir_def *addr,
5407 nir_address_format addr_format);
5408
5409 bool nir_lower_explicit_io(nir_shader *shader,
5410 nir_variable_mode modes,
5411 nir_address_format);
5412
5413 typedef struct {
5414 uint8_t num_components;
5415 uint8_t bit_size;
5416 uint16_t align;
5417 } nir_mem_access_size_align;
5418
5419 /* clang-format off */
5420 typedef nir_mem_access_size_align
5421 (*nir_lower_mem_access_bit_sizes_cb)(nir_intrinsic_op intrin,
5422 uint8_t bytes,
5423 uint8_t bit_size,
5424 uint32_t align_mul,
5425 uint32_t align_offset,
5426 bool offset_is_const,
5427 const void *cb_data);
5428 /* clang-format on */
5429
5430 typedef struct {
5431 nir_lower_mem_access_bit_sizes_cb callback;
5432 nir_variable_mode modes;
5433 bool may_lower_unaligned_stores_to_atomics;
5434 void *cb_data;
5435 } nir_lower_mem_access_bit_sizes_options;
5436
5437 bool nir_lower_mem_access_bit_sizes(nir_shader *shader,
5438 const nir_lower_mem_access_bit_sizes_options *options);
5439
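/* A minimal sketch of a nir_lower_mem_access_bit_sizes_cb for hypothetical
 * hardware that only supports naturally aligned 32-bit accesses of up to
 * four components; anything smaller or misaligned falls back to byte
 * accesses (thresholds and modes are illustrative only):
 *
 *    static nir_mem_access_size_align
 *    mem_access_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,
 *                             uint8_t bit_size, uint32_t align_mul,
 *                             uint32_t align_offset, bool offset_is_const,
 *                             const void *cb_data)
 *    {
 *       bool aligned4 = align_mul >= 4 && (align_offset % 4) == 0;
 *       if (!aligned4 || bytes < 4) {
 *          return (nir_mem_access_size_align){
 *             .num_components = 1, .bit_size = 8, .align = 1 };
 *       }
 *       return (nir_mem_access_size_align){
 *          .num_components = MIN2(bytes / 4, 4), .bit_size = 32, .align = 4 };
 *    }
 *
 *    const nir_lower_mem_access_bit_sizes_options opts = {
 *       .callback = mem_access_size_align_cb,
 *       .modes = nir_var_mem_ssbo | nir_var_mem_global,
 *    };
 *    nir_lower_mem_access_bit_sizes(shader, &opts);
 */
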
5440 typedef struct {
5441 /* Lower load_ubo to be robust. Out-of-bounds loads will return UNDEFINED
5442 * values (not necessarily zero).
5443 */
5444 bool lower_ubo;
5445
5446 /* Lower load_ssbo/store_ssbo/ssbo_atomic(_swap) to be robust. Out-of-bounds
5447 * loads and atomics will return UNDEFINED values (not necessarily zero).
5448 * Out-of-bounds stores and atomics CORRUPT the contents of the SSBO.
5449 *
5450 * This suffices for robustBufferAccess but not robustBufferAccess2.
5451 */
5452 bool lower_ssbo;
5453
5454 /* Lower all image_load/image_store/image_atomic(_swap) instructions to be
5455 * robust. Out-of-bounds loads will return ZERO.
5456 *
5457 * This suffices for robustImageAccess but not robustImageAccess2.
5458 */
5459 bool lower_image;
5460
5461 /* Lower all buffer image instructions as above. Implied by lower_image. */
5462 bool lower_buffer_image;
5463
5464 /* Lower image_atomic(_swap) for all dimensions. Implied by lower_image. */
5465 bool lower_image_atomic;
5466
5467 /* Vulkan's robustBufferAccess feature is only concerned with buffers that
5468 * are bound through descriptor sets, so shared memory is not included, but
5469 * it may be useful to enable this for debugging.
5470 */
5471 bool lower_shared;
5472 } nir_lower_robust_access_options;
5473
5474 bool nir_lower_robust_access(nir_shader *s,
5475 const nir_lower_robust_access_options *opts);
5476
5477 /* clang-format off */
5478 typedef bool (*nir_should_vectorize_mem_func)(unsigned align_mul,
5479 unsigned align_offset,
5480 unsigned bit_size,
5481 unsigned num_components,
5482 nir_intrinsic_instr *low,
5483 nir_intrinsic_instr *high,
5484 void *data);
5485 /* clang-format on */
5486
5487 typedef struct {
5488 nir_should_vectorize_mem_func callback;
5489 nir_variable_mode modes;
5490 nir_variable_mode robust_modes;
5491 void *cb_data;
5492 bool has_shared2_amd;
5493 } nir_load_store_vectorize_options;
5494
5495 bool nir_opt_load_store_vectorize(nir_shader *shader, const nir_load_store_vectorize_options *options);
5496
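/* A minimal sketch of a nir_should_vectorize_mem_func that allows merged
 * accesses of up to 128 bits as long as the combined access is at least
 * 4-byte aligned (thresholds and modes are illustrative only):
 *
 *    static bool
 *    mem_vectorize_cb(unsigned align_mul, unsigned align_offset,
 *                     unsigned bit_size, unsigned num_components,
 *                     nir_intrinsic_instr *low, nir_intrinsic_instr *high,
 *                     void *data)
 *    {
 *       if (bit_size * num_components > 128)
 *          return false;
 *       return align_mul >= 4 && (align_offset % 4) == 0;
 *    }
 *
 *    const nir_load_store_vectorize_options opts = {
 *       .callback = mem_vectorize_cb,
 *       .modes = nir_var_mem_ubo | nir_var_mem_ssbo,
 *    };
 *    nir_opt_load_store_vectorize(shader, &opts);
 */
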
5497 typedef bool (*nir_lower_shader_calls_should_remat_func)(nir_instr *instr, void *data);
5498
5499 typedef struct nir_lower_shader_calls_options {
5500 /* Address format used for load/store operations on the call stack. */
5501 nir_address_format address_format;
5502
5503 /* Stack alignment */
5504 unsigned stack_alignment;
5505
5506 /* Put loads from the stack as close as possible from where they're needed.
5507 * You might want to disable combined_loads for best effects.
5508 */
5509 bool localized_loads;
5510
5511 /* If this function pointer is not NULL, lower_shader_calls will run
5512 * nir_opt_load_store_vectorize for stack load/store operations. Otherwise
    * the optimization is not run.
5514 */
5515 nir_should_vectorize_mem_func vectorizer_callback;
5516
5517 /* Data passed to vectorizer_callback */
5518 void *vectorizer_data;
5519
5520 /* If this function pointer is not NULL, lower_shader_calls will call this
5521 * function on instructions that require spill/fill/rematerialization of
5522 * their value. If this function returns true, lower_shader_calls will
5523 * ensure that the instruction is rematerialized, adding the sources of the
5524 * instruction to be spilled/filled.
5525 */
5526 nir_lower_shader_calls_should_remat_func should_remat_callback;
5527
5528 /* Data passed to should_remat_callback */
5529 void *should_remat_data;
5530 } nir_lower_shader_calls_options;
5531
5532 bool
5533 nir_lower_shader_calls(nir_shader *shader,
5534 const nir_lower_shader_calls_options *options,
5535 nir_shader ***resume_shaders_out,
5536 uint32_t *num_resume_shaders_out,
5537 void *mem_ctx);
5538
5539 int nir_get_io_offset_src_number(const nir_intrinsic_instr *instr);
5540 int nir_get_io_arrayed_index_src_number(const nir_intrinsic_instr *instr);
5541
5542 nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr);
5543 nir_src *nir_get_io_arrayed_index_src(nir_intrinsic_instr *instr);
5544 nir_src *nir_get_shader_call_payload_src(nir_intrinsic_instr *call);
5545
5546 bool nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage);
5547
5548 bool nir_lower_reg_intrinsics_to_ssa_impl(nir_function_impl *impl);
5549 bool nir_lower_reg_intrinsics_to_ssa(nir_shader *shader);
5550 bool nir_lower_vars_to_ssa(nir_shader *shader);
5551
5552 bool nir_remove_dead_derefs(nir_shader *shader);
5553 bool nir_remove_dead_derefs_impl(nir_function_impl *impl);
5554
5555 typedef struct nir_remove_dead_variables_options {
5556 bool (*can_remove_var)(nir_variable *var, void *data);
5557 void *can_remove_var_data;
5558 } nir_remove_dead_variables_options;
5559
5560 bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes,
5561 const nir_remove_dead_variables_options *options);
5562
5563 bool nir_lower_variable_initializers(nir_shader *shader,
5564 nir_variable_mode modes);
5565 bool nir_zero_initialize_shared_memory(nir_shader *shader,
5566 const unsigned shared_size,
5567 const unsigned chunk_size);
5568 bool nir_clear_shared_memory(nir_shader *shader,
5569 const unsigned shared_size,
5570 const unsigned chunk_size);
5571
5572 bool nir_move_vec_src_uses_to_dest(nir_shader *shader, bool skip_const_srcs);
5573 bool nir_lower_vec_to_regs(nir_shader *shader, nir_instr_writemask_filter_cb cb,
5574 const void *_data);
5575 bool nir_lower_alpha_test(nir_shader *shader, enum compare_func func,
5576 bool alpha_to_one,
5577 const gl_state_index16 *alpha_ref_state_tokens);
5578 bool nir_lower_alu(nir_shader *shader);
5579
5580 bool nir_lower_flrp(nir_shader *shader, unsigned lowering_mask,
5581 bool always_precise);
5582
5583 bool nir_scale_fdiv(nir_shader *shader);
5584
5585 bool nir_lower_alu_to_scalar(nir_shader *shader, nir_instr_filter_cb cb, const void *data);
5586 bool nir_lower_alu_width(nir_shader *shader, nir_vectorize_cb cb, const void *data);
5587 bool nir_lower_alu_vec8_16_srcs(nir_shader *shader);
5588 bool nir_lower_bool_to_bitsize(nir_shader *shader);
5589 bool nir_lower_bool_to_float(nir_shader *shader, bool has_fcsel_ne);
5590 bool nir_lower_bool_to_int32(nir_shader *shader);
5591 bool nir_opt_simplify_convert_alu_types(nir_shader *shader);
5592 bool nir_lower_const_arrays_to_uniforms(nir_shader *shader,
5593 unsigned max_uniform_components);
5594 bool nir_lower_convert_alu_types(nir_shader *shader,
5595 bool (*should_lower)(nir_intrinsic_instr *));
5596 bool nir_lower_constant_convert_alu_types(nir_shader *shader);
5597 bool nir_lower_alu_conversion_to_intrinsic(nir_shader *shader);
5598 bool nir_lower_int_to_float(nir_shader *shader);
5599 bool nir_lower_load_const_to_scalar(nir_shader *shader);
5600 bool nir_lower_read_invocation_to_scalar(nir_shader *shader);
5601 bool nir_lower_phis_to_scalar(nir_shader *shader, bool lower_all);
5602 void nir_lower_io_arrays_to_elements(nir_shader *producer, nir_shader *consumer);
5603 bool nir_lower_io_arrays_to_elements_no_indirects(nir_shader *shader,
5604 bool outputs_only);
5605 bool nir_lower_io_to_scalar(nir_shader *shader, nir_variable_mode mask, nir_instr_filter_cb filter, void *filter_data);
5606 bool nir_lower_io_to_scalar_early(nir_shader *shader, nir_variable_mode mask);
5607 bool nir_lower_io_to_vector(nir_shader *shader, nir_variable_mode mask);
5608 bool nir_vectorize_tess_levels(nir_shader *shader);
5609 nir_shader *nir_create_passthrough_tcs_impl(const nir_shader_compiler_options *options,
5610 unsigned *locations, unsigned num_locations,
5611 uint8_t patch_vertices);
5612 nir_shader *nir_create_passthrough_tcs(const nir_shader_compiler_options *options,
5613 const nir_shader *vs, uint8_t patch_vertices);
5614 nir_shader *nir_create_passthrough_gs(const nir_shader_compiler_options *options,
5615 const nir_shader *prev_stage,
5616 enum mesa_prim primitive_type,
5617 enum mesa_prim output_primitive_type,
5618 bool emulate_edgeflags,
5619 bool force_line_strip_out);
5620
5621 bool nir_lower_fragcolor(nir_shader *shader, unsigned max_cbufs);
5622 bool nir_lower_fragcoord_wtrans(nir_shader *shader);
5623 bool nir_lower_frag_coord_to_pixel_coord(nir_shader *shader);
5624 bool nir_lower_viewport_transform(nir_shader *shader);
5625 bool nir_lower_uniforms_to_ubo(nir_shader *shader, bool dword_packed, bool load_vec4);
5626
5627 bool nir_lower_is_helper_invocation(nir_shader *shader);
5628
5629 bool nir_lower_single_sampled(nir_shader *shader);
5630
5631 typedef struct nir_lower_subgroups_options {
5632 uint8_t subgroup_size;
5633 uint8_t ballot_bit_size;
5634 uint8_t ballot_components;
5635 bool lower_to_scalar : 1;
5636 bool lower_vote_trivial : 1;
5637 bool lower_vote_eq : 1;
5638 bool lower_vote_bool_eq : 1;
5639 bool lower_first_invocation_to_ballot : 1;
5640 bool lower_read_first_invocation : 1;
5641 bool lower_subgroup_masks : 1;
5642 bool lower_relative_shuffle : 1;
5643 bool lower_shuffle_to_32bit : 1;
5644 bool lower_shuffle_to_swizzle_amd : 1;
5645 bool lower_shuffle : 1;
5646 bool lower_quad : 1;
5647 bool lower_quad_broadcast_dynamic : 1;
5648 bool lower_quad_broadcast_dynamic_to_const : 1;
5649 bool lower_elect : 1;
5650 bool lower_read_invocation_to_cond : 1;
5651 bool lower_rotate_to_shuffle : 1;
5652 bool lower_ballot_bit_count_to_mbcnt_amd : 1;
5653 bool lower_inverse_ballot : 1;
5654 bool lower_boolean_reduce : 1;
5655 bool lower_boolean_shuffle : 1;
5656 } nir_lower_subgroups_options;
5657
5658 bool nir_lower_subgroups(nir_shader *shader,
5659 const nir_lower_subgroups_options *options);
5660
5661 bool nir_lower_system_values(nir_shader *shader);
5662
5663 nir_def *
5664 nir_build_lowered_load_helper_invocation(struct nir_builder *b);
5665
5666 typedef struct nir_lower_compute_system_values_options {
5667 bool has_base_global_invocation_id : 1;
5668 bool has_base_workgroup_id : 1;
5669 bool shuffle_local_ids_for_quad_derivatives : 1;
5670 bool lower_local_invocation_index : 1;
5671 bool lower_cs_local_id_to_index : 1;
5672 bool lower_workgroup_id_to_index : 1;
5673 /* At shader execution time, check if WorkGroupId should be 1D
5674 * and compute it quickly. Fall back to slow computation if not.
5675 */
5676 bool shortcut_1d_workgroup_id : 1;
5677 uint32_t num_workgroups[3]; /* Compile-time-known dispatch sizes, or 0 if unknown. */
5678 } nir_lower_compute_system_values_options;
5679
5680 bool nir_lower_compute_system_values(nir_shader *shader,
5681 const nir_lower_compute_system_values_options *options);
5682
5683 struct nir_lower_sysvals_to_varyings_options {
5684 bool frag_coord : 1;
5685 bool front_face : 1;
5686 bool point_coord : 1;
5687 };
5688
5689 bool
5690 nir_lower_sysvals_to_varyings(nir_shader *shader,
5691 const struct nir_lower_sysvals_to_varyings_options *options);
5692
5693 /***/
5694 enum ENUM_PACKED nir_lower_tex_packing {
5695 /** No packing */
5696 nir_lower_tex_packing_none = 0,
5697 /**
5698 * The sampler returns up to 2 32-bit words of half floats or 16-bit signed
5699 * or unsigned ints based on the sampler type
5700 */
5701 nir_lower_tex_packing_16,
5702 /** The sampler returns 1 32-bit word of 4x8 unorm */
5703 nir_lower_tex_packing_8,
5704 };
5705
5706 /***/
5707 typedef struct nir_lower_tex_options {
5708 /**
5709 * bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which
5710 * sampler types a texture projector is lowered.
5711 */
5712 unsigned lower_txp;
5713
5714 /**
5715 * If true, lower texture projector for any array sampler dims
5716 */
5717 bool lower_txp_array;
5718
5719 /**
5720 * If true, lower away nir_tex_src_offset for all texelfetch instructions.
5721 */
5722 bool lower_txf_offset;
5723
5724 /**
5725 * If true, lower away nir_tex_src_offset for all rect textures.
5726 */
5727 bool lower_rect_offset;
5728
5729 /**
5730 * If not NULL, this filter will return true for tex instructions that
5731 * should lower away nir_tex_src_offset.
5732 */
5733 nir_instr_filter_cb lower_offset_filter;
5734
5735 /**
5736 * If true, lower rect textures to 2D, using txs to fetch the
5737 * texture dimensions and dividing the texture coords by the
5738 * texture dims to normalize.
5739 */
5740 bool lower_rect;
5741
5742 /**
5743 * If true, lower 1D textures to 2D. This requires the GL/VK driver to map 1D
5744 * textures to 2D textures with height=1.
5745 *
5746 * lower_1d_shadow does this lowering for shadow textures only.
5747 */
5748 bool lower_1d;
5749 bool lower_1d_shadow;
5750
5751 /**
5752 * If true, convert yuv to rgb.
5753 */
5754 unsigned lower_y_uv_external;
5755 unsigned lower_y_vu_external;
5756 unsigned lower_y_u_v_external;
5757 unsigned lower_yx_xuxv_external;
5758 unsigned lower_yx_xvxu_external;
5759 unsigned lower_xy_uxvx_external;
5760 unsigned lower_xy_vxux_external;
5761 unsigned lower_ayuv_external;
5762 unsigned lower_xyuv_external;
5763 unsigned lower_yuv_external;
5764 unsigned lower_yu_yv_external;
5765 unsigned lower_yv_yu_external;
5766 unsigned lower_y41x_external;
5767 unsigned bt709_external;
5768 unsigned bt2020_external;
5769 unsigned yuv_full_range_external;
5770
5771 /**
5772 * To emulate certain texture wrap modes, this can be used
5773 * to saturate the specified tex coord to [0.0, 1.0]. The
5774 * bits are according to sampler #, ie. if, for example:
5775 *
5776 * (conf->saturate_s & (1 << n))
5777 *
5778 * is true, then the s coord for sampler n is saturated.
5779 *
5780 * Note that clamping must happen *after* projector lowering
5781 * so any projected texture sample instruction with a clamped
5782 * coordinate gets automatically lowered, regardless of the
5783 * 'lower_txp' setting.
5784 */
5785 unsigned saturate_s;
5786 unsigned saturate_t;
5787 unsigned saturate_r;
5788
5789 /* Bitmask of textures that need swizzling.
5790 *
5791 * If (swizzle_result & (1 << texture_index)), then the swizzle in
5792 * swizzles[texture_index] is applied to the result of the texturing
5793 * operation.
5794 */
5795 unsigned swizzle_result;
5796
5797 /* A swizzle for each texture. Values 0-3 represent x, y, z, or w swizzles
5798 * while 4 and 5 represent 0 and 1 respectively.
5799 *
5800 * Indexed by texture-id.
5801 */
5802 uint8_t swizzles[32][4];
5803
5804 /* Can be used to scale sampled values in range required by the
5805 * format.
5806 *
5807 * Indexed by texture-id.
5808 */
5809 float scale_factors[32];
5810
5811 /**
5812 * Bitmap of textures that need srgb to linear conversion. If
5813 * (lower_srgb & (1 << texture_index)) then the rgb (xyz) components
5814 * of the texture are lowered to linear.
5815 */
5816 unsigned lower_srgb;
5817
5818 /**
5819 * If true, lower nir_texop_txd on cube maps with nir_texop_txl.
5820 */
5821 bool lower_txd_cube_map;
5822
5823 /**
5824 * If true, lower nir_texop_txd on 3D surfaces with nir_texop_txl.
5825 */
5826 bool lower_txd_3d;
5827
5828 /**
    * If true, lower nir_texop_txd on any array surfaces with nir_texop_txl.
5830 */
5831 bool lower_txd_array;
5832
5833 /**
5834 * If true, lower nir_texop_txd on shadow samplers (except cube maps)
5835 * with nir_texop_txl. Notice that cube map shadow samplers are lowered
5836 * with lower_txd_cube_map.
5837 */
5838 bool lower_txd_shadow;
5839
5840 /**
5841 * If true, lower nir_texop_txd on all samplers to a nir_texop_txl.
5842 * Implies lower_txd_cube_map and lower_txd_shadow.
5843 */
5844 bool lower_txd;
5845
5846 /**
5847 * If true, lower nir_texop_txd when it uses min_lod.
5848 */
5849 bool lower_txd_clamp;
5850
5851 /**
5852 * If true, lower nir_texop_txb that try to use shadow compare and min_lod
5853 * at the same time to a nir_texop_lod, some math, and nir_texop_tex.
5854 */
5855 bool lower_txb_shadow_clamp;
5856
5857 /**
5858 * If true, lower nir_texop_txd on shadow samplers when it uses min_lod
5859 * with nir_texop_txl. This includes cube maps.
5860 */
5861 bool lower_txd_shadow_clamp;
5862
5863 /**
    * If true, lower nir_texop_txd when it uses both offset and min_lod
5865 * with nir_texop_txl. This includes cube maps.
5866 */
5867 bool lower_txd_offset_clamp;
5868
5869 /**
5870 * If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the
5871 * sampler is bindless.
5872 */
5873 bool lower_txd_clamp_bindless_sampler;
5874
5875 /**
5876 * If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the
5877 * sampler index is not statically determinable to be less than 16.
5878 */
5879 bool lower_txd_clamp_if_sampler_index_not_lt_16;
5880
5881 /**
5882 * If true, lower nir_texop_txs with a non-0-lod into nir_texop_txs with
5883 * 0-lod followed by a nir_ishr.
5884 */
5885 bool lower_txs_lod;
5886
5887 /**
5888 * If true, lower nir_texop_txs for cube arrays to a nir_texop_txs with a
5889 * 2D array type followed by a nir_idiv by 6.
5890 */
5891 bool lower_txs_cube_array;
5892
5893 /**
5894 * If true, apply a .bagr swizzle on tg4 results to handle Broadcom's
5895 * mixed-up tg4 locations.
5896 */
5897 bool lower_tg4_broadcom_swizzle;
5898
5899 /**
5900 * If true, lowers tg4 with 4 constant offsets to 4 tg4 calls
5901 */
5902 bool lower_tg4_offsets;
5903
   /**
    * Lower txf_ms to fragment_mask_fetch and fragment_fetch, and lower
    * samples_identical to fragment_mask_fetch.
    */
5908 bool lower_to_fragment_fetch_amd;
5909
5910 /**
5911 * To lower packed sampler return formats. This will be called for all
5912 * tex instructions.
5913 */
5914 enum nir_lower_tex_packing (*lower_tex_packing_cb)(const nir_tex_instr *tex, const void *data);
5915 const void *lower_tex_packing_data;
5916
5917 /**
5918 * If true, lower nir_texop_lod to return -FLT_MAX if the sum of the
5919 * absolute values of derivatives is 0 for all coordinates.
5920 */
5921 bool lower_lod_zero_width;
5922
   /* In stages without implicit derivatives (like the vertex shader), turns
    * nir_op_tex and other ops with an implicit derivative into ops with an
    * explicit LOD of 0.
    */
5928
5929 /* If true, texture_index (sampler_index) will be zero if a texture_offset
5930 * (sampler_offset) source is present. This is convenient for backends that
5931 * support indirect indexing of textures (samplers) but not offsetting it.
5932 */
5933 bool lower_index_to_offset;
5934
5935 /**
5936 * Payload data to be sent to callback / filter functions.
5937 */
5938 void *callback_data;
5939 } nir_lower_tex_options;
5940
5941 /** Lowers complex texture instructions to simpler ones */
5942 bool nir_lower_tex(nir_shader *shader,
5943 const nir_lower_tex_options *options);
5944
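/* A minimal sketch of a lower_tex_packing_cb that requests 16-bit unpacking
 * for everything except shadow comparisons (the policy is illustrative
 * only):
 *
 *    static enum nir_lower_tex_packing
 *    tex_packing_cb(const nir_tex_instr *tex, const void *data)
 *    {
 *       return tex->is_shadow ? nir_lower_tex_packing_none
 *                             : nir_lower_tex_packing_16;
 *    }
 *
 * The callback is then hooked up through
 * nir_lower_tex_options::lower_tex_packing_cb before calling nir_lower_tex().
 */
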
5945 typedef struct nir_lower_tex_shadow_swizzle {
5946 unsigned swizzle_r : 3;
5947 unsigned swizzle_g : 3;
5948 unsigned swizzle_b : 3;
5949 unsigned swizzle_a : 3;
5950 } nir_lower_tex_shadow_swizzle;
5951
5952 bool
5953 nir_lower_tex_shadow(nir_shader *s,
5954 unsigned n_states,
5955 enum compare_func *compare_func,
5956 nir_lower_tex_shadow_swizzle *tex_swizzles);
5957
5958 typedef struct nir_lower_image_options {
5959 /**
5960 * If true, lower cube size operations.
5961 */
5962 bool lower_cube_size;
5963
5964 /**
5965 * Lower multi sample image load and samples_identical to use fragment_mask_load.
5966 */
5967 bool lower_to_fragment_mask_load_amd;
5968
5969 /**
5970 * Lower image_samples to a constant in case the driver doesn't support multisampled
5971 * images.
5972 */
5973 bool lower_image_samples_to_one;
5974 } nir_lower_image_options;
5975
5976 bool nir_lower_image(nir_shader *nir,
5977 const nir_lower_image_options *options);
5978
5979 bool
5980 nir_lower_image_atomics_to_global(nir_shader *s);
5981
5982 bool nir_lower_readonly_images_to_tex(nir_shader *shader, bool per_variable);
5983
5984 enum nir_lower_non_uniform_access_type {
5985 nir_lower_non_uniform_ubo_access = (1 << 0),
5986 nir_lower_non_uniform_ssbo_access = (1 << 1),
5987 nir_lower_non_uniform_texture_access = (1 << 2),
5988 nir_lower_non_uniform_image_access = (1 << 3),
5989 nir_lower_non_uniform_get_ssbo_size = (1 << 4),
5990 };
5991
5992 /* Given the nir_src used for the resource, return the channels which might be non-uniform. */
5993 typedef nir_component_mask_t (*nir_lower_non_uniform_access_callback)(const nir_src *, void *);
5994
5995 typedef struct nir_lower_non_uniform_access_options {
5996 enum nir_lower_non_uniform_access_type types;
5997 nir_lower_non_uniform_access_callback callback;
5998 void *callback_data;
5999 } nir_lower_non_uniform_access_options;
6000
6001 bool nir_has_non_uniform_access(nir_shader *shader, enum nir_lower_non_uniform_access_type types);
6002 bool nir_opt_non_uniform_access(nir_shader *shader);
6003 bool nir_lower_non_uniform_access(nir_shader *shader,
6004 const nir_lower_non_uniform_access_options *options);
6005
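/* A minimal sketch of a nir_lower_non_uniform_access_callback for a
 * hypothetical driver whose resource index is a vec2 in which only the
 * first component can diverge:
 *
 *    static nir_component_mask_t
 *    non_uniform_cb(const nir_src *src, void *data)
 *    {
 *       return 0x1;
 *    }
 *
 *    const nir_lower_non_uniform_access_options opts = {
 *       .types = nir_lower_non_uniform_texture_access |
 *                nir_lower_non_uniform_image_access,
 *       .callback = non_uniform_cb,
 *    };
 *    nir_lower_non_uniform_access(shader, &opts);
 */
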
6006 typedef struct {
6007 /* Whether 16-bit floating point arithmetic should be allowed in 8-bit
6008 * division lowering
6009 */
6010 bool allow_fp16;
6011 } nir_lower_idiv_options;
6012
6013 bool nir_lower_idiv(nir_shader *shader, const nir_lower_idiv_options *options);
6014
6015 typedef struct nir_input_attachment_options {
6016 bool use_fragcoord_sysval;
6017 bool use_layer_id_sysval;
6018 bool use_view_id_for_layer;
6019 uint32_t unscaled_input_attachment_ir3;
6020 } nir_input_attachment_options;
6021
6022 bool nir_lower_input_attachments(nir_shader *shader,
6023 const nir_input_attachment_options *options);
6024
6025 bool nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables,
6026 bool use_vars,
6027 bool use_clipdist_array,
6028 const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
6029 bool nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables,
6030 bool use_clipdist_array,
6031 const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
6032 bool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables,
6033 bool use_clipdist_array);
6034
6035 bool nir_lower_clip_cull_distance_to_vec4s(nir_shader *shader);
6036 bool nir_lower_clip_cull_distance_arrays(nir_shader *nir);
6037 bool nir_lower_clip_disable(nir_shader *shader, unsigned clip_plane_enable);
6038
6039 bool nir_lower_point_size_mov(nir_shader *shader,
6040 const gl_state_index16 *pointsize_state_tokens);
6041
6042 bool nir_lower_frexp(nir_shader *nir);
6043
6044 bool nir_lower_two_sided_color(nir_shader *shader, bool face_sysval);
6045
6046 bool nir_lower_clamp_color_outputs(nir_shader *shader);
6047
6048 bool nir_lower_flatshade(nir_shader *shader);
6049
6050 bool nir_lower_passthrough_edgeflags(nir_shader *shader);
6051 bool nir_lower_patch_vertices(nir_shader *nir, unsigned static_count,
6052 const gl_state_index16 *uniform_state_tokens);
6053
6054 typedef struct nir_lower_wpos_ytransform_options {
6055 gl_state_index16 state_tokens[STATE_LENGTH];
6056 bool fs_coord_origin_upper_left : 1;
6057 bool fs_coord_origin_lower_left : 1;
6058 bool fs_coord_pixel_center_integer : 1;
6059 bool fs_coord_pixel_center_half_integer : 1;
6060 } nir_lower_wpos_ytransform_options;
6061
6062 bool nir_lower_wpos_ytransform(nir_shader *shader,
6063 const nir_lower_wpos_ytransform_options *options);
6064 bool nir_lower_wpos_center(nir_shader *shader);
6065
6066 bool nir_lower_pntc_ytransform(nir_shader *shader,
6067 const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
6068
6072 bool nir_lower_wrmasks(nir_shader *shader, nir_instr_filter_cb cb, const void *data);
6073
6074 bool nir_lower_fb_read(nir_shader *shader);
6075
6076 typedef struct nir_lower_drawpixels_options {
6077 gl_state_index16 texcoord_state_tokens[STATE_LENGTH];
6078 gl_state_index16 scale_state_tokens[STATE_LENGTH];
6079 gl_state_index16 bias_state_tokens[STATE_LENGTH];
6080 unsigned drawpix_sampler;
6081 unsigned pixelmap_sampler;
6082 bool pixel_maps : 1;
6083 bool scale_and_bias : 1;
6084 } nir_lower_drawpixels_options;
6085
6086 bool nir_lower_drawpixels(nir_shader *shader,
6087 const nir_lower_drawpixels_options *options);
6088
6089 typedef struct nir_lower_bitmap_options {
6090 unsigned sampler;
6091 bool swizzle_xxxx;
6092 } nir_lower_bitmap_options;
6093
6094 bool nir_lower_bitmap(nir_shader *shader, const nir_lower_bitmap_options *options);
6095
6096 bool nir_lower_atomics_to_ssbo(nir_shader *shader, unsigned offset_align_state);
6097
6098 typedef enum {
6099 nir_lower_gs_intrinsics_per_stream = 1 << 0,
6100 nir_lower_gs_intrinsics_count_primitives = 1 << 1,
6101 nir_lower_gs_intrinsics_count_vertices_per_primitive = 1 << 2,
6102 nir_lower_gs_intrinsics_overwrite_incomplete = 1 << 3,
6103 nir_lower_gs_intrinsics_always_end_primitive = 1 << 4,
6104 nir_lower_gs_intrinsics_count_decomposed_primitives = 1 << 5,
6105 } nir_lower_gs_intrinsics_flags;
6106
6107 bool nir_lower_gs_intrinsics(nir_shader *shader, nir_lower_gs_intrinsics_flags options);
6108
6109 bool nir_lower_tess_coord_z(nir_shader *shader, bool triangles);
6110
6111 typedef struct {
6112 bool payload_to_shared_for_atomics : 1;
6113 bool payload_to_shared_for_small_types : 1;
6114 uint32_t payload_offset_in_bytes;
6115 } nir_lower_task_shader_options;
6116
6117 bool nir_lower_task_shader(nir_shader *shader, nir_lower_task_shader_options options);
6118
6119 typedef unsigned (*nir_lower_bit_size_callback)(const nir_instr *, void *);
6120
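/* A minimal sketch of a nir_lower_bit_size_callback for hardware that only
 * has 32-bit ALU: return the bit size to lower to, or 0 to leave the
 * instruction alone. Illustrative only, and it assumes a NIR version where
 * nir_alu_instr exposes its destination as "def":
 *
 *    static unsigned
 *    lower_bit_size_cb(const nir_instr *instr, void *data)
 *    {
 *       if (instr->type != nir_instr_type_alu)
 *          return 0;
 *       unsigned bit_size =
 *          nir_instr_as_alu((nir_instr *)instr)->def.bit_size;
 *       return (bit_size > 1 && bit_size < 32) ? 32 : 0;
 *    }
 *
 *    nir_lower_bit_size(shader, lower_bit_size_cb, NULL);
 */
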
6121 bool nir_lower_bit_size(nir_shader *shader,
6122 nir_lower_bit_size_callback callback,
6123 void *callback_data);
6124 bool nir_lower_64bit_phis(nir_shader *shader);
6125
6126 bool nir_split_64bit_vec3_and_vec4(nir_shader *shader);
6127
6128 nir_lower_int64_options nir_lower_int64_op_to_options_mask(nir_op opcode);
6129 bool nir_lower_int64(nir_shader *shader);
6130 bool nir_lower_int64_float_conversions(nir_shader *shader);
6131
6132 nir_lower_doubles_options nir_lower_doubles_op_to_options_mask(nir_op opcode);
6133 bool nir_lower_doubles(nir_shader *shader, const nir_shader *softfp64,
6134 nir_lower_doubles_options options);
6135 bool nir_lower_pack(nir_shader *shader);
6136
6137 bool nir_recompute_io_bases(nir_shader *nir, nir_variable_mode modes);
6138 bool nir_lower_mediump_vars(nir_shader *nir, nir_variable_mode modes);
6139 bool nir_lower_mediump_io(nir_shader *nir, nir_variable_mode modes,
6140 uint64_t varying_mask, bool use_16bit_slots);
6141 bool nir_force_mediump_io(nir_shader *nir, nir_variable_mode modes,
6142 nir_alu_type types);
6143 bool nir_unpack_16bit_varying_slots(nir_shader *nir, nir_variable_mode modes);
6144
6145 struct nir_fold_tex_srcs_options {
6146 unsigned sampler_dims;
6147 unsigned src_types;
6148 };
6149
6150 struct nir_fold_16bit_tex_image_options {
6151 nir_rounding_mode rounding_mode;
6152 nir_alu_type fold_tex_dest_types;
6153 nir_alu_type fold_image_dest_types;
6154 bool fold_image_store_data;
6155 bool fold_image_srcs;
6156 unsigned fold_srcs_options_count;
6157 struct nir_fold_tex_srcs_options *fold_srcs_options;
6158 };
6159
6160 bool nir_fold_16bit_tex_image(nir_shader *nir,
6161 struct nir_fold_16bit_tex_image_options *options);
6162
6163 typedef struct {
6164 bool legalize_type; /* whether this src should be legalized */
6165 uint8_t bit_size; /* bit_size to enforce */
6166 nir_tex_src_type match_src; /* if bit_size is 0, match bit size of this */
6167 } nir_tex_src_type_constraint, nir_tex_src_type_constraints[nir_num_tex_src_types];
6168
6169 bool nir_legalize_16bit_sampler_srcs(nir_shader *nir,
6170 nir_tex_src_type_constraints constraints);
6171
6172 bool nir_lower_point_size(nir_shader *shader, float min, float max);
6173
6174 void nir_lower_texcoord_replace(nir_shader *s, unsigned coord_replace,
6175 bool point_coord_is_sysval, bool yinvert);
6176
6177 void nir_lower_texcoord_replace_late(nir_shader *s, unsigned coord_replace,
6178 bool point_coord_is_sysval);
6179
6180 typedef enum {
6181 nir_lower_interpolation_at_sample = (1 << 1),
6182 nir_lower_interpolation_at_offset = (1 << 2),
6183 nir_lower_interpolation_centroid = (1 << 3),
6184 nir_lower_interpolation_pixel = (1 << 4),
6185 nir_lower_interpolation_sample = (1 << 5),
6186 } nir_lower_interpolation_options;
6187
6188 bool nir_lower_interpolation(nir_shader *shader,
6189 nir_lower_interpolation_options options);
6190
6191 typedef enum {
6192 nir_lower_discard_if_to_cf = (1 << 0),
6193 nir_lower_demote_if_to_cf = (1 << 1),
6194 nir_lower_terminate_if_to_cf = (1 << 2),
6195 } nir_lower_discard_if_options;
6196
6197 bool nir_lower_discard_if(nir_shader *shader, nir_lower_discard_if_options options);
6198
6199 bool nir_lower_discard_or_demote(nir_shader *shader,
6200 bool force_correct_quad_ops_after_discard);
6201
6202 bool nir_lower_memory_model(nir_shader *shader);
6203
6204 bool nir_lower_goto_ifs(nir_shader *shader);
6205 bool nir_lower_continue_constructs(nir_shader *shader);
6206
6207 bool nir_shader_uses_view_index(nir_shader *shader);
6208 bool nir_can_lower_multiview(nir_shader *shader);
6209 bool nir_lower_multiview(nir_shader *shader, uint32_t view_mask);
6210
6211 typedef enum {
6212 nir_lower_fp16_rtz = (1 << 0),
6213 nir_lower_fp16_rtne = (1 << 1),
6214 nir_lower_fp16_ru = (1 << 2),
6215 nir_lower_fp16_rd = (1 << 3),
6216 nir_lower_fp16_all = 0xf,
6217 nir_lower_fp16_split_fp64 = (1 << 4),
6218 } nir_lower_fp16_cast_options;
6219 bool nir_lower_fp16_casts(nir_shader *shader, nir_lower_fp16_cast_options options);
6220 bool nir_normalize_cubemap_coords(nir_shader *shader);
6221
6222 bool nir_shader_supports_implicit_lod(nir_shader *shader);
6223
6224 void nir_live_defs_impl(nir_function_impl *impl);
6225
6226 const BITSET_WORD *nir_get_live_defs(nir_cursor cursor, void *mem_ctx);
6227
6228 void nir_loop_analyze_impl(nir_function_impl *impl,
6229 nir_variable_mode indirect_mask,
6230 bool force_unroll_sampler_indirect);
6231
6232 bool nir_defs_interfere(nir_def *a, nir_def *b);
6233
6234 bool nir_repair_ssa_impl(nir_function_impl *impl);
6235 bool nir_repair_ssa(nir_shader *shader);
6236
6237 void nir_convert_loop_to_lcssa(nir_loop *loop);
6238 bool nir_convert_to_lcssa(nir_shader *shader, bool skip_invariants, bool skip_bool_invariants);
6239 void nir_divergence_analysis(nir_shader *shader);
6240 void nir_vertex_divergence_analysis(nir_shader *shader);
6241 bool nir_update_instr_divergence(nir_shader *shader, nir_instr *instr);
6242 bool nir_has_divergent_loop(nir_shader *shader);
6243
6244 void
6245 nir_rewrite_uses_to_load_reg(struct nir_builder *b, nir_def *old,
6246 nir_def *reg);
6247
6248 /* If phi_webs_only is true, only convert SSA values involved in phi nodes to
6249 * registers. If false, convert all values (even those not involved in a phi
6250 * node) to registers.
6251 */
6252 bool nir_convert_from_ssa(nir_shader *shader,
6253 bool phi_webs_only);
6254
6255 bool nir_lower_phis_to_regs_block(nir_block *block);
6256 bool nir_lower_ssa_defs_to_regs_block(nir_block *block);
6257
6258 bool nir_rematerialize_deref_in_use_blocks(nir_deref_instr *instr);
6259 bool nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl);
6260
6261 bool nir_lower_samplers(nir_shader *shader);
6262 bool nir_lower_cl_images(nir_shader *shader, bool lower_image_derefs, bool lower_sampler_derefs);
6263 bool nir_dedup_inline_samplers(nir_shader *shader);
6264 bool nir_lower_ssbo(nir_shader *shader);
6265 bool nir_lower_helper_writes(nir_shader *shader, bool lower_plain_stores);
6266
6267 typedef struct nir_lower_printf_options {
6268 unsigned max_buffer_size;
6269 } nir_lower_printf_options;
6270
6271 bool nir_lower_printf(nir_shader *nir, const nir_lower_printf_options *options);
6272
6273 /* This is here for unit tests. */
6274 bool nir_opt_comparison_pre_impl(nir_function_impl *impl);
6275
6276 bool nir_opt_comparison_pre(nir_shader *shader);
6277
6278 typedef struct nir_opt_access_options {
6279 bool is_vulkan;
6280 } nir_opt_access_options;
6281
6282 bool nir_opt_access(nir_shader *shader, const nir_opt_access_options *options);
6283 bool nir_opt_algebraic(nir_shader *shader);
6284 bool nir_opt_algebraic_before_ffma(nir_shader *shader);
6285 bool nir_opt_algebraic_late(nir_shader *shader);
6286 bool nir_opt_algebraic_distribute_src_mods(nir_shader *shader);
6287 bool nir_opt_constant_folding(nir_shader *shader);
6288
6289 /* Try to combine a and b into a. Return true if combination was possible,
6290 * which will result in b being removed by the pass. Return false if
6291 * combination wasn't possible.
6292 */
6293 typedef bool (*nir_combine_barrier_cb)(
6294 nir_intrinsic_instr *a, nir_intrinsic_instr *b, void *data);
6295
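/* A minimal sketch of a nir_combine_barrier_cb that merges two barriers by
 * taking the union of their memory modes/semantics and the wider of their
 * scopes; real drivers may need stricter rules:
 *
 *    static bool
 *    combine_barriers(nir_intrinsic_instr *a, nir_intrinsic_instr *b,
 *                     void *data)
 *    {
 *       nir_intrinsic_set_memory_modes(a, nir_intrinsic_memory_modes(a) |
 *                                         nir_intrinsic_memory_modes(b));
 *       nir_intrinsic_set_memory_semantics(
 *          a, nir_intrinsic_memory_semantics(a) |
 *             nir_intrinsic_memory_semantics(b));
 *       nir_intrinsic_set_memory_scope(
 *          a, MAX2(nir_intrinsic_memory_scope(a),
 *                  nir_intrinsic_memory_scope(b)));
 *       nir_intrinsic_set_execution_scope(
 *          a, MAX2(nir_intrinsic_execution_scope(a),
 *                  nir_intrinsic_execution_scope(b)));
 *       return true;
 *    }
 *
 *    nir_opt_combine_barriers(shader, combine_barriers, NULL);
 */
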
6296 bool nir_opt_combine_barriers(nir_shader *shader,
6297 nir_combine_barrier_cb combine_cb,
6298 void *data);
6299 bool nir_opt_barrier_modes(nir_shader *shader);
6300
6301 bool nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes);
6302
6303 bool nir_copy_prop_impl(nir_function_impl *impl);
6304 bool nir_copy_prop(nir_shader *shader);
6305
6306 bool nir_opt_copy_prop_vars(nir_shader *shader);
6307
6308 bool nir_opt_cse(nir_shader *shader);
6309
6310 bool nir_opt_dce(nir_shader *shader);
6311
6312 bool nir_opt_dead_cf(nir_shader *shader);
6313
6314 bool nir_opt_dead_write_vars(nir_shader *shader);
6315
6316 bool nir_opt_deref_impl(nir_function_impl *impl);
6317 bool nir_opt_deref(nir_shader *shader);
6318
6319 bool nir_opt_find_array_copies(nir_shader *shader);
6320
6321 bool nir_opt_fragdepth(nir_shader *shader);
6322
6323 bool nir_opt_gcm(nir_shader *shader, bool value_number);
6324
6325 bool nir_opt_idiv_const(nir_shader *shader, unsigned min_bit_size);
6326
6327 typedef enum {
6328 nir_opt_if_optimize_phi_true_false = (1 << 0),
6329 nir_opt_if_avoid_64bit_phis = (1 << 1),
6330 } nir_opt_if_options;
6331
6332 bool nir_opt_if(nir_shader *shader, nir_opt_if_options options);
6333
6334 bool nir_opt_intrinsics(nir_shader *shader);
6335
6336 bool nir_opt_large_constants(nir_shader *shader,
6337 glsl_type_size_align_func size_align,
6338 unsigned threshold);
6339
6340 bool nir_opt_loop(nir_shader *shader);
6341
6342 bool nir_opt_loop_unroll(nir_shader *shader);
6343
6344 typedef enum {
6345 nir_move_const_undef = (1 << 0),
6346 nir_move_load_ubo = (1 << 1),
6347 nir_move_load_input = (1 << 2),
6348 nir_move_comparisons = (1 << 3),
6349 nir_move_copies = (1 << 4),
6350 nir_move_load_ssbo = (1 << 5),
6351 nir_move_load_uniform = (1 << 6),
6352 nir_move_alu = (1 << 7),
6353 } nir_move_options;
6354
6355 bool nir_can_move_instr(nir_instr *instr, nir_move_options options);
6356
6357 bool nir_opt_sink(nir_shader *shader, nir_move_options options);
6358
6359 bool nir_opt_move(nir_shader *shader, nir_move_options options);
6360
6361 typedef struct {
6362 /** nir_load_uniform max base offset */
6363 uint32_t uniform_max;
6364
6365 /** nir_load_ubo_vec4 max base offset */
6366 uint32_t ubo_vec4_max;
6367
6368 /** nir_var_mem_shared max base offset */
6369 uint32_t shared_max;
6370
6371 /** nir_load/store_buffer_amd max base offset */
6372 uint32_t buffer_max;
6373 } nir_opt_offsets_options;
6374
6375 bool nir_opt_offsets(nir_shader *shader, const nir_opt_offsets_options *options);
6376
6377 bool nir_opt_peephole_select(nir_shader *shader, unsigned limit,
6378 bool indirect_load_ok, bool expensive_alu_ok);
6379
6380 bool nir_opt_reassociate_bfi(nir_shader *shader);
6381
6382 bool nir_opt_rematerialize_compares(nir_shader *shader);
6383
6384 bool nir_opt_remove_phis(nir_shader *shader);
6385 bool nir_opt_remove_phis_block(nir_block *block);
6386
6387 bool nir_opt_phi_precision(nir_shader *shader);
6388
6389 bool nir_opt_shrink_stores(nir_shader *shader, bool shrink_image_store);
6390
6391 bool nir_opt_shrink_vectors(nir_shader *shader);
6392
6393 bool nir_opt_undef(nir_shader *shader);
6394
6395 bool nir_lower_undef_to_zero(nir_shader *shader);
6396
6397 bool nir_opt_uniform_atomics(nir_shader *shader);
6398
6399 bool nir_opt_uniform_subgroup(nir_shader *shader,
6400 const nir_lower_subgroups_options *);
6401
6402 bool nir_opt_vectorize(nir_shader *shader, nir_vectorize_cb filter,
6403 void *data);
6404
6405 bool nir_opt_conditional_discard(nir_shader *shader);
6406 bool nir_opt_move_discards_to_top(nir_shader *shader);
6407
6408 bool nir_opt_ray_queries(nir_shader *shader);
6409
6410 bool nir_opt_ray_query_ranges(nir_shader *shader);
6411
6412 bool nir_opt_reuse_constants(nir_shader *shader);
6413
6414 void nir_sweep(nir_shader *shader);
6415
6416 void nir_remap_dual_slot_attributes(nir_shader *shader,
6417 uint64_t *dual_slot_inputs);
6418 uint64_t nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot);
6419
6420 nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val);
6421 gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin);
6422
6423 static inline bool
nir_variable_is_in_ubo(const nir_variable *var)
6425 {
6426 return (var->data.mode == nir_var_mem_ubo &&
6427 var->interface_type != NULL);
6428 }
6429
6430 static inline bool
nir_variable_is_in_ssbo(const nir_variable *var)
6432 {
6433 return (var->data.mode == nir_var_mem_ssbo &&
6434 var->interface_type != NULL);
6435 }
6436
6437 static inline bool
nir_variable_is_in_block(const nir_variable *var)
6439 {
6440 return nir_variable_is_in_ubo(var) || nir_variable_is_in_ssbo(var);
6441 }
6442
6443 static inline unsigned
nir_variable_count_slots(const nir_variable *var, const struct glsl_type *type)
6445 {
6446 return var->data.compact ? DIV_ROUND_UP(var->data.location_frac + glsl_get_length(type), 4) : glsl_count_attribute_slots(type, false);
6447 }
6448
6449 static inline unsigned
nir_deref_count_slots(nir_deref_instr *deref, nir_variable *var)
6451 {
6452 if (var->data.compact) {
6453 switch (deref->deref_type) {
6454 case nir_deref_type_array:
6455 return 1;
6456 case nir_deref_type_var:
6457 return nir_variable_count_slots(var, deref->type);
6458 default:
6459 unreachable("illegal deref type");
6460 }
6461 }
6462 return glsl_count_attribute_slots(deref->type, false);
6463 }
6464
6465 /* See default_ub_config in nir_range_analysis.c for documentation. */
6466 typedef struct nir_unsigned_upper_bound_config {
6467 unsigned min_subgroup_size;
6468 unsigned max_subgroup_size;
6469 unsigned max_workgroup_invocations;
6470 unsigned max_workgroup_count[3];
6471 unsigned max_workgroup_size[3];
6472
6473 uint32_t vertex_attrib_max[32];
6474 } nir_unsigned_upper_bound_config;
6475
6476 uint32_t
6477 nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
6478 nir_scalar scalar,
6479 const nir_unsigned_upper_bound_config *config);
6480
6481 bool
6482 nir_addition_might_overflow(nir_shader *shader, struct hash_table *range_ht,
6483 nir_scalar ssa, unsigned const_val,
6484 const nir_unsigned_upper_bound_config *config);
6485
6486 typedef struct {
6487 /* True if gl_DrawID is considered uniform, i.e. if the preamble is run
6488 * at least once per "internal" draw rather than per user-visible draw.
6489 */
6490 bool drawid_uniform;
6491
6492 /* True if the subgroup size is uniform. */
6493 bool subgroup_size_uniform;
6494
6495 /* True if load_workgroup_size is supported in the preamble. */
6496 bool load_workgroup_size_allowed;
6497
6498 /* size/align for load/store_preamble. */
6499 void (*def_size)(nir_def *def, unsigned *size, unsigned *align);
6500
6501 /* Total available size for load/store_preamble storage, in units
6502 * determined by def_size.
6503 */
6504 unsigned preamble_storage_size;
6505
6506 /* Give the cost for an instruction. nir_opt_preamble will prioritize
6507 * instructions with higher costs. Instructions with cost 0 may still be
6508 * lifted, but only when required to lift other instructions with non-0
6509 * cost (e.g. a load_const source of an expression).
6510 */
6511 float (*instr_cost_cb)(nir_instr *instr, const void *data);
6512
6513 /* Gives the cost of rewriting an instruction to use load_preamble, e.g.
6514  * the cost of any move instructions that may have to be inserted. If the
6515  * benefit doesn't exceed this cost, the instruction isn't rewritten.
6516  */
6517 float (*rewrite_cost_cb)(nir_def *def, const void *data);
6518
6519 /* Instructions whose definitions should not be rewritten. These could
6520 * still be moved to the preamble, but they shouldn't be the root of a
6521 * replacement expression. Instructions with cost 0 and derefs are
6522 * automatically included by the pass.
6523 */
6524 nir_instr_filter_cb avoid_instr_cb;
6525
6526 const void *cb_data;
6527 } nir_opt_preamble_options;
6528
6529 bool
6530 nir_opt_preamble(nir_shader *shader,
6531 const nir_opt_preamble_options *options,
6532 unsigned *size);
6533
6534 nir_function_impl *nir_shader_get_preamble(nir_shader *shader);
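
/* Illustrative sketch (callback names are hypothetical): a backend fills the
 * options with callbacks modelling its own size and cost metrics, runs the
 * pass, and then compiles the generated preamble separately:
 *
 *    const nir_opt_preamble_options opts = {
 *       .drawid_uniform = true,
 *       .subgroup_size_uniform = true,
 *       .def_size = my_def_size,
 *       .preamble_storage_size = 128,
 *       .instr_cost_cb = my_instr_cost,
 *       .rewrite_cost_cb = my_rewrite_cost,
 *       .avoid_instr_cb = my_avoid_instr,
 *    };
 *    unsigned size = 0;
 *    if (nir_opt_preamble(shader, &opts, &size)) {
 *       nir_function_impl *preamble = nir_shader_get_preamble(shader);
 *       // Lower the resulting load/store_preamble intrinsics and compile
 *       // `preamble` as the driver's per-draw/per-dispatch prologue.
 *    }
 */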
6535
6536 bool nir_lower_point_smooth(nir_shader *shader);
6537 bool nir_lower_poly_line_smooth(nir_shader *shader, unsigned num_smooth_aa_sample);
6538
6539 bool nir_mod_analysis(nir_scalar val, nir_alu_type val_type, unsigned div, unsigned *mod);
6540
6541 bool
6542 nir_remove_tex_shadow(nir_shader *shader, unsigned textures_bitmask);
6543
6544 void
6545 nir_trivialize_registers(nir_shader *s);
6546
6547 unsigned
6548 nir_static_workgroup_size(const nir_shader *s);
6549
6550 static inline nir_intrinsic_instr *
6551 nir_reg_get_decl(nir_def *reg)
6552 {
6553 assert(reg->parent_instr->type == nir_instr_type_intrinsic);
6554 nir_intrinsic_instr *decl = nir_instr_as_intrinsic(reg->parent_instr);
6555 assert(decl->intrinsic == nir_intrinsic_decl_reg);
6556
6557 return decl;
6558 }
6559
6560 static inline nir_intrinsic_instr *
6561 nir_next_decl_reg(nir_intrinsic_instr *prev, nir_function_impl *impl)
6562 {
6563 nir_instr *start;
6564 if (prev != NULL)
6565 start = nir_instr_next(&prev->instr);
6566 else if (impl != NULL)
6567 start = nir_block_first_instr(nir_start_block(impl));
6568 else
6569 return NULL;
6570
6571 for (nir_instr *instr = start; instr; instr = nir_instr_next(instr)) {
6572 if (instr->type != nir_instr_type_intrinsic)
6573 continue;
6574
6575 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
6576 if (intrin->intrinsic == nir_intrinsic_decl_reg)
6577 return intrin;
6578 }
6579
6580 return NULL;
6581 }
6582
6583 #define nir_foreach_reg_decl(reg, impl) \
6584 for (nir_intrinsic_instr *reg = nir_next_decl_reg(NULL, impl); \
6585 reg; reg = nir_next_decl_reg(reg, NULL))
6586
6587 #define nir_foreach_reg_decl_safe(reg, impl) \
6588 for (nir_intrinsic_instr *reg = nir_next_decl_reg(NULL, impl), \
6589 *next_ = nir_next_decl_reg(reg, NULL); \
6590 reg; reg = next_, next_ = nir_next_decl_reg(next_, NULL))
6591
6592 static inline nir_cursor
6593 nir_after_reg_decls(nir_function_impl *impl)
6594 {
6595 nir_intrinsic_instr *last_reg_decl = NULL;
6596 nir_foreach_reg_decl(reg_decl, impl)
6597 last_reg_decl = reg_decl;
6598
6599 if (last_reg_decl != NULL)
6600 return nir_after_instr(&last_reg_decl->instr);
6601 return nir_before_impl(impl);
6602 }
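
/* Illustrative sketch: these helpers apply once registers exist as
 * decl_reg/load_reg/store_reg intrinsics (i.e. after going out of SSA).
 * A backend might, for example, number every register and then insert
 * setup code after the declarations:
 *
 *    unsigned num_regs = 0;
 *    nir_foreach_reg_decl(decl, impl)
 *       decl->def.index = num_regs++;
 *
 *    nir_builder b = nir_builder_at(nir_after_reg_decls(impl));
 *    // ... emit code that loads/stores the registers ...
 */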
6603
6604 static inline bool
6605 nir_is_load_reg(nir_intrinsic_instr *intr)
6606 {
6607 return intr->intrinsic == nir_intrinsic_load_reg ||
6608 intr->intrinsic == nir_intrinsic_load_reg_indirect;
6609 }
6610
6611 static inline bool
6612 nir_is_store_reg(nir_intrinsic_instr *intr)
6613 {
6614 return intr->intrinsic == nir_intrinsic_store_reg ||
6615 intr->intrinsic == nir_intrinsic_store_reg_indirect;
6616 }
6617
6618 #define nir_foreach_reg_load(load, reg) \
6619 assert(reg->intrinsic == nir_intrinsic_decl_reg); \
6620 \
6621 nir_foreach_use(load, &reg->def) \
6622 if (nir_is_load_reg(nir_instr_as_intrinsic(nir_src_parent_instr(load))))
6623
6624 #define nir_foreach_reg_load_safe(load, reg) \
6625 assert(reg->intrinsic == nir_intrinsic_decl_reg); \
6626 \
6627 nir_foreach_use_safe(load, &reg->def) \
6628 if (nir_is_load_reg(nir_instr_as_intrinsic(nir_src_parent_instr(load))))
6629
6630 #define nir_foreach_reg_store(store, reg) \
6631 assert(reg->intrinsic == nir_intrinsic_decl_reg); \
6632 \
6633 nir_foreach_use(store, &reg->def) \
6634 if (nir_is_store_reg(nir_instr_as_intrinsic(nir_src_parent_instr(store))))
6635
6636 #define nir_foreach_reg_store_safe(store, reg) \
6637 assert(reg->intrinsic == nir_intrinsic_decl_reg); \
6638 \
6639 nir_foreach_use_safe(store, &reg->def) \
6640 if (nir_is_store_reg(nir_instr_as_intrinsic(nir_src_parent_instr(store))))
6641
6642 static inline nir_intrinsic_instr *
6643 nir_load_reg_for_def(const nir_def *def)
6644 {
6645 if (def->parent_instr->type != nir_instr_type_intrinsic)
6646 return NULL;
6647
6648 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(def->parent_instr);
6649 if (!nir_is_load_reg(intr))
6650 return NULL;
6651
6652 return intr;
6653 }
6654
6655 static inline nir_intrinsic_instr *
6656 nir_store_reg_for_def(const nir_def *def)
6657 {
6658 /* Look for the trivial store: a single use of our destination, where that
6659  * use is the data source of a store_reg intrinsic.
6660  */
6661 if (!list_is_singular(&def->uses))
6662 return NULL;
6663
6664 nir_src *src = list_first_entry(&def->uses, nir_src, use_link);
6665 if (nir_src_is_if(src))
6666 return NULL;
6667
6668 nir_instr *parent = nir_src_parent_instr(src);
6669 if (parent->type != nir_instr_type_intrinsic)
6670 return NULL;
6671
6672 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
6673 if (!nir_is_store_reg(intr))
6674 return NULL;
6675
6676 /* The use must be the data source (src[0]); a use as the indirect index doesn't count. */
6677 if (&intr->src[0] != src)
6678 return NULL;
6679
6680 return intr;
6681 }
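
/* Illustrative sketch: backends use these helpers during instruction
 * selection to fold trivial register accesses into the producing or
 * consuming instruction rather than emitting separate moves:
 *
 *    nir_intrinsic_instr *store = nir_store_reg_for_def(def);
 *    if (store != NULL) {
 *       // `def` has a single use, as the data source of a store_reg, so the
 *       // instruction producing `def` can write the register directly.
 *    }
 */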
6682
6683 #include "nir_inline_helpers.h"
6684
6685 #ifdef __cplusplus
6686 } /* extern "C" */
6687 #endif
6688
6689 #endif /* NIR_H */
6690