/*
 * Copyright © 2014 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */

#ifndef NIR_H
#define NIR_H

#include "util/hash_table.h"
#include "compiler/glsl/list.h"
#include "GL/gl.h" /* GLenum */
#include "util/list.h"
#include "util/ralloc.h"
#include "util/set.h"
#include "util/bitscan.h"
#include "util/bitset.h"
#include "util/enum_operators.h"
#include "util/macros.h"
#include "util/format/u_format.h"
#include "compiler/nir_types.h"
#include "compiler/shader_enums.h"
#include "compiler/shader_info.h"
#define XXH_INLINE_ALL
#include "util/xxhash.h"
#include <stdio.h>

#ifndef NDEBUG
#include "util/debug.h"
#endif /* NDEBUG */

#include "nir_opcodes.h"

#if defined(_WIN32) && !defined(snprintf)
#define snprintf _snprintf
#endif

#ifdef __cplusplus
extern "C" {
#endif

#define NIR_FALSE 0u
#define NIR_TRUE (~0u)
#define NIR_MAX_VEC_COMPONENTS 16
#define NIR_MAX_MATRIX_COLUMNS 4
#define NIR_STREAM_PACKED (1 << 8)
typedef uint16_t nir_component_mask_t;

static inline bool
nir_num_components_valid(unsigned num_components)
{
   return (num_components >= 1 &&
           num_components <= 4) ||
          num_components == 8 ||
          num_components == 16;
}

bool nir_component_mask_can_reinterpret(nir_component_mask_t mask,
                                        unsigned old_bit_size,
                                        unsigned new_bit_size);
nir_component_mask_t
nir_component_mask_reinterpret(nir_component_mask_t mask,
                               unsigned old_bit_size,
                               unsigned new_bit_size);

/** Defines a cast function
 *
 * This macro defines a cast function from in_type to out_type where
 * out_type is some structure type that contains a field of type in_type.
 *
 * Note that you have to be a bit careful as the generated cast function
 * destroys constness.
 */
#define NIR_DEFINE_CAST(name, in_type, out_type, field, \
                        type_field, type_value)         \
static inline out_type *                                \
name(const in_type *parent)                             \
{                                                       \
   assert(parent && parent->type_field == type_value);  \
   return exec_node_data(out_type, parent, field);      \
}

struct nir_function;
struct nir_shader;
struct nir_instr;
struct nir_builder;


/**
 * Description of built-in state associated with a uniform
 *
 * \sa nir_variable::state_slots
 */
typedef struct {
   gl_state_index16 tokens[STATE_LENGTH];
   uint16_t swizzle;
} nir_state_slot;

typedef enum {
   nir_var_shader_in        = (1 << 0),
   nir_var_shader_out       = (1 << 1),
   nir_var_shader_temp      = (1 << 2),
   nir_var_function_temp    = (1 << 3),
   nir_var_uniform          = (1 << 4),
   nir_var_mem_ubo          = (1 << 5),
   nir_var_system_value     = (1 << 6),
   nir_var_mem_ssbo         = (1 << 7),
   nir_var_mem_shared       = (1 << 8),
   nir_var_mem_global       = (1 << 9),
   nir_var_mem_generic      = (nir_var_shader_temp |
                               nir_var_function_temp |
                               nir_var_mem_shared |
                               nir_var_mem_global),
   nir_var_mem_push_const   = (1 << 10), /* not actually used for variables */
   nir_var_mem_constant     = (1 << 11),
   /** Incoming call or ray payload data for ray-tracing shaders */
   nir_var_shader_call_data = (1 << 12),
   /** Ray hit attributes */
   nir_var_ray_hit_attrib   = (1 << 13),
   nir_var_read_only_modes  = nir_var_shader_in | nir_var_uniform |
                              nir_var_system_value | nir_var_mem_constant,
   nir_num_variable_modes   = 14,
   nir_var_all              = (1 << nir_num_variable_modes) - 1,
} nir_variable_mode;
MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(nir_variable_mode)

/**
 * Rounding modes.
 */
typedef enum {
   nir_rounding_mode_undef = 0,
   nir_rounding_mode_rtne  = 1, /* round to nearest even */
   nir_rounding_mode_ru    = 2, /* round up */
   nir_rounding_mode_rd    = 3, /* round down */
   nir_rounding_mode_rtz   = 4, /* round towards zero */
} nir_rounding_mode;

typedef union {
   bool b;
   float f32;
   double f64;
   int8_t i8;
   uint8_t u8;
   int16_t i16;
   uint16_t u16;
   int32_t i32;
   uint32_t u32;
   int64_t i64;
   uint64_t u64;
} nir_const_value;

#define nir_const_value_to_array(arr, c, components, m) \
do { \
   for (unsigned i = 0; i < components; ++i) \
      arr[i] = c[i].m; \
} while (false)

static inline nir_const_value
nir_const_value_for_raw_uint(uint64_t x, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   switch (bit_size) {
   case 1:  v.b   = x;  break;
   case 8:  v.u8  = x;  break;
   case 16: v.u16 = x;  break;
   case 32: v.u32 = x;  break;
   case 64: v.u64 = x;  break;
   default:
      unreachable("Invalid bit size");
   }

   return v;
}

static inline nir_const_value
nir_const_value_for_int(int64_t i, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   assert(bit_size <= 64);
   if (bit_size < 64) {
      assert(i >= (-(1ll << (bit_size - 1))));
      assert(i < (1ll << (bit_size - 1)));
   }

   return nir_const_value_for_raw_uint(i, bit_size);
}

static inline nir_const_value
nir_const_value_for_uint(uint64_t u, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   assert(bit_size <= 64);
   if (bit_size < 64)
      assert(u < (1ull << bit_size));

   return nir_const_value_for_raw_uint(u, bit_size);
}
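/* Example (illustrative): building constants with the helpers above.
 *
 *    nir_const_value one = nir_const_value_for_int(1, 32);     // one.i32 == 1
 *    nir_const_value t   = nir_const_value_for_bool(true, 32); // t.i32  == -1
 *
 * The boolean case relies on the 0/-1 convention used by
 * nir_const_value_for_bool() and nir_const_value_as_bool() below.
 */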
static inline nir_const_value
nir_const_value_for_bool(bool b, unsigned bit_size)
{
   /* Booleans use a 0/-1 convention */
   return nir_const_value_for_int(-(int)b, bit_size);
}

/* This one isn't inline because it requires half-float conversion */
nir_const_value nir_const_value_for_float(double b, unsigned bit_size);

static inline int64_t
nir_const_value_as_int(nir_const_value value, unsigned bit_size)
{
   switch (bit_size) {
   /* int1_t uses 0/-1 convention */
   case 1:  return -(int)value.b;
   case 8:  return value.i8;
   case 16: return value.i16;
   case 32: return value.i32;
   case 64: return value.i64;
   default:
      unreachable("Invalid bit size");
   }
}

static inline uint64_t
nir_const_value_as_uint(nir_const_value value, unsigned bit_size)
{
   switch (bit_size) {
   case 1:  return value.b;
   case 8:  return value.u8;
   case 16: return value.u16;
   case 32: return value.u32;
   case 64: return value.u64;
   default:
      unreachable("Invalid bit size");
   }
}

static inline bool
nir_const_value_as_bool(nir_const_value value, unsigned bit_size)
{
   int64_t i = nir_const_value_as_int(value, bit_size);

   /* Booleans of any size use 0/-1 convention */
   assert(i == 0 || i == -1);

   return i;
}

/* This one isn't inline because it requires half-float conversion */
double nir_const_value_as_float(nir_const_value value, unsigned bit_size);

typedef struct nir_constant {
   /**
    * Value of the constant.
    *
    * The field used to back the values supplied by the constant is determined
    * by the type associated with the \c nir_variable.  Constants may be
    * scalars, vectors, or matrices.
    */
   nir_const_value values[NIR_MAX_VEC_COMPONENTS];

   /* We could get this from var->type, but it makes clone *much* easier to
    * not have to care about the type.
    */
   unsigned num_elements;

   /* Array elements / Structure Fields */
   struct nir_constant **elements;
} nir_constant;

/**
 * \brief Layout qualifiers for gl_FragDepth.
 *
 * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared
 * with a layout qualifier.
 */
typedef enum {
   nir_depth_layout_none, /**< No depth layout is specified. */
   nir_depth_layout_any,
   nir_depth_layout_greater,
   nir_depth_layout_less,
   nir_depth_layout_unchanged
} nir_depth_layout;

/**
 * Enum keeping track of how a variable was declared.
 */
typedef enum {
   /**
    * Normal declaration.
    */
   nir_var_declared_normally = 0,

   /**
    * Variable is implicitly generated by the compiler and should not be
    * visible via the API.
    */
   nir_var_hidden,
} nir_var_declaration_type;
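/* Example (illustrative): a constant initializer for "vec3 v = vec3(0, 1, 2)"
 * is a single nir_constant using values[0..2], while an initializer for
 * "float a[2] = float[](3.0, 4.0)" sets num_elements = 2 and points
 * elements[i] at one child nir_constant per array element.
 */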
/**
 * Either a uniform, global variable, shader input, or shader output. Based on
 * ir_variable - it should be easy to translate between the two.
 */
typedef struct nir_variable {
   struct exec_node node;

   /**
    * Declared type of the variable
    */
   const struct glsl_type *type;

   /**
    * Declared name of the variable
    */
   char *name;

   struct nir_variable_data {
      /**
       * Storage class of the variable.
       *
       * \sa nir_variable_mode
       */
      unsigned mode:14;

      /**
       * Is the variable read-only?
       *
       * This is set for variables declared as \c const, shader inputs,
       * and uniforms.
       */
      unsigned read_only:1;
      unsigned centroid:1;
      unsigned sample:1;
      unsigned patch:1;
      unsigned invariant:1;

      /**
       * Precision qualifier.
       *
       * In desktop GLSL we do not care about precision qualifiers at all; in
       * fact, the spec says that precision qualifiers are ignored.
       *
       * To make things easy, we make it so that this field is always
       * GLSL_PRECISION_NONE on desktop shaders. This way all the variables
       * have the same precision value and the checks we add in the compiler
       * for this field will never break a desktop shader compile.
       */
      unsigned precision:2;

      /**
       * Can this variable be coalesced with another?
       *
       * This is set by nir_lower_io_to_temporaries to say that any
       * copies involving this variable should stay put. Propagating it can
       * duplicate the resulting load/store, which is not wanted, and may
       * result in a load/store of the variable with an indirect offset which
       * the backend may not be able to handle.
       */
      unsigned cannot_coalesce:1;

      /**
       * When separate shader programs are enabled, only inputs/outputs between
       * the stages of a multi-stage separate program can be safely removed
       * from the shader interface. Other inputs/outputs must remain active.
       *
       * This is also used to make sure xfb varyings that are unused by the
       * fragment shader are not removed.
       */
      unsigned always_active_io:1;

      /**
       * Interpolation mode for shader inputs / outputs
       *
       * \sa glsl_interp_mode
       */
      unsigned interpolation:3;

      /**
       * If non-zero, then this variable may be packed along with other variables
       * into a single varying slot, so this offset should be applied when
       * accessing components. For example, an offset of 1 means that the x
       * component of this variable is actually stored in component y of the
       * location specified by \c location.
       */
      unsigned location_frac:2;

      /**
       * If true, this variable represents an array of scalars that should
       * be tightly packed. In other words, consecutive array elements
       * should be stored one component apart, rather than one slot apart.
       */
      unsigned compact:1;

      /**
       * Whether this is a fragment shader output implicitly initialized with
       * the previous contents of the specified render target at the
       * framebuffer location corresponding to this shader invocation.
       */
      unsigned fb_fetch_output:1;

      /**
       * Non-zero if this variable is considered bindless as defined by
       * ARB_bindless_texture.
       */
      unsigned bindless:1;

      /**
       * Was an explicit binding set in the shader?
       */
      unsigned explicit_binding:1;

      /**
       * Was the location explicitly set in the shader?
       *
       * If the location is explicitly set in the shader, it \b cannot be changed
       * by the linker or by the API (e.g., calls to \c glBindAttribLocation have
       * no effect).
       */
      unsigned explicit_location:1;

      /**
       * Was a transform feedback buffer set in the shader?
       */
      unsigned explicit_xfb_buffer:1;

      /**
       * Was a transform feedback stride set in the shader?
       */
      unsigned explicit_xfb_stride:1;

      /**
       * Was an explicit offset set in the shader?
       */
      unsigned explicit_offset:1;

      /**
       * Layout of the matrix. Uses glsl_matrix_layout values.
       */
      unsigned matrix_layout:2;

      /**
       * Non-zero if this variable was created by lowering a named interface
       * block.
       */
      unsigned from_named_ifc_block:1;

      /**
       * How the variable was declared. See nir_var_declaration_type.
       *
       * This is used to detect variables generated by the compiler, so they
       * should not be visible via the API.
       */
      unsigned how_declared:2;

      /**
       * Is this variable per-view? If so, we know it must be an array with
       * size corresponding to the number of views.
       */
      unsigned per_view:1;

      /**
       * \brief Layout qualifier for gl_FragDepth. See nir_depth_layout.
       *
       * This is not equal to \c ir_depth_layout_none if and only if this
       * variable is \c gl_FragDepth and a layout qualifier is specified.
       */
      unsigned depth_layout:3;

      /**
       * Vertex stream output identifier.
       *
       * For packed outputs, NIR_STREAM_PACKED is set and bits [2*i+1,2*i]
       * indicate the stream of the i-th component.
       */
      unsigned stream:9;

      /**
       * See gl_access_qualifier.
       *
       * Access flags for memory variables (SSBO/global), image uniforms, and
       * bindless images in uniforms/inputs/outputs.
       */
      unsigned access:8;

      /**
       * Descriptor set binding for sampler or UBO.
       */
      unsigned descriptor_set:5;

      /**
       * Output index for dual-source blending.
       */
      unsigned index;

      /**
       * Initial binding point for a sampler or UBO.
       *
       * For array types, this represents the binding point for the first element.
       */
      unsigned binding;

      /**
       * Storage location of the base of this variable
       *
       * The precise meaning of this field depends on the nature of the variable.
       *
       *   - Vertex shader input: one of the values from \c gl_vert_attrib.
       *   - Vertex shader output: one of the values from \c gl_varying_slot.
       *   - Geometry shader input: one of the values from \c gl_varying_slot.
       *   - Geometry shader output: one of the values from \c gl_varying_slot.
       *   - Fragment shader input: one of the values from \c gl_varying_slot.
       *   - Fragment shader output: one of the values from \c gl_frag_result.
       *   - Uniforms: Per-stage uniform slot number for default uniform block.
       *   - Uniforms: Index within the uniform block definition for UBO members.
       *   - Non-UBO Uniforms: uniform slot number.
       *   - Other: This field is not currently used.
       *
       * If the variable is a uniform, shader input, or shader output, and the
       * slot has not been assigned, the value will be -1.
       */
      int location;

      /**
       * The actual location of the variable in the IR. Only valid for inputs,
       * outputs, and uniforms (including samplers and images).
       */
      unsigned driver_location;

      /**
       * Location an atomic counter or transform feedback is stored at.
       */
      unsigned offset;

      union {
         struct {
            /** Image internal format if specified explicitly, otherwise PIPE_FORMAT_NONE. */
            enum pipe_format format;
         } image;

         struct {
            /**
             * For OpenCL inline samplers. See cl_sampler_addressing_mode and
             * cl_sampler_filter_mode.
             */
            unsigned is_inline_sampler : 1;
            unsigned addressing_mode : 3;
            unsigned normalized_coordinates : 1;
            unsigned filter_mode : 1;
         } sampler;

         struct {
            /**
             * Transform feedback buffer.
             */
            uint16_t buffer:2;

            /**
             * Transform feedback stride.
             */
            uint16_t stride;
         } xfb;
      };
   } data;

   /**
    * Identifier for this variable generated by nir_index_vars() that is unique
    * among other variables in the same exec_list.
    */
   unsigned index;

   /* Number of nir_variable_data members */
   uint16_t num_members;

   /**
    * Built-in state that backs this uniform
    *
    * Once set at variable creation, \c state_slots must remain invariant.
    * This is because, ideally, this array would be shared by all clones of
    * this variable in the IR tree.  In other words, we'd really like for it
    * to be a fly-weight.
    *
    * If the variable is not a uniform, \c num_state_slots will be zero and
    * \c state_slots will be \c NULL.
    */
   /*@{*/
   uint16_t num_state_slots;    /**< Number of state slots used */
   nir_state_slot *state_slots; /**< State descriptors. */
   /*@}*/

   /**
    * Constant expression assigned in the initializer of the variable
    *
    * This field should only be used temporarily by creators of NIR shaders
    * and then lower_constant_initializers can be used to get rid of them.
    * Most of the rest of NIR ignores this field or asserts that it's NULL.
    */
   nir_constant *constant_initializer;

   /**
    * Global variable assigned in the initializer of the variable
    *
    * This field should only be used temporarily by creators of NIR shaders
    * and then lower_constant_initializers can be used to get rid of them.
    * Most of the rest of NIR ignores this field or asserts that it's NULL.
    */
   struct nir_variable *pointer_initializer;

   /**
    * For variables that are in an interface block or are an instance of an
    * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block.
    *
    * \sa ir_variable::location
    */
   const struct glsl_type *interface_type;

   /**
    * Description of per-member data for per-member struct variables
    *
    * This is used for variables which are actually an amalgamation of
    * multiple entities such as a struct of built-in values or a struct of
    * inputs each with their own layout specifier.  This is only allowed on
    * variables with a struct or array of array of struct type.
    */
   struct nir_variable_data *members;
} nir_variable;

static inline bool
_nir_shader_variable_has_mode(nir_variable *var, unsigned modes)
{
   /* This isn't a shader variable */
   assert(!(modes & nir_var_function_temp));
   return var->data.mode & modes;
}

#define nir_foreach_variable_in_list(var, var_list) \
   foreach_list_typed(nir_variable, var, node, var_list)

#define nir_foreach_variable_in_list_safe(var, var_list) \
   foreach_list_typed_safe(nir_variable, var, node, var_list)

#define nir_foreach_variable_in_shader(var, shader) \
   nir_foreach_variable_in_list(var, &(shader)->variables)

#define nir_foreach_variable_in_shader_safe(var, shader) \
   nir_foreach_variable_in_list_safe(var, &(shader)->variables)

#define nir_foreach_variable_with_modes(var, shader, modes) \
   nir_foreach_variable_in_shader(var, shader) \
      if (_nir_shader_variable_has_mode(var, modes))

#define nir_foreach_variable_with_modes_safe(var, shader, modes) \
   nir_foreach_variable_in_shader_safe(var, shader) \
      if (_nir_shader_variable_has_mode(var, modes))

#define nir_foreach_shader_in_variable(var, shader) \
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_in)

#define nir_foreach_shader_in_variable_safe(var, shader) \
   nir_foreach_variable_with_modes_safe(var, shader, nir_var_shader_in)

#define nir_foreach_shader_out_variable(var, shader) \
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out)

#define nir_foreach_shader_out_variable_safe(var, shader) \
   nir_foreach_variable_with_modes_safe(var, shader, nir_var_shader_out)

#define nir_foreach_uniform_variable(var, shader) \
   nir_foreach_variable_with_modes(var, shader, nir_var_uniform)

#define nir_foreach_uniform_variable_safe(var, shader) \
   nir_foreach_variable_with_modes_safe(var, shader, nir_var_uniform)

static inline bool
nir_variable_is_global(const nir_variable *var)
{
   return var->data.mode != nir_var_function_temp;
}
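/* Example (illustrative): counting a shader's inputs with the iterators above.
 *
 *    unsigned num_inputs = 0;
 *    nir_foreach_shader_in_variable(var, shader)
 *       num_inputs++;
 *
 * Use the _safe variants when removing variables from the list while iterating.
 */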
typedef struct nir_register {
   struct exec_node node;

   unsigned num_components; /** < number of vector components */
   unsigned num_array_elems; /** < size of array (0 for no array) */

   /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */
   uint8_t bit_size;

   /** generic register index. */
   unsigned index;

   /** only for debug purposes, can be NULL */
   const char *name;

   /** set of nir_srcs where this register is used (read from) */
   struct list_head uses;

   /** set of nir_dests where this register is defined (written to) */
   struct list_head defs;

   /** set of nir_ifs where this register is used as a condition */
   struct list_head if_uses;
} nir_register;

#define nir_foreach_register(reg, reg_list) \
   foreach_list_typed(nir_register, reg, node, reg_list)
#define nir_foreach_register_safe(reg, reg_list) \
   foreach_list_typed_safe(nir_register, reg, node, reg_list)

typedef enum PACKED {
   nir_instr_type_alu,
   nir_instr_type_deref,
   nir_instr_type_call,
   nir_instr_type_tex,
   nir_instr_type_intrinsic,
   nir_instr_type_load_const,
   nir_instr_type_jump,
   nir_instr_type_ssa_undef,
   nir_instr_type_phi,
   nir_instr_type_parallel_copy,
} nir_instr_type;

typedef struct nir_instr {
   struct exec_node node;
   struct nir_block *block;
   nir_instr_type type;

   /* A temporary for optimization and analysis passes to use for storing
    * flags.  For instance, DCE uses this to store the "dead/live" info.
    */
   uint8_t pass_flags;

   /** generic instruction index. */
   uint32_t index;
} nir_instr;

static inline nir_instr *
nir_instr_next(nir_instr *instr)
{
   struct exec_node *next = exec_node_get_next(&instr->node);
   if (exec_node_is_tail_sentinel(next))
      return NULL;
   else
      return exec_node_data(nir_instr, next, node);
}

static inline nir_instr *
nir_instr_prev(nir_instr *instr)
{
   struct exec_node *prev = exec_node_get_prev(&instr->node);
   if (exec_node_is_head_sentinel(prev))
      return NULL;
   else
      return exec_node_data(nir_instr, prev, node);
}

static inline bool
nir_instr_is_first(const nir_instr *instr)
{
   return exec_node_is_head_sentinel(exec_node_get_prev_const(&instr->node));
}

static inline bool
nir_instr_is_last(const nir_instr *instr)
{
   return exec_node_is_tail_sentinel(exec_node_get_next_const(&instr->node));
}
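/* Example (illustrative): stepping over the rest of a block starting at a
 * given instruction.  nir_instr_next()/nir_instr_prev() return NULL when they
 * hit the list sentinels at the block boundaries.
 *
 *    for (nir_instr *i = instr; i != NULL; i = nir_instr_next(i)) {
 *       if (i->type == nir_instr_type_alu)
 *          ... ;
 *    }
 */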
typedef struct nir_ssa_def {
   /** for debugging only, can be NULL */
   const char* name;

   /** Instruction which produces this SSA value. */
   nir_instr *parent_instr;

   /** set of nir_instrs where this register is used (read from) */
   struct list_head uses;

   /** set of nir_ifs where this register is used as a condition */
   struct list_head if_uses;

   /** generic SSA definition index. */
   unsigned index;

   uint8_t num_components;

   /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */
   uint8_t bit_size;

   /**
    * True if this SSA value may have different values in different SIMD
    * invocations of the shader.  This is set by nir_divergence_analysis.
    */
   bool divergent;
} nir_ssa_def;

struct nir_src;

typedef struct {
   nir_register *reg;
   struct nir_src *indirect; /** < NULL for no indirect offset */
   unsigned base_offset;

   /* TODO use-def chain goes here */
} nir_reg_src;

typedef struct {
   nir_instr *parent_instr;
   struct list_head def_link;

   nir_register *reg;
   struct nir_src *indirect; /** < NULL for no indirect offset */
   unsigned base_offset;

   /* TODO def-use chain goes here */
} nir_reg_dest;

struct nir_if;

typedef struct nir_src {
   union {
      /** Instruction that consumes this value as a source. */
      nir_instr *parent_instr;
      struct nir_if *parent_if;
   };

   struct list_head use_link;

   union {
      nir_reg_src reg;
      nir_ssa_def *ssa;
   };

   bool is_ssa;
} nir_src;

static inline nir_src
nir_src_init(void)
{
   nir_src src = { { NULL } };
   return src;
}

#define NIR_SRC_INIT nir_src_init()

#define nir_foreach_use(src, reg_or_ssa_def) \
   list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link)

#define nir_foreach_use_safe(src, reg_or_ssa_def) \
   list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->uses, use_link)

#define nir_foreach_if_use(src, reg_or_ssa_def) \
   list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link)

#define nir_foreach_if_use_safe(src, reg_or_ssa_def) \
   list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link)

typedef struct {
   union {
      nir_reg_dest reg;
      nir_ssa_def ssa;
   };

   bool is_ssa;
} nir_dest;

static inline nir_dest
nir_dest_init(void)
{
   nir_dest dest = { { { NULL } } };
   return dest;
}

#define NIR_DEST_INIT nir_dest_init()

#define nir_foreach_def(dest, reg) \
   list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link)

#define nir_foreach_def_safe(dest, reg) \
   list_for_each_entry_safe(nir_dest, dest, &(reg)->defs, reg.def_link)

static inline nir_src
nir_src_for_ssa(nir_ssa_def *def)
{
   nir_src src = NIR_SRC_INIT;

   src.is_ssa = true;
   src.ssa = def;

   return src;
}

static inline nir_src
nir_src_for_reg(nir_register *reg)
{
   nir_src src = NIR_SRC_INIT;

   src.is_ssa = false;
   src.reg.reg = reg;
   src.reg.indirect = NULL;
   src.reg.base_offset = 0;

   return src;
}

static inline nir_dest
nir_dest_for_reg(nir_register *reg)
{
   nir_dest dest = NIR_DEST_INIT;

   dest.reg.reg = reg;

   return dest;
}

static inline unsigned
nir_src_bit_size(nir_src src)
{
   return src.is_ssa ? src.ssa->bit_size : src.reg.reg->bit_size;
}

static inline unsigned
nir_src_num_components(nir_src src)
{
   return src.is_ssa ? src.ssa->num_components : src.reg.reg->num_components;
}
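/* Example (illustrative): wrapping an SSA value in a source and walking its
 * uses.
 *
 *    nir_src src = nir_src_for_ssa(def);
 *    nir_foreach_use(use, def) {
 *       nir_instr *user = use->parent_instr;
 *       ...
 *    }
 *
 * nir_foreach_if_use() walks if_uses instead, i.e. the places where the value
 * feeds an if-condition rather than an instruction.
 */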
static inline bool
nir_src_is_const(nir_src src)
{
   return src.is_ssa &&
          src.ssa->parent_instr->type == nir_instr_type_load_const;
}

static inline bool
nir_src_is_divergent(nir_src src)
{
   assert(src.is_ssa);
   return src.ssa->divergent;
}

static inline unsigned
nir_dest_bit_size(nir_dest dest)
{
   return dest.is_ssa ? dest.ssa.bit_size : dest.reg.reg->bit_size;
}

static inline unsigned
nir_dest_num_components(nir_dest dest)
{
   return dest.is_ssa ? dest.ssa.num_components : dest.reg.reg->num_components;
}

static inline bool
nir_dest_is_divergent(nir_dest dest)
{
   assert(dest.is_ssa);
   return dest.ssa.divergent;
}

/* Are all components the same, i.e. .xxxx */
static inline bool
nir_is_same_comp_swizzle(uint8_t *swiz, unsigned nr_comp)
{
   for (unsigned i = 1; i < nr_comp; i++)
      if (swiz[i] != swiz[0])
         return false;
   return true;
}

/* Are all components sequential, i.e. .yzw */
static inline bool
nir_is_sequential_comp_swizzle(uint8_t *swiz, unsigned nr_comp)
{
   for (unsigned i = 1; i < nr_comp; i++)
      if (swiz[i] != (swiz[0] + i))
         return false;
   return true;
}

void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if);
void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr);

typedef struct {
   nir_src src;

   /**
    * \name input modifiers
    */
   /*@{*/
   /**
    * For inputs interpreted as floating point, flips the sign bit. For
    * inputs interpreted as integers, performs the two's complement negation.
    */
   bool negate;

   /**
    * Clears the sign bit for floating point values, and computes the integer
    * absolute value for integers. Note that the negate modifier acts after
    * the absolute value modifier, therefore if both are set then all inputs
    * will become negative.
    */
   bool abs;
   /*@}*/

   /**
    * For each input component, says which component of the register it is
    * chosen from. Note that which elements of the swizzle are used and which
    * are ignored are based on the write mask for most opcodes - for example,
    * a statement like "foo.xzw = bar.zyx" would have a writemask of 1101b and
    * a swizzle of {2, x, 1, 0} where x means "don't care."
    */
   uint8_t swizzle[NIR_MAX_VEC_COMPONENTS];
} nir_alu_src;

typedef struct {
   nir_dest dest;

   /**
    * \name saturate output modifier
    *
    * Only valid for opcodes that output floating-point numbers. Clamps the
    * output to between 0.0 and 1.0 inclusive.
    */
   bool saturate;

   unsigned write_mask : NIR_MAX_VEC_COMPONENTS; /* ignored if dest.is_ssa is true */
} nir_alu_dest;
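/* Example (illustrative): for a per-component opcode, the value read from
 * source s for output channel c is channel swizzle[c] of that source, and a
 * channel is only meaningful if the corresponding write_mask bit is set:
 *
 *    if (alu->dest.write_mask & (1 << c))
 *       chan = alu->src[s].swizzle[c];   // component of src s feeding channel c
 */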
/** NIR sized and unsized types
 *
 * The values in this enum are carefully chosen so that the sized type is
 * just the unsized type OR the number of bits.
 */
typedef enum PACKED {
   nir_type_invalid = 0, /* Not a valid type */
   nir_type_int     = 2,
   nir_type_uint    = 4,
   nir_type_bool    = 6,
   nir_type_float   = 128,
   nir_type_bool1   = 1  | nir_type_bool,
   nir_type_bool8   = 8  | nir_type_bool,
   nir_type_bool16  = 16 | nir_type_bool,
   nir_type_bool32  = 32 | nir_type_bool,
   nir_type_int1    = 1  | nir_type_int,
   nir_type_int8    = 8  | nir_type_int,
   nir_type_int16   = 16 | nir_type_int,
   nir_type_int32   = 32 | nir_type_int,
   nir_type_int64   = 64 | nir_type_int,
   nir_type_uint1   = 1  | nir_type_uint,
   nir_type_uint8   = 8  | nir_type_uint,
   nir_type_uint16  = 16 | nir_type_uint,
   nir_type_uint32  = 32 | nir_type_uint,
   nir_type_uint64  = 64 | nir_type_uint,
   nir_type_float16 = 16 | nir_type_float,
   nir_type_float32 = 32 | nir_type_float,
   nir_type_float64 = 64 | nir_type_float,
} nir_alu_type;

#define NIR_ALU_TYPE_SIZE_MASK 0x79
#define NIR_ALU_TYPE_BASE_TYPE_MASK 0x86

static inline unsigned
nir_alu_type_get_type_size(nir_alu_type type)
{
   return type & NIR_ALU_TYPE_SIZE_MASK;
}

static inline nir_alu_type
nir_alu_type_get_base_type(nir_alu_type type)
{
   return (nir_alu_type)(type & NIR_ALU_TYPE_BASE_TYPE_MASK);
}

static inline nir_alu_type
nir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type)
{
   switch (base_type) {
   case GLSL_TYPE_BOOL:
      return nir_type_bool1;
   case GLSL_TYPE_UINT:
      return nir_type_uint32;
   case GLSL_TYPE_INT:
      return nir_type_int32;
   case GLSL_TYPE_UINT16:
      return nir_type_uint16;
   case GLSL_TYPE_INT16:
      return nir_type_int16;
   case GLSL_TYPE_UINT8:
      return nir_type_uint8;
   case GLSL_TYPE_INT8:
      return nir_type_int8;
   case GLSL_TYPE_UINT64:
      return nir_type_uint64;
   case GLSL_TYPE_INT64:
      return nir_type_int64;
   case GLSL_TYPE_FLOAT:
      return nir_type_float32;
   case GLSL_TYPE_FLOAT16:
      return nir_type_float16;
   case GLSL_TYPE_DOUBLE:
      return nir_type_float64;

   case GLSL_TYPE_SAMPLER:
   case GLSL_TYPE_IMAGE:
   case GLSL_TYPE_ATOMIC_UINT:
   case GLSL_TYPE_STRUCT:
   case GLSL_TYPE_INTERFACE:
   case GLSL_TYPE_ARRAY:
   case GLSL_TYPE_VOID:
   case GLSL_TYPE_SUBROUTINE:
   case GLSL_TYPE_FUNCTION:
   case GLSL_TYPE_ERROR:
      return nir_type_invalid;
   }

   unreachable("unknown type");
}

static inline nir_alu_type
nir_get_nir_type_for_glsl_type(const struct glsl_type *type)
{
   return nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(type));
}

nir_op nir_type_conversion_op(nir_alu_type src, nir_alu_type dst,
                              nir_rounding_mode rnd);

static inline nir_op
nir_op_vec(unsigned components)
{
   switch (components) {
   case  1: return nir_op_mov;
   case  2: return nir_op_vec2;
   case  3: return nir_op_vec3;
   case  4: return nir_op_vec4;
   case  8: return nir_op_vec8;
   case 16: return nir_op_vec16;
   default: unreachable("bad component count");
   }
}
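/* Example (illustrative): sized types are just the unsized type OR'd with the
 * bit size, so
 *
 *    nir_type_float16 == (nir_type_float | 16)
 *    nir_alu_type_get_type_size(nir_type_float16) == 16
 *    nir_alu_type_get_base_type(nir_type_float16) == nir_type_float
 */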
static inline bool
nir_op_is_vec(nir_op op)
{
   switch (op) {
   case nir_op_mov:
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
   case nir_op_vec8:
   case nir_op_vec16:
      return true;
   default:
      return false;
   }
}

static inline bool
nir_is_float_control_signed_zero_inf_nan_preserve(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64);
}

static inline bool
nir_is_denorm_flush_to_zero(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64);
}

static inline bool
nir_is_denorm_preserve(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP64);
}

static inline bool
nir_is_rounding_mode_rtne(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64);
}

static inline bool
nir_is_rounding_mode_rtz(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64);
}

static inline bool
nir_has_any_rounding_mode_rtz(unsigned execution_mode)
{
   return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) ||
          (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) ||
          (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64);
}

static inline bool
nir_has_any_rounding_mode_rtne(unsigned execution_mode)
{
   return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) ||
          (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) ||
          (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64);
}
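/* Example (illustrative): a pass lowering an f32 -> f16 conversion could pick
 * an opcode that honours the shader's float-controls rounding mode with
 *
 *    nir_rounding_mode rnd =
 *       nir_get_rounding_mode_from_float_controls(execution_mode,
 *                                                 nir_type_float16);
 *    nir_op op = nir_type_conversion_op(nir_type_float32, nir_type_float16, rnd);
 *
 * (nir_get_rounding_mode_from_float_controls() is defined just below.)
 */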
static inline nir_rounding_mode
nir_get_rounding_mode_from_float_controls(unsigned execution_mode,
                                          nir_alu_type type)
{
   if (nir_alu_type_get_base_type(type) != nir_type_float)
      return nir_rounding_mode_undef;

   unsigned bit_size = nir_alu_type_get_type_size(type);

   if (nir_is_rounding_mode_rtz(execution_mode, bit_size))
      return nir_rounding_mode_rtz;
   if (nir_is_rounding_mode_rtne(execution_mode, bit_size))
      return nir_rounding_mode_rtne;
   return nir_rounding_mode_undef;
}

static inline bool
nir_has_any_rounding_mode_enabled(unsigned execution_mode)
{
   bool result =
      nir_has_any_rounding_mode_rtne(execution_mode) ||
      nir_has_any_rounding_mode_rtz(execution_mode);
   return result;
}

typedef enum {
   /**
    * Operation where the first two sources are commutative.
    *
    * For 2-source operations, this is just mathematical commutativity.  Some
    * 3-source operations, like ffma, are only commutative in the first two
    * sources.
    */
   NIR_OP_IS_2SRC_COMMUTATIVE = (1 << 0),
   NIR_OP_IS_ASSOCIATIVE = (1 << 1),
} nir_op_algebraic_property;

typedef struct {
   const char *name;

   uint8_t num_inputs;

   /**
    * The number of components in the output
    *
    * If non-zero, this is the size of the output and input sizes are
    * explicitly given; swizzle and writemask are still in effect, but if
    * the output component is masked out, then the input component may
    * still be in use.
    *
    * If zero, the opcode acts in the standard, per-component manner; the
    * operation is performed on each component (except the ones that are
    * masked out) with the input being taken from the input swizzle for
    * that component.
    *
    * The size of some of the inputs may be given (i.e. non-zero) even
    * though output_size is zero; in that case, the inputs with a zero
    * size act per-component, while the inputs with non-zero size don't.
    */
   uint8_t output_size;

   /**
    * The type of vector that the instruction outputs.  Note that the
    * saturate modifier is only allowed on outputs with the float type.
    */
   nir_alu_type output_type;

   /**
    * The number of components in each input
    */
   uint8_t input_sizes[NIR_MAX_VEC_COMPONENTS];

   /**
    * The type of vector that each input takes.  Note that negate and
    * absolute value are only allowed on inputs with int or float type and
    * behave differently on the two.
    */
   nir_alu_type input_types[NIR_MAX_VEC_COMPONENTS];

   nir_op_algebraic_property algebraic_properties;

   /* Whether this represents a numeric conversion opcode */
   bool is_conversion;
} nir_op_info;

extern const nir_op_info nir_op_infos[nir_num_opcodes];

typedef struct nir_alu_instr {
   nir_instr instr;
   nir_op op;

   /** Indicates that this ALU instruction generates an exact value
    *
    * This is kind of a mixture of GLSL "precise" and "invariant" and not
    * really equivalent to either.  This indicates that the value generated by
    * this operation is high-precision and any code transformations that touch
    * it must ensure that the resulting value is bit-for-bit identical to the
    * original.
    */
   bool exact:1;

   /**
    * Indicates that this instruction does not cause wrapping to occur, in
    * the form of overflow or underflow.
    */
   bool no_signed_wrap:1;
   bool no_unsigned_wrap:1;

   nir_alu_dest dest;
   nir_alu_src src[];
} nir_alu_instr;

void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
                      nir_alu_instr *instr);
void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
                       nir_alu_instr *instr);

/* is this source channel used? */
static inline bool
nir_alu_instr_channel_used(const nir_alu_instr *instr, unsigned src,
                           unsigned channel)
{
   if (nir_op_infos[instr->op].input_sizes[src] > 0)
      return channel < nir_op_infos[instr->op].input_sizes[src];

   return (instr->dest.write_mask >> channel) & 1;
}

static inline nir_component_mask_t
nir_alu_instr_src_read_mask(const nir_alu_instr *instr, unsigned src)
{
   nir_component_mask_t read_mask = 0;
   for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) {
      if (!nir_alu_instr_channel_used(instr, src, c))
         continue;

      read_mask |= (1 << instr->src[src].swizzle[c]);
   }
   return read_mask;
}

/**
 * Get the number of channels used for a source
 */
static inline unsigned
nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src)
{
   if (nir_op_infos[instr->op].input_sizes[src] > 0)
      return nir_op_infos[instr->op].input_sizes[src];

   return nir_dest_num_components(instr->dest.dest);
}

static inline bool
nir_alu_instr_is_comparison(const nir_alu_instr *instr)
{
   switch (instr->op) {
   case nir_op_flt:
   case nir_op_fge:
   case nir_op_feq:
   case nir_op_fneu:
   case nir_op_ilt:
   case nir_op_ult:
   case nir_op_ige:
   case nir_op_uge:
   case nir_op_ieq:
   case nir_op_ine:
   case nir_op_i2b1:
   case nir_op_f2b1:
   case nir_op_inot:
      return true;
   default:
      return false;
   }
}

bool nir_const_value_negative_equal(nir_const_value c1, nir_const_value c2,
                                    nir_alu_type full_type);

bool nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2,
                        unsigned src1, unsigned src2);

bool nir_alu_srcs_negative_equal(const nir_alu_instr *alu1,
                                 const nir_alu_instr *alu2,
                                 unsigned src1, unsigned src2);

bool nir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, unsigned srcn);
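/* Example (illustrative): nir_alu_instr_src_read_mask() can be used to tell
 * whether shrinking a source vector is safe; e.g. a mask of 0x3 means only
 * components x and y of that source are ever read, regardless of how the
 * swizzle is ordered.
 */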
typedef enum {
   nir_deref_type_var,
   nir_deref_type_array,
   nir_deref_type_array_wildcard,
   nir_deref_type_ptr_as_array,
   nir_deref_type_struct,
   nir_deref_type_cast,
} nir_deref_type;

typedef struct {
   nir_instr instr;

   /** The type of this deref instruction */
   nir_deref_type deref_type;

   /** Bitmask of what modes the underlying variable might be
    *
    * For OpenCL-style generic pointers, we may not know exactly what mode it
    * is at any given point in time in the compile process.  This bitfield
    * contains the set of modes which it MAY be.
    *
    * Generally, this field should not be accessed directly.  Use one of the
    * nir_deref_mode_ helpers instead.
    */
   nir_variable_mode modes;

   /** The dereferenced type of the resulting pointer value */
   const struct glsl_type *type;

   union {
      /** Variable being dereferenced if deref_type is a deref_var */
      nir_variable *var;

      /** Parent deref if deref_type is not deref_var */
      nir_src parent;
   };

   /** Additional deref parameters */
   union {
      struct {
         nir_src index;
      } arr;

      struct {
         unsigned index;
      } strct;

      struct {
         unsigned ptr_stride;
         unsigned align_mul;
         unsigned align_offset;
      } cast;
   };

   /** Destination to store the resulting "pointer" */
   nir_dest dest;
} nir_deref_instr;

/** Returns true if deref might have one of the given modes
 *
 * For multi-mode derefs, this returns true if any of the deref's possible
 * modes is one of the specified modes.  This function returning true does
 * NOT mean that the deref definitely has one of those modes.  It simply
 * means that, with the best information we have at the time, it might.
 */
static inline bool
nir_deref_mode_may_be(const nir_deref_instr *deref, nir_variable_mode modes)
{
   assert(!(modes & ~nir_var_all));
   assert(deref->modes != 0);
   return deref->modes & modes;
}

/** Returns true if deref must have one of the given modes
 *
 * For multi-mode derefs, this returns true if NIR can prove that the given
 * deref has one of the specified modes.  This function returning false does
 * NOT mean that the deref doesn't have one of the given modes.  It very well
 * may have one of those modes, we just don't have enough information to
 * prove that it does for sure.
 */
static inline bool
nir_deref_mode_must_be(const nir_deref_instr *deref, nir_variable_mode modes)
{
   assert(!(modes & ~nir_var_all));
   assert(deref->modes != 0);
   return !(deref->modes & ~modes);
}

/** Returns true if deref has the given mode
 *
 * This returns true if the deref has exactly the mode specified.  If the
 * deref may have that mode but may also have a different mode (i.e. modes has
 * multiple bits set), this will assert-fail.
 *
 * If you're confused about which nir_deref_mode_ helper to use, use this one
 * or nir_deref_mode_is_one_of below.
 */
static inline bool
nir_deref_mode_is(const nir_deref_instr *deref, nir_variable_mode mode)
{
   assert(util_bitcount(mode) == 1 && (mode & nir_var_all));
   assert(deref->modes != 0);

   /* This is only for "simple" cases so, if modes might interact with this
    * deref then the deref has to have a single mode.
    */
   if (nir_deref_mode_may_be(deref, mode)) {
      assert(util_bitcount(deref->modes) == 1);
      assert(deref->modes == mode);
   }

   return deref->modes == mode;
}
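/* Example (illustrative): a lowering pass that only handles SSBO derefs would
 * typically test
 *
 *    if (!nir_deref_mode_is(deref, nir_var_mem_ssbo))
 *       continue;
 *
 * whereas code that has to be correct in the presence of OpenCL generic
 * pointers should use nir_deref_mode_must_be()/nir_deref_mode_may_be() and
 * handle the "maybe" case explicitly.
 */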
/** Returns true if deref has one of the given modes
 *
 * This returns true if the deref has exactly one possible mode and that mode
 * is one of the modes specified.  If the deref may have one of those modes
 * but may also have a different mode (i.e. modes has multiple bits set), this
 * will assert-fail.
 */
static inline bool
nir_deref_mode_is_one_of(const nir_deref_instr *deref, nir_variable_mode modes)
{
   /* This is only for "simple" cases so, if modes might interact with this
    * deref then the deref has to have a single mode.
    */
   if (nir_deref_mode_may_be(deref, modes)) {
      assert(util_bitcount(deref->modes) == 1);
      assert(nir_deref_mode_must_be(deref, modes));
   }

   return nir_deref_mode_may_be(deref, modes);
}

/** Returns true if deref's possible modes lie in the given set of modes
 *
 * This returns true if the deref's modes lie in the given set of modes.  If
 * the deref's modes overlap with the specified modes but aren't entirely
 * contained in the specified set of modes, this will assert-fail.  In
 * particular, if this is used in a generic pointers scenario, the specified
 * modes have to contain all or none of the possible generic pointer modes.
 *
 * This is intended mostly for mass-lowering of derefs which might have
 * generic pointers.
 */
static inline bool
nir_deref_mode_is_in_set(const nir_deref_instr *deref, nir_variable_mode modes)
{
   if (nir_deref_mode_may_be(deref, modes))
      assert(nir_deref_mode_must_be(deref, modes));

   return nir_deref_mode_may_be(deref, modes);
}

static inline nir_deref_instr *nir_src_as_deref(nir_src src);

static inline nir_deref_instr *
nir_deref_instr_parent(const nir_deref_instr *instr)
{
   if (instr->deref_type == nir_deref_type_var)
      return NULL;
   else
      return nir_src_as_deref(instr->parent);
}

static inline nir_variable *
nir_deref_instr_get_variable(const nir_deref_instr *instr)
{
   while (instr->deref_type != nir_deref_type_var) {
      if (instr->deref_type == nir_deref_type_cast)
         return NULL;

      instr = nir_deref_instr_parent(instr);
   }

   return instr->var;
}

bool nir_deref_instr_has_indirect(nir_deref_instr *instr);
bool nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr);
bool nir_deref_instr_has_complex_use(nir_deref_instr *instr);

bool nir_deref_instr_remove_if_unused(nir_deref_instr *instr);

unsigned nir_deref_instr_array_stride(nir_deref_instr *instr);

typedef struct {
   nir_instr instr;

   struct nir_function *callee;

   unsigned num_params;
   nir_src params[];
} nir_call_instr;

#include "nir_intrinsics.h"

#define NIR_INTRINSIC_MAX_CONST_INDEX 5
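/* Example (illustrative): walking from a deref to its base variable by hand,
 * equivalent to nir_deref_instr_get_variable() when no casts are involved:
 *
 *    while (deref->deref_type != nir_deref_type_var)
 *       deref = nir_deref_instr_parent(deref);
 *    nir_variable *var = deref->var;
 */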
/** Represents an intrinsic
 *
 * An intrinsic is an instruction type for handling things that are
 * more-or-less regular operations but don't just consume and produce SSA
 * values like ALU operations do.  Intrinsics are not for things that have
 * special semantic meaning such as phi nodes and parallel copies.
 * Examples of intrinsics include variable load/store operations, system
 * value loads, and the like.  Even though texturing more-or-less falls
 * under this category, texturing is its own instruction type because
 * trying to represent texturing with intrinsics would lead to a
 * combinatorial explosion of intrinsic opcodes.
 *
 * By having a single instruction type for handling a lot of different
 * cases, optimization passes can look for intrinsics and, for the most
 * part, completely ignore them.  Each intrinsic type also has a few
 * possible flags that govern whether or not they can be reordered or
 * eliminated.  That way passes like dead code elimination can still work
 * on intrinsics without understanding the meaning of each.
 *
 * Each intrinsic has some number of constant indices, some number of
 * variables, and some number of sources.  What these sources, variables,
 * and indices mean depends on the intrinsic and is documented with the
 * intrinsic declaration in nir_intrinsics.h.  Intrinsics and texture
 * instructions are the only types of instruction that can operate on
 * variables.
 */
typedef struct {
   nir_instr instr;

   nir_intrinsic_op intrinsic;

   nir_dest dest;

   /** number of components if this is a vectorized intrinsic
    *
    * Similarly to ALU operations, some intrinsics are vectorized.
    * An intrinsic is vectorized if nir_intrinsic_infos.dest_components == 0.
    * For vectorized intrinsics, the num_components field specifies the
    * number of destination components and the number of source components
    * for all sources with nir_intrinsic_infos.src_components[i] == 0.
    */
   uint8_t num_components;

   int const_index[NIR_INTRINSIC_MAX_CONST_INDEX];

   nir_src src[];
} nir_intrinsic_instr;

static inline nir_variable *
nir_intrinsic_get_var(nir_intrinsic_instr *intrin, unsigned i)
{
   return nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[i]));
}

typedef enum {
   /* Memory ordering. */
   NIR_MEMORY_ACQUIRE        = 1 << 0,
   NIR_MEMORY_RELEASE        = 1 << 1,
   NIR_MEMORY_ACQ_REL        = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE,

   /* Memory visibility operations. */
   NIR_MEMORY_MAKE_AVAILABLE = 1 << 2,
   NIR_MEMORY_MAKE_VISIBLE   = 1 << 3,
} nir_memory_semantics;

typedef enum {
   NIR_SCOPE_NONE,
   NIR_SCOPE_INVOCATION,
   NIR_SCOPE_SUBGROUP,
   NIR_SCOPE_SHADER_CALL,
   NIR_SCOPE_WORKGROUP,
   NIR_SCOPE_QUEUE_FAMILY,
   NIR_SCOPE_DEVICE,
} nir_scope;

/**
 * \name NIR intrinsics semantic flags
 *
 * Information about what the compiler can do with the intrinsics.
 *
 * \sa nir_intrinsic_info::flags
 */
typedef enum {
   /**
    * Whether the intrinsic can be safely eliminated if none of its output
    * values are being used.
    */
   NIR_INTRINSIC_CAN_ELIMINATE = (1 << 0),

   /**
    * Whether the intrinsic can be reordered with respect to any other
    * intrinsic, i.e. whether the only reordering dependencies of the
    * intrinsic are due to the register reads/writes.
    */
   NIR_INTRINSIC_CAN_REORDER = (1 << 1),
} nir_intrinsic_semantic_flag;
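/* Example (illustrative): passes usually consult nir_intrinsic_infos (declared
 * further below) rather than special-casing opcodes, e.g.
 *
 *    const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic];
 *    if (info->flags & NIR_INTRINSIC_CAN_REORDER)
 *       ... ;
 */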
/**
 * \name NIR intrinsics const-index flag
 *
 * Indicates the usage of a const_index slot.
 *
 * \sa nir_intrinsic_info::index_map
 */
typedef enum {
   /**
    * Generally, instructions that take an offset src argument can encode a
    * constant 'base' value which is added to the offset.
    */
   NIR_INTRINSIC_BASE = 1,

   /**
    * For store instructions, a writemask for the store.
    */
   NIR_INTRINSIC_WRMASK,

   /**
    * The stream-id for GS emit_vertex/end_primitive intrinsics.
    */
   NIR_INTRINSIC_STREAM_ID,

   /**
    * The clip-plane id for load_user_clip_plane intrinsic.
    */
   NIR_INTRINSIC_UCP_ID,

   /**
    * The start of NIR_INTRINSIC_RANGE.  Only present on instructions that
    * don't have NIR_INTRINSIC_BASE.
    *
    * If the [range_base, range] is [0, ~0], then we don't know the possible
    * range of the access.
    */
   NIR_INTRINSIC_RANGE_BASE,

   /**
    * The amount of data, starting from BASE or RANGE_BASE, that this
    * instruction may access.  This is used to provide bounds if the offset is
    * not constant.
    */
   NIR_INTRINSIC_RANGE,

   /**
    * The Vulkan descriptor set for vulkan_resource_index intrinsic.
    */
   NIR_INTRINSIC_DESC_SET,

   /**
    * The Vulkan descriptor set binding for vulkan_resource_index intrinsic.
    */
   NIR_INTRINSIC_BINDING,

   /**
    * Component offset.
    */
   NIR_INTRINSIC_COMPONENT,

   /**
    * Column index for matrix intrinsics.
    */
   NIR_INTRINSIC_COLUMN,

   /**
    * Interpolation mode (only meaningful for FS inputs).
    */
   NIR_INTRINSIC_INTERP_MODE,

   /**
    * A binary nir_op to use when performing a reduction or scan operation
    */
   NIR_INTRINSIC_REDUCTION_OP,

   /**
    * Cluster size for reduction operations
    */
   NIR_INTRINSIC_CLUSTER_SIZE,

   /**
    * Parameter index for a load_param intrinsic
    */
   NIR_INTRINSIC_PARAM_IDX,

   /**
    * Image dimensionality for image intrinsics
    *
    * One of GLSL_SAMPLER_DIM_*
    */
   NIR_INTRINSIC_IMAGE_DIM,

   /**
    * Non-zero if we are accessing an array image
    */
   NIR_INTRINSIC_IMAGE_ARRAY,

   /**
    * Image format for image intrinsics
    */
   NIR_INTRINSIC_FORMAT,

   /**
    * Access qualifiers for image and memory access intrinsics
    */
   NIR_INTRINSIC_ACCESS,

   /**
    * Alignment for offsets and addresses
    *
    * These two parameters specify an alignment in terms of a multiplier and
    * an offset.  The multiplier is always a power of two.  The offset or
    * address parameter X of the intrinsic is guaranteed to satisfy the
    * following:
    *
    *                (X - align_offset) % align_mul == 0
    *
    * For constant offset values, align_mul will be NIR_ALIGN_MUL_MAX and the
    * align_offset will be modulo that.
    */
   NIR_INTRINSIC_ALIGN_MUL,
   NIR_INTRINSIC_ALIGN_OFFSET,

   /**
    * The Vulkan descriptor type for a vulkan_resource_[re]index intrinsic.
1864 */ 1865 NIR_INTRINSIC_DESC_TYPE, 1866 1867 /** 1868 * The nir_alu_type of input data to a store or conversion 1869 */ 1870 NIR_INTRINSIC_SRC_TYPE, 1871 1872 /** 1873 * The nir_alu_type of the data output from a load or conversion 1874 */ 1875 NIR_INTRINSIC_DEST_TYPE, 1876 1877 /** 1878 * The swizzle mask for the instructions 1879 * SwizzleInvocationsAMD and SwizzleInvocationsMaskedAMD 1880 */ 1881 NIR_INTRINSIC_SWIZZLE_MASK, 1882 1883 /* Separate source/dest access flags for copies */ 1884 NIR_INTRINSIC_SRC_ACCESS, 1885 NIR_INTRINSIC_DST_ACCESS, 1886 1887 /* Driver location for nir_load_patch_location_ir3 */ 1888 NIR_INTRINSIC_DRIVER_LOCATION, 1889 1890 /** 1891 * Mask of nir_memory_semantics, includes ordering and visibility. 1892 */ 1893 NIR_INTRINSIC_MEMORY_SEMANTICS, 1894 1895 /** 1896 * Mask of nir_variable_modes affected by the memory operation. 1897 */ 1898 NIR_INTRINSIC_MEMORY_MODES, 1899 1900 /** 1901 * Value of nir_scope. 1902 */ 1903 NIR_INTRINSIC_MEMORY_SCOPE, 1904 1905 /** 1906 * Value of nir_scope. 1907 */ 1908 NIR_INTRINSIC_EXECUTION_SCOPE, 1909 1910 /** 1911 * Value of nir_io_semantics. 1912 */ 1913 NIR_INTRINSIC_IO_SEMANTICS, 1914 1915 /** 1916 * The rounding mode of a conversion 1917 */ 1918 NIR_INTRINSIC_ROUNDING_MODE, 1919 1920 /** 1921 * Whether or not to saturate in conversions 1922 */ 1923 NIR_INTRINSIC_SATURATE, 1924 1925 NIR_INTRINSIC_NUM_INDEX_FLAGS, 1926 1927 } nir_intrinsic_index_flag; 1928 1929 /** 1930 * Maximum valid value for a nir align_mul value (in intrinsics or derefs). 1931 * 1932 * Offsets can be signed, so this is the largest power of two in int32_t. 1933 */ 1934 #define NIR_ALIGN_MUL_MAX 0x40000000 1935 1936 typedef struct { 1937 unsigned location:7; /* gl_vert_attrib, gl_varying_slot, or gl_frag_result */ 1938 unsigned num_slots:6; /* max 32, may be pessimistic with const indexing */ 1939 unsigned dual_source_blend_index:1; 1940 unsigned fb_fetch_output:1; /* for GL_KHR_blend_equation_advanced */ 1941 unsigned gs_streams:8; /* xxyyzzww: 2-bit stream index for each component */ 1942 unsigned medium_precision:1; /* GLSL mediump qualifier */ 1943 unsigned per_view:1; 1944 unsigned _pad:7; 1945 } nir_io_semantics; 1946 1947 #define NIR_INTRINSIC_MAX_INPUTS 11 1948 1949 typedef struct { 1950 const char *name; 1951 1952 uint8_t num_srcs; /** < number of register/SSA inputs */ 1953 1954 /** number of components of each input register 1955 * 1956 * If this value is 0, the number of components is given by the 1957 * num_components field of nir_intrinsic_instr. If this value is -1, the 1958 * intrinsic consumes however many components are provided and it is not 1959 * validated at all. 1960 */ 1961 int8_t src_components[NIR_INTRINSIC_MAX_INPUTS]; 1962 1963 bool has_dest; 1964 1965 /** number of components of the output register 1966 * 1967 * If this value is 0, the number of components is given by the 1968 * num_components field of nir_intrinsic_instr. 
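    *
    * Illustrative note: nir_intrinsic_dest_components() below implements
    * exactly this rule, so a vectorized intrinsic with dest_components == 0
    * and num_components == 3 reports three destination components.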
1969 */ 1970 uint8_t dest_components; 1971 1972 /** bitfield of legal bit sizes */ 1973 uint8_t dest_bit_sizes; 1974 1975 /** the number of constant indices used by the intrinsic */ 1976 uint8_t num_indices; 1977 1978 /** indicates the usage of intr->const_index[n] */ 1979 uint8_t index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS]; 1980 1981 /** semantic flags for calls to this intrinsic */ 1982 nir_intrinsic_semantic_flag flags; 1983 } nir_intrinsic_info; 1984 1985 extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics]; 1986 1987 static inline unsigned nir_intrinsic_src_components(const nir_intrinsic_instr * intr,unsigned srcn)1988 nir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn) 1989 { 1990 const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic]; 1991 assert(srcn < info->num_srcs); 1992 if (info->src_components[srcn] > 0) 1993 return info->src_components[srcn]; 1994 else if (info->src_components[srcn] == 0) 1995 return intr->num_components; 1996 else 1997 return nir_src_num_components(intr->src[srcn]); 1998 } 1999 2000 static inline unsigned nir_intrinsic_dest_components(nir_intrinsic_instr * intr)2001 nir_intrinsic_dest_components(nir_intrinsic_instr *intr) 2002 { 2003 const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic]; 2004 if (!info->has_dest) 2005 return 0; 2006 else if (info->dest_components) 2007 return info->dest_components; 2008 else 2009 return intr->num_components; 2010 } 2011 2012 /** 2013 * Helper to copy const_index[] from src to dst, without assuming they 2014 * match in order. 2015 */ 2016 static inline void nir_intrinsic_copy_const_indices(nir_intrinsic_instr * dst,nir_intrinsic_instr * src)2017 nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src) 2018 { 2019 if (src->intrinsic == dst->intrinsic) { 2020 memcpy(dst->const_index, src->const_index, sizeof(dst->const_index)); 2021 return; 2022 } 2023 2024 const nir_intrinsic_info *src_info = &nir_intrinsic_infos[src->intrinsic]; 2025 const nir_intrinsic_info *dst_info = &nir_intrinsic_infos[dst->intrinsic]; 2026 2027 for (unsigned i = 0; i < NIR_INTRINSIC_NUM_INDEX_FLAGS; i++) { 2028 if (src_info->index_map[i] == 0) 2029 continue; 2030 2031 /* require that dst instruction also uses the same const_index[]: */ 2032 assert(dst_info->index_map[i] > 0); 2033 2034 dst->const_index[dst_info->index_map[i] - 1] = 2035 src->const_index[src_info->index_map[i] - 1]; 2036 } 2037 } 2038 2039 #define INTRINSIC_IDX_ACCESSORS(name, flag, type) \ 2040 static inline type \ 2041 nir_intrinsic_##name(const nir_intrinsic_instr *instr) \ 2042 { \ 2043 const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; \ 2044 assert(info->index_map[NIR_INTRINSIC_##flag] > 0); \ 2045 return (type)instr->const_index[info->index_map[NIR_INTRINSIC_##flag] - 1]; \ 2046 } \ 2047 static inline void \ 2048 nir_intrinsic_set_##name(nir_intrinsic_instr *instr, type val) \ 2049 { \ 2050 const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; \ 2051 assert(info->index_map[NIR_INTRINSIC_##flag] > 0); \ 2052 instr->const_index[info->index_map[NIR_INTRINSIC_##flag] - 1] = val; \ 2053 } \ 2054 static inline bool \ 2055 nir_intrinsic_has_##name(const nir_intrinsic_instr *instr) \ 2056 { \ 2057 const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; \ 2058 return info->index_map[NIR_INTRINSIC_##flag] > 0; \ 2059 } 2060 INTRINSIC_IDX_ACCESSORS(write_mask,WRMASK,unsigned)2061 INTRINSIC_IDX_ACCESSORS(write_mask, WRMASK, unsigned) 
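/*
 * Illustrative sketch (roughly what each invocation expands to, not
 * generated code): an INTRINSIC_IDX_ACCESSORS() line, e.g. the write_mask
 * one above, defines three inline helpers along the lines of
 *
 *    unsigned nir_intrinsic_write_mask(const nir_intrinsic_instr *instr);
 *    void nir_intrinsic_set_write_mask(nir_intrinsic_instr *instr,
 *                                      unsigned val);
 *    bool nir_intrinsic_has_write_mask(const nir_intrinsic_instr *instr);
 *
 * all of which map NIR_INTRINSIC_WRMASK through
 * nir_intrinsic_infos[instr->intrinsic].index_map onto a const_index slot.
 */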
2062 INTRINSIC_IDX_ACCESSORS(base, BASE, int) 2063 INTRINSIC_IDX_ACCESSORS(stream_id, STREAM_ID, unsigned) 2064 INTRINSIC_IDX_ACCESSORS(ucp_id, UCP_ID, unsigned) 2065 INTRINSIC_IDX_ACCESSORS(range, RANGE, unsigned) 2066 INTRINSIC_IDX_ACCESSORS(range_base, RANGE_BASE, unsigned) 2067 INTRINSIC_IDX_ACCESSORS(desc_set, DESC_SET, unsigned) 2068 INTRINSIC_IDX_ACCESSORS(binding, BINDING, unsigned) 2069 INTRINSIC_IDX_ACCESSORS(component, COMPONENT, unsigned) 2070 INTRINSIC_IDX_ACCESSORS(column, COLUMN, unsigned) 2071 INTRINSIC_IDX_ACCESSORS(interp_mode, INTERP_MODE, unsigned) 2072 INTRINSIC_IDX_ACCESSORS(reduction_op, REDUCTION_OP, unsigned) 2073 INTRINSIC_IDX_ACCESSORS(cluster_size, CLUSTER_SIZE, unsigned) 2074 INTRINSIC_IDX_ACCESSORS(param_idx, PARAM_IDX, unsigned) 2075 INTRINSIC_IDX_ACCESSORS(image_dim, IMAGE_DIM, enum glsl_sampler_dim) 2076 INTRINSIC_IDX_ACCESSORS(image_array, IMAGE_ARRAY, bool) 2077 INTRINSIC_IDX_ACCESSORS(access, ACCESS, enum gl_access_qualifier) 2078 INTRINSIC_IDX_ACCESSORS(src_access, SRC_ACCESS, enum gl_access_qualifier) 2079 INTRINSIC_IDX_ACCESSORS(dst_access, DST_ACCESS, enum gl_access_qualifier) 2080 INTRINSIC_IDX_ACCESSORS(format, FORMAT, enum pipe_format) 2081 INTRINSIC_IDX_ACCESSORS(align_mul, ALIGN_MUL, unsigned) 2082 INTRINSIC_IDX_ACCESSORS(align_offset, ALIGN_OFFSET, unsigned) 2083 INTRINSIC_IDX_ACCESSORS(desc_type, DESC_TYPE, unsigned) 2084 INTRINSIC_IDX_ACCESSORS(src_type, SRC_TYPE, nir_alu_type) 2085 INTRINSIC_IDX_ACCESSORS(dest_type, DEST_TYPE, nir_alu_type) 2086 INTRINSIC_IDX_ACCESSORS(swizzle_mask, SWIZZLE_MASK, unsigned) 2087 INTRINSIC_IDX_ACCESSORS(driver_location, DRIVER_LOCATION, unsigned) 2088 INTRINSIC_IDX_ACCESSORS(memory_semantics, MEMORY_SEMANTICS, nir_memory_semantics) 2089 INTRINSIC_IDX_ACCESSORS(memory_modes, MEMORY_MODES, nir_variable_mode) 2090 INTRINSIC_IDX_ACCESSORS(memory_scope, MEMORY_SCOPE, nir_scope) 2091 INTRINSIC_IDX_ACCESSORS(execution_scope, EXECUTION_SCOPE, nir_scope) 2092 INTRINSIC_IDX_ACCESSORS(rounding_mode, ROUNDING_MODE, nir_rounding_mode) 2093 INTRINSIC_IDX_ACCESSORS(saturate, SATURATE, bool) 2094 2095 static inline void 2096 nir_intrinsic_set_align(nir_intrinsic_instr *intrin, 2097 unsigned align_mul, unsigned align_offset) 2098 { 2099 assert(util_is_power_of_two_nonzero(align_mul)); 2100 assert(align_offset < align_mul); 2101 nir_intrinsic_set_align_mul(intrin, align_mul); 2102 nir_intrinsic_set_align_offset(intrin, align_offset); 2103 } 2104 2105 /** Returns a simple alignment for a load/store intrinsic offset 2106 * 2107 * Instead of the full mul+offset alignment scheme provided by the ALIGN_MUL 2108 * and ALIGN_OFFSET parameters, this helper takes both into account and 2109 * provides a single simple alignment parameter. The offset X is guaranteed 2110 * to satisfy X % align == 0. 2111 */ 2112 static inline unsigned nir_intrinsic_align(const nir_intrinsic_instr * intrin)2113 nir_intrinsic_align(const nir_intrinsic_instr *intrin) 2114 { 2115 const unsigned align_mul = nir_intrinsic_align_mul(intrin); 2116 const unsigned align_offset = nir_intrinsic_align_offset(intrin); 2117 assert(align_offset < align_mul); 2118 return align_offset ? 
1 << (ffs(align_offset) - 1) : align_mul; 2119 } 2120 2121 static inline bool nir_intrinsic_has_align(const nir_intrinsic_instr * intrin)2122 nir_intrinsic_has_align(const nir_intrinsic_instr *intrin) 2123 { 2124 return nir_intrinsic_has_align_mul(intrin) && 2125 nir_intrinsic_has_align_offset(intrin); 2126 } 2127 2128 static inline void nir_intrinsic_set_io_semantics(nir_intrinsic_instr * intrin,nir_io_semantics semantics)2129 nir_intrinsic_set_io_semantics(nir_intrinsic_instr *intrin, 2130 nir_io_semantics semantics) 2131 { 2132 const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic]; 2133 assert(info->index_map[NIR_INTRINSIC_IO_SEMANTICS] > 0); 2134 STATIC_ASSERT(sizeof(nir_io_semantics) == sizeof(intrin->const_index[0])); 2135 semantics._pad = 0; /* clear padding bits */ 2136 memcpy(&intrin->const_index[info->index_map[NIR_INTRINSIC_IO_SEMANTICS] - 1], 2137 &semantics, sizeof(semantics)); 2138 } 2139 2140 static inline nir_io_semantics nir_intrinsic_io_semantics(const nir_intrinsic_instr * intrin)2141 nir_intrinsic_io_semantics(const nir_intrinsic_instr *intrin) 2142 { 2143 const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic]; 2144 assert(info->index_map[NIR_INTRINSIC_IO_SEMANTICS] > 0); 2145 nir_io_semantics semantics; 2146 memcpy(&semantics, 2147 &intrin->const_index[info->index_map[NIR_INTRINSIC_IO_SEMANTICS] - 1], 2148 sizeof(semantics)); 2149 return semantics; 2150 } 2151 2152 unsigned 2153 nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr); 2154 2155 /* Converts a image_deref_* intrinsic into a image_* one */ 2156 void nir_rewrite_image_intrinsic(nir_intrinsic_instr *instr, 2157 nir_ssa_def *handle, bool bindless); 2158 2159 /* Determine if an intrinsic can be arbitrarily reordered and eliminated. */ 2160 static inline bool nir_intrinsic_can_reorder(nir_intrinsic_instr * instr)2161 nir_intrinsic_can_reorder(nir_intrinsic_instr *instr) 2162 { 2163 if (instr->intrinsic == nir_intrinsic_load_deref || 2164 instr->intrinsic == nir_intrinsic_load_ssbo || 2165 instr->intrinsic == nir_intrinsic_bindless_image_load || 2166 instr->intrinsic == nir_intrinsic_image_deref_load || 2167 instr->intrinsic == nir_intrinsic_image_load) { 2168 return nir_intrinsic_access(instr) & ACCESS_CAN_REORDER; 2169 } else { 2170 const nir_intrinsic_info *info = 2171 &nir_intrinsic_infos[instr->intrinsic]; 2172 return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) && 2173 (info->flags & NIR_INTRINSIC_CAN_REORDER); 2174 } 2175 } 2176 2177 /** 2178 * \group texture information 2179 * 2180 * This gives semantic information about textures which is useful to the 2181 * frontend, the backend, and lowering passes, but not the optimizer. 
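 *
 * As a rough, non-exhaustive illustration of how the opcodes below are used:
 * a GLSL texture() call becomes nir_texop_tex with a nir_tex_src_coord
 * source, the bias and explicit-LOD variants become nir_texop_txb and
 * nir_texop_txl, and texelFetch() becomes nir_texop_txf with integer
 * coordinates.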
2182 */ 2183 2184 typedef enum { 2185 nir_tex_src_coord, 2186 nir_tex_src_projector, 2187 nir_tex_src_comparator, /* shadow comparator */ 2188 nir_tex_src_offset, 2189 nir_tex_src_bias, 2190 nir_tex_src_lod, 2191 nir_tex_src_min_lod, 2192 nir_tex_src_ms_index, /* MSAA sample index */ 2193 nir_tex_src_ms_mcs, /* MSAA compression value */ 2194 nir_tex_src_ddx, 2195 nir_tex_src_ddy, 2196 nir_tex_src_texture_deref, /* < deref pointing to the texture */ 2197 nir_tex_src_sampler_deref, /* < deref pointing to the sampler */ 2198 nir_tex_src_texture_offset, /* < dynamically uniform indirect offset */ 2199 nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */ 2200 nir_tex_src_texture_handle, /* < bindless texture handle */ 2201 nir_tex_src_sampler_handle, /* < bindless sampler handle */ 2202 nir_tex_src_plane, /* < selects plane for planar textures */ 2203 nir_num_tex_src_types 2204 } nir_tex_src_type; 2205 2206 typedef struct { 2207 nir_src src; 2208 nir_tex_src_type src_type; 2209 } nir_tex_src; 2210 2211 typedef enum { 2212 nir_texop_tex, /**< Regular texture look-up */ 2213 nir_texop_txb, /**< Texture look-up with LOD bias */ 2214 nir_texop_txl, /**< Texture look-up with explicit LOD */ 2215 nir_texop_txd, /**< Texture look-up with partial derivatives */ 2216 nir_texop_txf, /**< Texel fetch with explicit LOD */ 2217 nir_texop_txf_ms, /**< Multisample texture fetch */ 2218 nir_texop_txf_ms_fb, /**< Multisample texture fetch from framebuffer */ 2219 nir_texop_txf_ms_mcs, /**< Multisample compression value fetch */ 2220 nir_texop_txs, /**< Texture size */ 2221 nir_texop_lod, /**< Texture lod query */ 2222 nir_texop_tg4, /**< Texture gather */ 2223 nir_texop_query_levels, /**< Texture levels query */ 2224 nir_texop_texture_samples, /**< Texture samples query */ 2225 nir_texop_samples_identical, /**< Query whether all samples are definitely 2226 * identical. 2227 */ 2228 nir_texop_tex_prefetch, /**< Regular texture look-up, eligible for pre-dispatch */ 2229 nir_texop_fragment_fetch, /**< Multisample fragment color texture fetch */ 2230 nir_texop_fragment_mask_fetch,/**< Multisample fragment mask texture fetch */ 2231 } nir_texop; 2232 2233 typedef struct { 2234 nir_instr instr; 2235 2236 enum glsl_sampler_dim sampler_dim; 2237 nir_alu_type dest_type; 2238 2239 nir_texop op; 2240 nir_dest dest; 2241 nir_tex_src *src; 2242 unsigned num_srcs, coord_components; 2243 bool is_array, is_shadow; 2244 2245 /** 2246 * If is_shadow is true, whether this is the old-style shadow that outputs 4 2247 * components or the new-style shadow that outputs 1 component. 2248 */ 2249 bool is_new_style_shadow; 2250 2251 /* gather component selector */ 2252 unsigned component : 2; 2253 2254 /* gather offsets */ 2255 int8_t tg4_offsets[4][2]; 2256 2257 /* True if the texture index or handle is not dynamically uniform */ 2258 bool texture_non_uniform; 2259 2260 /* True if the sampler index or handle is not dynamically uniform */ 2261 bool sampler_non_uniform; 2262 2263 /** The texture index 2264 * 2265 * If this texture instruction has a nir_tex_src_texture_offset source, 2266 * then the texture index is given by texture_index + texture_offset. 
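    *
    * (Illustrative example: texture_index == 2 combined with a dynamic
    * nir_tex_src_texture_offset of 1 addresses texture 3.)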
2267 */ 2268 unsigned texture_index; 2269 2270 /** The sampler index 2271 * 2272 * The following operations do not require a sampler and, as such, this 2273 * field should be ignored: 2274 * - nir_texop_txf 2275 * - nir_texop_txf_ms 2276 * - nir_texop_txs 2277 * - nir_texop_query_levels 2278 * - nir_texop_texture_samples 2279 * - nir_texop_samples_identical 2280 * 2281 * If this texture instruction has a nir_tex_src_sampler_offset source, 2282 * then the sampler index is given by sampler_index + sampler_offset. 2283 */ 2284 unsigned sampler_index; 2285 } nir_tex_instr; 2286 2287 /* 2288 * Returns true if the texture operation requires a sampler as a general rule, 2289 * see the documentation of sampler_index. 2290 * 2291 * Note that the specific hw/driver backend could require to a sampler 2292 * object/configuration packet in any case, for some other reason. 2293 */ 2294 static inline bool nir_tex_instr_need_sampler(const nir_tex_instr * instr)2295 nir_tex_instr_need_sampler(const nir_tex_instr *instr) 2296 { 2297 switch (instr->op) { 2298 case nir_texop_txf: 2299 case nir_texop_txf_ms: 2300 case nir_texop_txs: 2301 case nir_texop_query_levels: 2302 case nir_texop_texture_samples: 2303 case nir_texop_samples_identical: 2304 return false; 2305 default: 2306 return true; 2307 } 2308 } 2309 2310 static inline unsigned nir_tex_instr_dest_size(const nir_tex_instr * instr)2311 nir_tex_instr_dest_size(const nir_tex_instr *instr) 2312 { 2313 switch (instr->op) { 2314 case nir_texop_txs: { 2315 unsigned ret; 2316 switch (instr->sampler_dim) { 2317 case GLSL_SAMPLER_DIM_1D: 2318 case GLSL_SAMPLER_DIM_BUF: 2319 ret = 1; 2320 break; 2321 case GLSL_SAMPLER_DIM_2D: 2322 case GLSL_SAMPLER_DIM_CUBE: 2323 case GLSL_SAMPLER_DIM_MS: 2324 case GLSL_SAMPLER_DIM_RECT: 2325 case GLSL_SAMPLER_DIM_EXTERNAL: 2326 case GLSL_SAMPLER_DIM_SUBPASS: 2327 ret = 2; 2328 break; 2329 case GLSL_SAMPLER_DIM_3D: 2330 ret = 3; 2331 break; 2332 default: 2333 unreachable("not reached"); 2334 } 2335 if (instr->is_array) 2336 ret++; 2337 return ret; 2338 } 2339 2340 case nir_texop_lod: 2341 return 2; 2342 2343 case nir_texop_texture_samples: 2344 case nir_texop_query_levels: 2345 case nir_texop_samples_identical: 2346 case nir_texop_fragment_mask_fetch: 2347 return 1; 2348 2349 default: 2350 if (instr->is_shadow && instr->is_new_style_shadow) 2351 return 1; 2352 2353 return 4; 2354 } 2355 } 2356 2357 /* Returns true if this texture operation queries something about the texture 2358 * rather than actually sampling it. 
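 * As the switch below spells out: txs, lod, query_levels, texture_samples
 * and txf_ms_mcs count as queries, while tex, txb, txl, txd, tg4 and the
 * txf variants do not.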
2359 */ 2360 static inline bool nir_tex_instr_is_query(const nir_tex_instr * instr)2361 nir_tex_instr_is_query(const nir_tex_instr *instr) 2362 { 2363 switch (instr->op) { 2364 case nir_texop_txs: 2365 case nir_texop_lod: 2366 case nir_texop_texture_samples: 2367 case nir_texop_query_levels: 2368 case nir_texop_txf_ms_mcs: 2369 return true; 2370 case nir_texop_tex: 2371 case nir_texop_txb: 2372 case nir_texop_txl: 2373 case nir_texop_txd: 2374 case nir_texop_txf: 2375 case nir_texop_txf_ms: 2376 case nir_texop_txf_ms_fb: 2377 case nir_texop_tg4: 2378 return false; 2379 default: 2380 unreachable("Invalid texture opcode"); 2381 } 2382 } 2383 2384 static inline bool nir_tex_instr_has_implicit_derivative(const nir_tex_instr * instr)2385 nir_tex_instr_has_implicit_derivative(const nir_tex_instr *instr) 2386 { 2387 switch (instr->op) { 2388 case nir_texop_tex: 2389 case nir_texop_txb: 2390 case nir_texop_lod: 2391 return true; 2392 default: 2393 return false; 2394 } 2395 } 2396 2397 static inline nir_alu_type nir_tex_instr_src_type(const nir_tex_instr * instr,unsigned src)2398 nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src) 2399 { 2400 switch (instr->src[src].src_type) { 2401 case nir_tex_src_coord: 2402 switch (instr->op) { 2403 case nir_texop_txf: 2404 case nir_texop_txf_ms: 2405 case nir_texop_txf_ms_fb: 2406 case nir_texop_txf_ms_mcs: 2407 case nir_texop_samples_identical: 2408 return nir_type_int; 2409 2410 default: 2411 return nir_type_float; 2412 } 2413 2414 case nir_tex_src_lod: 2415 switch (instr->op) { 2416 case nir_texop_txs: 2417 case nir_texop_txf: 2418 return nir_type_int; 2419 2420 default: 2421 return nir_type_float; 2422 } 2423 2424 case nir_tex_src_projector: 2425 case nir_tex_src_comparator: 2426 case nir_tex_src_bias: 2427 case nir_tex_src_min_lod: 2428 case nir_tex_src_ddx: 2429 case nir_tex_src_ddy: 2430 return nir_type_float; 2431 2432 case nir_tex_src_offset: 2433 case nir_tex_src_ms_index: 2434 case nir_tex_src_plane: 2435 return nir_type_int; 2436 2437 case nir_tex_src_ms_mcs: 2438 case nir_tex_src_texture_deref: 2439 case nir_tex_src_sampler_deref: 2440 case nir_tex_src_texture_offset: 2441 case nir_tex_src_sampler_offset: 2442 case nir_tex_src_texture_handle: 2443 case nir_tex_src_sampler_handle: 2444 return nir_type_uint; 2445 2446 case nir_num_tex_src_types: 2447 unreachable("nir_num_tex_src_types is not a valid source type"); 2448 } 2449 2450 unreachable("Invalid texture source type"); 2451 } 2452 2453 static inline unsigned nir_tex_instr_src_size(const nir_tex_instr * instr,unsigned src)2454 nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src) 2455 { 2456 if (instr->src[src].src_type == nir_tex_src_coord) 2457 return instr->coord_components; 2458 2459 /* The MCS value is expected to be a vec4 returned by a txf_ms_mcs */ 2460 if (instr->src[src].src_type == nir_tex_src_ms_mcs) 2461 return 4; 2462 2463 if (instr->src[src].src_type == nir_tex_src_ddx || 2464 instr->src[src].src_type == nir_tex_src_ddy) { 2465 if (instr->is_array) 2466 return instr->coord_components - 1; 2467 else 2468 return instr->coord_components; 2469 } 2470 2471 /* Usual APIs don't allow cube + offset, but we allow it, with 2 coords for 2472 * the offset, since a cube maps to a single face. 
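    * (Illustrative consequence: a cube-array lookup has coord_components == 4
    * but its offset source is still a vec2.)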
2473 */ 2474 if (instr->src[src].src_type == nir_tex_src_offset) { 2475 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) 2476 return 2; 2477 else if (instr->is_array) 2478 return instr->coord_components - 1; 2479 else 2480 return instr->coord_components; 2481 } 2482 2483 return 1; 2484 } 2485 2486 static inline int nir_tex_instr_src_index(const nir_tex_instr * instr,nir_tex_src_type type)2487 nir_tex_instr_src_index(const nir_tex_instr *instr, nir_tex_src_type type) 2488 { 2489 for (unsigned i = 0; i < instr->num_srcs; i++) 2490 if (instr->src[i].src_type == type) 2491 return (int) i; 2492 2493 return -1; 2494 } 2495 2496 void nir_tex_instr_add_src(nir_tex_instr *tex, 2497 nir_tex_src_type src_type, 2498 nir_src src); 2499 2500 void nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx); 2501 2502 bool nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex); 2503 2504 typedef struct { 2505 nir_instr instr; 2506 2507 nir_ssa_def def; 2508 2509 nir_const_value value[]; 2510 } nir_load_const_instr; 2511 2512 typedef enum { 2513 /** Return from a function 2514 * 2515 * This instruction is a classic function return. It jumps to 2516 * nir_function_impl::end_block. No return value is provided in this 2517 * instruction. Instead, the function is expected to write any return 2518 * data to a deref passed in from the caller. 2519 */ 2520 nir_jump_return, 2521 2522 /** Break out of the inner-most loop 2523 * 2524 * This has the same semantics as C's "break" statement. 2525 */ 2526 nir_jump_break, 2527 2528 /** Jump back to the top of the inner-most loop 2529 * 2530 * This has the same semantics as C's "continue" statement assuming that a 2531 * NIR loop is implemented as "while (1) { body }". 2532 */ 2533 nir_jump_continue, 2534 2535 /** Jumps for unstructured CFG. 2536 * 2537 * As within an unstructured CFG we can't rely on block ordering we need to 2538 * place explicit jumps at the end of every block. 2539 */ 2540 nir_jump_goto, 2541 nir_jump_goto_if, 2542 } nir_jump_type; 2543 2544 typedef struct { 2545 nir_instr instr; 2546 nir_jump_type type; 2547 nir_src condition; 2548 struct nir_block *target; 2549 struct nir_block *else_target; 2550 } nir_jump_instr; 2551 2552 /* creates a new SSA variable in an undefined state */ 2553 2554 typedef struct { 2555 nir_instr instr; 2556 nir_ssa_def def; 2557 } nir_ssa_undef_instr; 2558 2559 typedef struct { 2560 struct exec_node node; 2561 2562 /* The predecessor block corresponding to this source */ 2563 struct nir_block *pred; 2564 2565 nir_src src; 2566 } nir_phi_src; 2567 2568 #define nir_foreach_phi_src(phi_src, phi) \ 2569 foreach_list_typed(nir_phi_src, phi_src, node, &(phi)->srcs) 2570 #define nir_foreach_phi_src_safe(phi_src, phi) \ 2571 foreach_list_typed_safe(nir_phi_src, phi_src, node, &(phi)->srcs) 2572 2573 typedef struct { 2574 nir_instr instr; 2575 2576 struct exec_list srcs; /** < list of nir_phi_src */ 2577 2578 nir_dest dest; 2579 } nir_phi_instr; 2580 2581 typedef struct { 2582 struct exec_node node; 2583 nir_src src; 2584 nir_dest dest; 2585 } nir_parallel_copy_entry; 2586 2587 #define nir_foreach_parallel_copy_entry(entry, pcopy) \ 2588 foreach_list_typed(nir_parallel_copy_entry, entry, node, &(pcopy)->entries) 2589 2590 typedef struct { 2591 nir_instr instr; 2592 2593 /* A list of nir_parallel_copy_entrys. The sources of all of the 2594 * entries are copied to the corresponding destinations "in parallel". 2595 * In other words, if we have two entries: a -> b and b -> a, the values 2596 * get swapped. 
2597 */ 2598 struct exec_list entries; 2599 } nir_parallel_copy_instr; 2600 2601 NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr, 2602 type, nir_instr_type_alu) 2603 NIR_DEFINE_CAST(nir_instr_as_deref, nir_instr, nir_deref_instr, instr, 2604 type, nir_instr_type_deref) 2605 NIR_DEFINE_CAST(nir_instr_as_call, nir_instr, nir_call_instr, instr, 2606 type, nir_instr_type_call) 2607 NIR_DEFINE_CAST(nir_instr_as_jump, nir_instr, nir_jump_instr, instr, 2608 type, nir_instr_type_jump) 2609 NIR_DEFINE_CAST(nir_instr_as_tex, nir_instr, nir_tex_instr, instr, 2610 type, nir_instr_type_tex) 2611 NIR_DEFINE_CAST(nir_instr_as_intrinsic, nir_instr, nir_intrinsic_instr, instr, 2612 type, nir_instr_type_intrinsic) 2613 NIR_DEFINE_CAST(nir_instr_as_load_const, nir_instr, nir_load_const_instr, instr, 2614 type, nir_instr_type_load_const) 2615 NIR_DEFINE_CAST(nir_instr_as_ssa_undef, nir_instr, nir_ssa_undef_instr, instr, 2616 type, nir_instr_type_ssa_undef) 2617 NIR_DEFINE_CAST(nir_instr_as_phi, nir_instr, nir_phi_instr, instr, 2618 type, nir_instr_type_phi) 2619 NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr, 2620 nir_parallel_copy_instr, instr, 2621 type, nir_instr_type_parallel_copy) 2622 2623 2624 #define NIR_DEFINE_SRC_AS_CONST(type, suffix) \ 2625 static inline type \ 2626 nir_src_comp_as_##suffix(nir_src src, unsigned comp) \ 2627 { \ 2628 assert(nir_src_is_const(src)); \ 2629 nir_load_const_instr *load = \ 2630 nir_instr_as_load_const(src.ssa->parent_instr); \ 2631 assert(comp < load->def.num_components); \ 2632 return nir_const_value_as_##suffix(load->value[comp], \ 2633 load->def.bit_size); \ 2634 } \ 2635 \ 2636 static inline type \ 2637 nir_src_as_##suffix(nir_src src) \ 2638 { \ 2639 assert(nir_src_num_components(src) == 1); \ 2640 return nir_src_comp_as_##suffix(src, 0); \ 2641 } 2642 2643 NIR_DEFINE_SRC_AS_CONST(int64_t, int) 2644 NIR_DEFINE_SRC_AS_CONST(uint64_t, uint) 2645 NIR_DEFINE_SRC_AS_CONST(bool, bool) 2646 NIR_DEFINE_SRC_AS_CONST(double, float) 2647 2648 #undef NIR_DEFINE_SRC_AS_CONST 2649 2650 2651 typedef struct { 2652 nir_ssa_def *def; 2653 unsigned comp; 2654 } nir_ssa_scalar; 2655 2656 static inline bool nir_ssa_scalar_is_const(nir_ssa_scalar s)2657 nir_ssa_scalar_is_const(nir_ssa_scalar s) 2658 { 2659 return s.def->parent_instr->type == nir_instr_type_load_const; 2660 } 2661 2662 static inline nir_const_value nir_ssa_scalar_as_const_value(nir_ssa_scalar s)2663 nir_ssa_scalar_as_const_value(nir_ssa_scalar s) 2664 { 2665 assert(s.comp < s.def->num_components); 2666 nir_load_const_instr *load = nir_instr_as_load_const(s.def->parent_instr); 2667 return load->value[s.comp]; 2668 } 2669 2670 #define NIR_DEFINE_SCALAR_AS_CONST(type, suffix) \ 2671 static inline type \ 2672 nir_ssa_scalar_as_##suffix(nir_ssa_scalar s) \ 2673 { \ 2674 return nir_const_value_as_##suffix( \ 2675 nir_ssa_scalar_as_const_value(s), s.def->bit_size); \ 2676 } 2677 NIR_DEFINE_SCALAR_AS_CONST(int64_t,int)2678 NIR_DEFINE_SCALAR_AS_CONST(int64_t, int) 2679 NIR_DEFINE_SCALAR_AS_CONST(uint64_t, uint) 2680 NIR_DEFINE_SCALAR_AS_CONST(bool, bool) 2681 NIR_DEFINE_SCALAR_AS_CONST(double, float) 2682 2683 #undef NIR_DEFINE_SCALAR_AS_CONST 2684 2685 static inline bool 2686 nir_ssa_scalar_is_alu(nir_ssa_scalar s) 2687 { 2688 return s.def->parent_instr->type == nir_instr_type_alu; 2689 } 2690 2691 static inline nir_op nir_ssa_scalar_alu_op(nir_ssa_scalar s)2692 nir_ssa_scalar_alu_op(nir_ssa_scalar s) 2693 { 2694 return nir_instr_as_alu(s.def->parent_instr)->op; 2695 } 2696 2697 static inline 
nir_ssa_scalar nir_ssa_scalar_chase_alu_src(nir_ssa_scalar s,unsigned alu_src_idx)2698 nir_ssa_scalar_chase_alu_src(nir_ssa_scalar s, unsigned alu_src_idx) 2699 { 2700 nir_ssa_scalar out = { NULL, 0 }; 2701 2702 nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr); 2703 assert(alu_src_idx < nir_op_infos[alu->op].num_inputs); 2704 2705 /* Our component must be written */ 2706 assert(s.comp < s.def->num_components); 2707 assert(alu->dest.write_mask & (1u << s.comp)); 2708 2709 assert(alu->src[alu_src_idx].src.is_ssa); 2710 out.def = alu->src[alu_src_idx].src.ssa; 2711 2712 if (nir_op_infos[alu->op].input_sizes[alu_src_idx] == 0) { 2713 /* The ALU src is unsized so the source component follows the 2714 * destination component. 2715 */ 2716 out.comp = alu->src[alu_src_idx].swizzle[s.comp]; 2717 } else { 2718 /* This is a sized source so all source components work together to 2719 * produce all the destination components. Since we need to return a 2720 * scalar, this only works if the source is a scalar. 2721 */ 2722 assert(nir_op_infos[alu->op].input_sizes[alu_src_idx] == 1); 2723 out.comp = alu->src[alu_src_idx].swizzle[0]; 2724 } 2725 assert(out.comp < out.def->num_components); 2726 2727 return out; 2728 } 2729 2730 2731 /* 2732 * Control flow 2733 * 2734 * Control flow consists of a tree of control flow nodes, which include 2735 * if-statements and loops. The leaves of the tree are basic blocks, lists of 2736 * instructions that always run start-to-finish. Each basic block also keeps 2737 * track of its successors (blocks which may run immediately after the current 2738 * block) and predecessors (blocks which could have run immediately before the 2739 * current block). Each function also has a start block and an end block which 2740 * all return statements point to (which is always empty). Together, all the 2741 * blocks with their predecessors and successors make up the control flow 2742 * graph (CFG) of the function. There are helpers that modify the tree of 2743 * control flow nodes while modifying the CFG appropriately; these should be 2744 * used instead of modifying the tree directly. 2745 */ 2746 2747 typedef enum { 2748 nir_cf_node_block, 2749 nir_cf_node_if, 2750 nir_cf_node_loop, 2751 nir_cf_node_function 2752 } nir_cf_node_type; 2753 2754 typedef struct nir_cf_node { 2755 struct exec_node node; 2756 nir_cf_node_type type; 2757 struct nir_cf_node *parent; 2758 } nir_cf_node; 2759 2760 typedef struct nir_block { 2761 nir_cf_node cf_node; 2762 2763 struct exec_list instr_list; /** < list of nir_instr */ 2764 2765 /** generic block index; generated by nir_index_blocks */ 2766 unsigned index; 2767 2768 /* 2769 * Each block can only have up to 2 successors, so we put them in a simple 2770 * array - no need for anything more complicated. 2771 */ 2772 struct nir_block *successors[2]; 2773 2774 /* Set of nir_block predecessors in the CFG */ 2775 struct set *predecessors; 2776 2777 /* 2778 * this node's immediate dominator in the dominance tree - set to NULL for 2779 * the start block. 
2780 */ 2781 struct nir_block *imm_dom; 2782 2783 /* This node's children in the dominance tree */ 2784 unsigned num_dom_children; 2785 struct nir_block **dom_children; 2786 2787 /* Set of nir_blocks on the dominance frontier of this block */ 2788 struct set *dom_frontier; 2789 2790 /* 2791 * These two indices have the property that dom_{pre,post}_index for each 2792 * child of this block in the dominance tree will always be between 2793 * dom_pre_index and dom_post_index for this block, which makes testing if 2794 * a given block is dominated by another block an O(1) operation. 2795 */ 2796 uint32_t dom_pre_index, dom_post_index; 2797 2798 /** 2799 * Value just before the first nir_instr->index in the block, but after 2800 * end_ip that of any predecessor block. 2801 */ 2802 uint32_t start_ip; 2803 /** 2804 * Value just after the last nir_instr->index in the block, but before the 2805 * start_ip of any successor block. 2806 */ 2807 uint32_t end_ip; 2808 2809 /* SSA def live in and out for this block; used for liveness analysis. 2810 * Indexed by ssa_def->index 2811 */ 2812 BITSET_WORD *live_in; 2813 BITSET_WORD *live_out; 2814 } nir_block; 2815 2816 static inline bool nir_block_is_reachable(nir_block * b)2817 nir_block_is_reachable(nir_block *b) 2818 { 2819 /* See also nir_block_dominates */ 2820 return b->dom_post_index != 0; 2821 } 2822 2823 static inline nir_instr * nir_block_first_instr(nir_block * block)2824 nir_block_first_instr(nir_block *block) 2825 { 2826 struct exec_node *head = exec_list_get_head(&block->instr_list); 2827 return exec_node_data(nir_instr, head, node); 2828 } 2829 2830 static inline nir_instr * nir_block_last_instr(nir_block * block)2831 nir_block_last_instr(nir_block *block) 2832 { 2833 struct exec_node *tail = exec_list_get_tail(&block->instr_list); 2834 return exec_node_data(nir_instr, tail, node); 2835 } 2836 2837 static inline bool nir_block_ends_in_jump(nir_block * block)2838 nir_block_ends_in_jump(nir_block *block) 2839 { 2840 return !exec_list_is_empty(&block->instr_list) && 2841 nir_block_last_instr(block)->type == nir_instr_type_jump; 2842 } 2843 2844 #define nir_foreach_instr(instr, block) \ 2845 foreach_list_typed(nir_instr, instr, node, &(block)->instr_list) 2846 #define nir_foreach_instr_reverse(instr, block) \ 2847 foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list) 2848 #define nir_foreach_instr_safe(instr, block) \ 2849 foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list) 2850 #define nir_foreach_instr_reverse_safe(instr, block) \ 2851 foreach_list_typed_reverse_safe(nir_instr, instr, node, &(block)->instr_list) 2852 2853 typedef enum { 2854 nir_selection_control_none = 0x0, 2855 nir_selection_control_flatten = 0x1, 2856 nir_selection_control_dont_flatten = 0x2, 2857 } nir_selection_control; 2858 2859 typedef struct nir_if { 2860 nir_cf_node cf_node; 2861 nir_src condition; 2862 nir_selection_control control; 2863 2864 struct exec_list then_list; /** < list of nir_cf_node */ 2865 struct exec_list else_list; /** < list of nir_cf_node */ 2866 } nir_if; 2867 2868 typedef struct { 2869 nir_if *nif; 2870 2871 /** Instruction that generates nif::condition. */ 2872 nir_instr *conditional_instr; 2873 2874 /** Block within ::nif that has the break instruction. */ 2875 nir_block *break_block; 2876 2877 /** Last block for the then- or else-path that does not contain the break. */ 2878 nir_block *continue_from_block; 2879 2880 /** True when ::break_block is in the else-path of ::nif. 
    */
   bool continue_from_then;
   bool induction_rhs;

   /* This is true if the terminator's exact trip count is unknown. For
    * example:
    *
    *    for (int i = 0; i < imin(x, 4); i++)
    *       ...
    *
    * Here loop analysis would have set a max_trip_count of 4, however we
    * don't know for sure that this is the exact trip count.
    */
   bool exact_trip_count_unknown;

   struct list_head loop_terminator_link;
} nir_loop_terminator;

typedef struct {
   /* Estimated cost (in number of instructions) of the loop */
   unsigned instr_cost;

   /* Guessed trip count based on array indexing */
   unsigned guessed_trip_count;

   /* Maximum number of times the loop is run (if known) */
   unsigned max_trip_count;

   /* Do we know the exact number of times the loop will be run */
   bool exact_trip_count_known;

   /* Unroll the loop regardless of its size */
   bool force_unroll;

   /* Does the loop contain complex loop terminators, continues or other
    * complex behaviours? If this is true we can't rely on
    * loop_terminator_list to be complete or accurate.
    */
   bool complex_loop;

   nir_loop_terminator *limiting_terminator;

   /* A list of loop_terminators terminating this loop. */
   struct list_head loop_terminator_list;
} nir_loop_info;

typedef enum {
   nir_loop_control_none = 0x0,
   nir_loop_control_unroll = 0x1,
   nir_loop_control_dont_unroll = 0x2,
} nir_loop_control;

typedef struct {
   nir_cf_node cf_node;

   struct exec_list body; /** < list of nir_cf_node */

   nir_loop_info *info;
   nir_loop_control control;
   bool partially_unrolled;
} nir_loop;

/**
 * Various bits of metadata that may be created or required by
 * optimization and analysis passes
 */
typedef enum {
   nir_metadata_none = 0x0,

   /** Indicates that nir_block::index values are valid.
    *
    * The start block has index 0 and they increase through a natural walk of
    * the CFG. nir_function_impl::num_blocks is the number of blocks and
    * every block index is in the range [0, nir_function_impl::num_blocks].
    *
    * A pass can preserve this metadata type if it doesn't touch the CFG.
    */
   nir_metadata_block_index = 0x1,

   /** Indicates that block dominance information is valid
    *
    * This includes:
    *
    *   - nir_block::num_dom_children
    *   - nir_block::dom_children
    *   - nir_block::dom_frontier
    *   - nir_block::dom_pre_index
    *   - nir_block::dom_post_index
    *
    * A pass can preserve this metadata type if it doesn't touch the CFG.
    */
   nir_metadata_dominance = 0x2,

   /** Indicates that SSA def data-flow liveness information is valid
    *
    * This includes:
    *
    *   - nir_block::live_in
    *   - nir_block::live_out
    *
    * A pass can preserve this metadata type if it never adds or removes any
    * SSA defs (most passes shouldn't preserve this metadata type).
    */
   nir_metadata_live_ssa_defs = 0x4,

   /** A dummy metadata value to track when a pass forgot to call
    * nir_metadata_preserve.
    *
    * A pass should always clear this value even if it doesn't make any
    * progress to indicate that it thought about preserving metadata.
    */
   nir_metadata_not_properly_reset = 0x8,

   /** Indicates that loop analysis information is valid.
    *
    * This includes everything pointed to by nir_loop::info.
    *
    * A pass can preserve this metadata type if it is guaranteed to not affect
    * any loop metadata. However, since loop metadata includes things like
    * loop counts which depend on arithmetic in the loop, this is very hard to
    * determine. Most passes shouldn't preserve this metadata type.
    */
   nir_metadata_loop_analysis = 0x10,

   /** Indicates that nir_instr::index values are valid.
    *
    * The start instruction has index 0 and they increase through a natural
    * walk of instructions in blocks in the CFG. The indices may have holes
    * after passes such as DCE.
    *
    * A pass can preserve this metadata type if it never adds or moves any
    * instructions (most passes shouldn't preserve this metadata type), but
    * can preserve it if it only removes instructions.
    */
   nir_metadata_instr_index = 0x20,

   /** All metadata
    *
    * This includes all nir_metadata flags except not_properly_reset. Passes
    * which do not change the shader in any way should call
    *
    *    nir_metadata_preserve(impl, nir_metadata_all);
    */
   nir_metadata_all = ~nir_metadata_not_properly_reset,
} nir_metadata;
MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(nir_metadata)

typedef struct {
   nir_cf_node cf_node;

   /** pointer to the function of which this is an implementation */
   struct nir_function *function;

   struct exec_list body; /** < list of nir_cf_node */

   nir_block *end_block;

   /** list for all local variables in the function */
   struct exec_list locals;

   /** list of local registers in the function */
   struct exec_list registers;

   /** next available local register index */
   unsigned reg_alloc;

   /** next available SSA value index */
   unsigned ssa_alloc;

   /* total number of basic blocks, only valid when block_index_dirty = false */
   unsigned num_blocks;

   /** True if this nir_function_impl uses structured control-flow
    *
    * Structured nir_function_impls have different validation rules.
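    *
    * (Hedged note: an unstructured implementation is one whose control flow
    * is expressed with the nir_jump_goto/nir_jump_goto_if jumps described
    * above rather than purely with nir_if/nir_loop nodes.)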
3055 */ 3056 bool structured; 3057 3058 nir_metadata valid_metadata; 3059 } nir_function_impl; 3060 3061 #define nir_foreach_function_temp_variable(var, impl) \ 3062 foreach_list_typed(nir_variable, var, node, &(impl)->locals) 3063 3064 #define nir_foreach_function_temp_variable_safe(var, impl) \ 3065 foreach_list_typed_safe(nir_variable, var, node, &(impl)->locals) 3066 3067 ATTRIBUTE_RETURNS_NONNULL static inline nir_block * nir_start_block(nir_function_impl * impl)3068 nir_start_block(nir_function_impl *impl) 3069 { 3070 return (nir_block *) impl->body.head_sentinel.next; 3071 } 3072 3073 ATTRIBUTE_RETURNS_NONNULL static inline nir_block * nir_impl_last_block(nir_function_impl * impl)3074 nir_impl_last_block(nir_function_impl *impl) 3075 { 3076 return (nir_block *) impl->body.tail_sentinel.prev; 3077 } 3078 3079 static inline nir_cf_node * nir_cf_node_next(nir_cf_node * node)3080 nir_cf_node_next(nir_cf_node *node) 3081 { 3082 struct exec_node *next = exec_node_get_next(&node->node); 3083 if (exec_node_is_tail_sentinel(next)) 3084 return NULL; 3085 else 3086 return exec_node_data(nir_cf_node, next, node); 3087 } 3088 3089 static inline nir_cf_node * nir_cf_node_prev(nir_cf_node * node)3090 nir_cf_node_prev(nir_cf_node *node) 3091 { 3092 struct exec_node *prev = exec_node_get_prev(&node->node); 3093 if (exec_node_is_head_sentinel(prev)) 3094 return NULL; 3095 else 3096 return exec_node_data(nir_cf_node, prev, node); 3097 } 3098 3099 static inline bool nir_cf_node_is_first(const nir_cf_node * node)3100 nir_cf_node_is_first(const nir_cf_node *node) 3101 { 3102 return exec_node_is_head_sentinel(node->node.prev); 3103 } 3104 3105 static inline bool nir_cf_node_is_last(const nir_cf_node * node)3106 nir_cf_node_is_last(const nir_cf_node *node) 3107 { 3108 return exec_node_is_tail_sentinel(node->node.next); 3109 } 3110 NIR_DEFINE_CAST(nir_cf_node_as_block,nir_cf_node,nir_block,cf_node,type,nir_cf_node_block)3111 NIR_DEFINE_CAST(nir_cf_node_as_block, nir_cf_node, nir_block, cf_node, 3112 type, nir_cf_node_block) 3113 NIR_DEFINE_CAST(nir_cf_node_as_if, nir_cf_node, nir_if, cf_node, 3114 type, nir_cf_node_if) 3115 NIR_DEFINE_CAST(nir_cf_node_as_loop, nir_cf_node, nir_loop, cf_node, 3116 type, nir_cf_node_loop) 3117 NIR_DEFINE_CAST(nir_cf_node_as_function, nir_cf_node, 3118 nir_function_impl, cf_node, type, nir_cf_node_function) 3119 3120 static inline nir_block * 3121 nir_if_first_then_block(nir_if *if_stmt) 3122 { 3123 struct exec_node *head = exec_list_get_head(&if_stmt->then_list); 3124 return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 3125 } 3126 3127 static inline nir_block * nir_if_last_then_block(nir_if * if_stmt)3128 nir_if_last_then_block(nir_if *if_stmt) 3129 { 3130 struct exec_node *tail = exec_list_get_tail(&if_stmt->then_list); 3131 return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node)); 3132 } 3133 3134 static inline nir_block * nir_if_first_else_block(nir_if * if_stmt)3135 nir_if_first_else_block(nir_if *if_stmt) 3136 { 3137 struct exec_node *head = exec_list_get_head(&if_stmt->else_list); 3138 return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 3139 } 3140 3141 static inline nir_block * nir_if_last_else_block(nir_if * if_stmt)3142 nir_if_last_else_block(nir_if *if_stmt) 3143 { 3144 struct exec_node *tail = exec_list_get_tail(&if_stmt->else_list); 3145 return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node)); 3146 } 3147 3148 static inline nir_block * nir_loop_first_block(nir_loop * loop)3149 
nir_loop_first_block(nir_loop *loop) 3150 { 3151 struct exec_node *head = exec_list_get_head(&loop->body); 3152 return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 3153 } 3154 3155 static inline nir_block * nir_loop_last_block(nir_loop * loop)3156 nir_loop_last_block(nir_loop *loop) 3157 { 3158 struct exec_node *tail = exec_list_get_tail(&loop->body); 3159 return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node)); 3160 } 3161 3162 /** 3163 * Return true if this list of cf_nodes contains a single empty block. 3164 */ 3165 static inline bool nir_cf_list_is_empty_block(struct exec_list * cf_list)3166 nir_cf_list_is_empty_block(struct exec_list *cf_list) 3167 { 3168 if (exec_list_is_singular(cf_list)) { 3169 struct exec_node *head = exec_list_get_head(cf_list); 3170 nir_block *block = 3171 nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 3172 return exec_list_is_empty(&block->instr_list); 3173 } 3174 return false; 3175 } 3176 3177 typedef struct { 3178 uint8_t num_components; 3179 uint8_t bit_size; 3180 } nir_parameter; 3181 3182 typedef struct nir_function { 3183 struct exec_node node; 3184 3185 const char *name; 3186 struct nir_shader *shader; 3187 3188 unsigned num_params; 3189 nir_parameter *params; 3190 3191 /** The implementation of this function. 3192 * 3193 * If the function is only declared and not implemented, this is NULL. 3194 */ 3195 nir_function_impl *impl; 3196 3197 bool is_entrypoint; 3198 } nir_function; 3199 3200 typedef enum { 3201 nir_lower_imul64 = (1 << 0), 3202 nir_lower_isign64 = (1 << 1), 3203 /** Lower all int64 modulus and division opcodes */ 3204 nir_lower_divmod64 = (1 << 2), 3205 /** Lower all 64-bit umul_high and imul_high opcodes */ 3206 nir_lower_imul_high64 = (1 << 3), 3207 nir_lower_mov64 = (1 << 4), 3208 nir_lower_icmp64 = (1 << 5), 3209 nir_lower_iadd64 = (1 << 6), 3210 nir_lower_iabs64 = (1 << 7), 3211 nir_lower_ineg64 = (1 << 8), 3212 nir_lower_logic64 = (1 << 9), 3213 nir_lower_minmax64 = (1 << 10), 3214 nir_lower_shift64 = (1 << 11), 3215 nir_lower_imul_2x32_64 = (1 << 12), 3216 nir_lower_extract64 = (1 << 13), 3217 nir_lower_ufind_msb64 = (1 << 14), 3218 nir_lower_bit_count64 = (1 << 15), 3219 } nir_lower_int64_options; 3220 3221 typedef enum { 3222 nir_lower_drcp = (1 << 0), 3223 nir_lower_dsqrt = (1 << 1), 3224 nir_lower_drsq = (1 << 2), 3225 nir_lower_dtrunc = (1 << 3), 3226 nir_lower_dfloor = (1 << 4), 3227 nir_lower_dceil = (1 << 5), 3228 nir_lower_dfract = (1 << 6), 3229 nir_lower_dround_even = (1 << 7), 3230 nir_lower_dmod = (1 << 8), 3231 nir_lower_dsub = (1 << 9), 3232 nir_lower_ddiv = (1 << 10), 3233 nir_lower_fp64_full_software = (1 << 11), 3234 } nir_lower_doubles_options; 3235 3236 typedef enum { 3237 nir_divergence_single_prim_per_subgroup = (1 << 0), 3238 nir_divergence_single_patch_per_tcs_subgroup = (1 << 1), 3239 nir_divergence_single_patch_per_tes_subgroup = (1 << 2), 3240 nir_divergence_view_index_uniform = (1 << 3), 3241 } nir_divergence_options; 3242 3243 typedef struct nir_shader_compiler_options { 3244 bool lower_fdiv; 3245 bool lower_ffma16; 3246 bool lower_ffma32; 3247 bool lower_ffma64; 3248 bool fuse_ffma16; 3249 bool fuse_ffma32; 3250 bool fuse_ffma64; 3251 bool lower_flrp16; 3252 bool lower_flrp32; 3253 /** Lowers flrp when it does not support doubles */ 3254 bool lower_flrp64; 3255 bool lower_fpow; 3256 bool lower_fsat; 3257 bool lower_fsqrt; 3258 bool lower_sincos; 3259 bool lower_fmod; 3260 /** Lowers ibitfield_extract/ubitfield_extract to ibfe/ubfe. 
*/ 3261 bool lower_bitfield_extract; 3262 /** Lowers ibitfield_extract/ubitfield_extract to compares, shifts. */ 3263 bool lower_bitfield_extract_to_shifts; 3264 /** Lowers bitfield_insert to bfi/bfm */ 3265 bool lower_bitfield_insert; 3266 /** Lowers bitfield_insert to compares, and shifts. */ 3267 bool lower_bitfield_insert_to_shifts; 3268 /** Lowers bitfield_insert to bfm/bitfield_select. */ 3269 bool lower_bitfield_insert_to_bitfield_select; 3270 /** Lowers bitfield_reverse to shifts. */ 3271 bool lower_bitfield_reverse; 3272 /** Lowers bit_count to shifts. */ 3273 bool lower_bit_count; 3274 /** Lowers ifind_msb to compare and ufind_msb */ 3275 bool lower_ifind_msb; 3276 /** Lowers find_lsb to ufind_msb and logic ops */ 3277 bool lower_find_lsb; 3278 bool lower_uadd_carry; 3279 bool lower_usub_borrow; 3280 /** Lowers imul_high/umul_high to 16-bit multiplies and carry operations. */ 3281 bool lower_mul_high; 3282 /** lowers fneg and ineg to fsub and isub. */ 3283 bool lower_negate; 3284 /** lowers fsub and isub to fadd+fneg and iadd+ineg. */ 3285 bool lower_sub; 3286 3287 /* lower {slt,sge,seq,sne} to {flt,fge,feq,fneu} + b2f: */ 3288 bool lower_scmp; 3289 3290 /* lower b/fall_equalN/b/fany_nequalN (ex:fany_nequal4 to sne+fdot4+fsat) */ 3291 bool lower_vector_cmp; 3292 3293 /** enable rules to avoid bit ops */ 3294 bool lower_bitops; 3295 3296 /** enables rules to lower isign to imin+imax */ 3297 bool lower_isign; 3298 3299 /** enables rules to lower fsign to fsub and flt */ 3300 bool lower_fsign; 3301 3302 /** enables rules to lower iabs to ineg+imax */ 3303 bool lower_iabs; 3304 3305 /** enable rules that avoid generating umax from signed integer ops */ 3306 bool lower_umax; 3307 3308 /** enable rules that avoid generating umin from signed integer ops */ 3309 bool lower_umin; 3310 3311 /* lower fdph to fdot4 */ 3312 bool lower_fdph; 3313 3314 /** lower fdot to fmul and fsum/fadd. */ 3315 bool lower_fdot; 3316 3317 /* Does the native fdot instruction replicate its result for four 3318 * components? If so, then opt_algebraic_late will turn all fdotN 3319 * instructions into fdotN_replicated instructions. 
3320 */ 3321 bool fdot_replicates; 3322 3323 /** lowers ffloor to fsub+ffract: */ 3324 bool lower_ffloor; 3325 3326 /** lowers ffract to fsub+ffloor: */ 3327 bool lower_ffract; 3328 3329 /** lowers fceil to fneg+ffloor+fneg: */ 3330 bool lower_fceil; 3331 3332 bool lower_ftrunc; 3333 3334 bool lower_ldexp; 3335 3336 bool lower_pack_half_2x16; 3337 bool lower_pack_unorm_2x16; 3338 bool lower_pack_snorm_2x16; 3339 bool lower_pack_unorm_4x8; 3340 bool lower_pack_snorm_4x8; 3341 bool lower_pack_64_2x32; 3342 bool lower_pack_64_4x16; 3343 bool lower_pack_32_2x16; 3344 bool lower_pack_64_2x32_split; 3345 bool lower_pack_32_2x16_split; 3346 bool lower_unpack_half_2x16; 3347 bool lower_unpack_unorm_2x16; 3348 bool lower_unpack_snorm_2x16; 3349 bool lower_unpack_unorm_4x8; 3350 bool lower_unpack_snorm_4x8; 3351 bool lower_unpack_64_2x32_split; 3352 bool lower_unpack_32_2x16_split; 3353 3354 bool lower_pack_split; 3355 3356 bool lower_extract_byte; 3357 bool lower_extract_word; 3358 3359 bool lower_all_io_to_temps; 3360 bool lower_all_io_to_elements; 3361 3362 /* Indicates that the driver only has zero-based vertex id */ 3363 bool vertex_id_zero_based; 3364 3365 /** 3366 * If enabled, gl_BaseVertex will be lowered as: 3367 * is_indexed_draw (~0/0) & firstvertex 3368 */ 3369 bool lower_base_vertex; 3370 3371 /** 3372 * If enabled, gl_HelperInvocation will be lowered as: 3373 * 3374 * !((1 << sample_id) & sample_mask_in)) 3375 * 3376 * This depends on some possibly hw implementation details, which may 3377 * not be true for all hw. In particular that the FS is only executed 3378 * for covered samples or for helper invocations. So, do not blindly 3379 * enable this option. 3380 * 3381 * Note: See also issue #22 in ARB_shader_image_load_store 3382 */ 3383 bool lower_helper_invocation; 3384 3385 /** 3386 * Convert gl_SampleMaskIn to gl_HelperInvocation as follows: 3387 * 3388 * gl_SampleMaskIn == 0 ---> gl_HelperInvocation 3389 * gl_SampleMaskIn != 0 ---> !gl_HelperInvocation 3390 */ 3391 bool optimize_sample_mask_in; 3392 3393 bool lower_cs_local_index_from_id; 3394 bool lower_cs_local_id_from_index; 3395 3396 /* Prevents lowering global_invocation_id to be in terms of work_group_id */ 3397 bool has_cs_global_id; 3398 3399 bool lower_device_index_to_zero; 3400 3401 /* Set if nir_lower_pntc_ytransform() should invert gl_PointCoord. 3402 * Either when frame buffer is flipped or GL_POINT_SPRITE_COORD_ORIGIN 3403 * is GL_LOWER_LEFT. 3404 */ 3405 bool lower_wpos_pntc; 3406 3407 /** 3408 * Set if nir_op_[iu]hadd and nir_op_[iu]rhadd instructions should be 3409 * lowered to simple arithmetic. 3410 * 3411 * If this flag is set, the lowering will be applied to all bit-sizes of 3412 * these instructions. 3413 * 3414 * \sa ::lower_hadd64 3415 */ 3416 bool lower_hadd; 3417 3418 /** 3419 * Set if only 64-bit nir_op_[iu]hadd and nir_op_[iu]rhadd instructions 3420 * should be lowered to simple arithmetic. 3421 * 3422 * If this flag is set, the lowering will be applied to only 64-bit 3423 * versions of these instructions. 3424 * 3425 * \sa ::lower_hadd 3426 */ 3427 bool lower_hadd64; 3428 3429 /** 3430 * Set if nir_op_add_sat and nir_op_usub_sat should be lowered to simple 3431 * arithmetic. 3432 * 3433 * If this flag is set, the lowering will be applied to all bit-sizes of 3434 * these instructions. 3435 * 3436 * \sa ::lower_usub_sat64 3437 */ 3438 bool lower_add_sat; 3439 3440 /** 3441 * Set if only 64-bit nir_op_usub_sat should be lowered to simple 3442 * arithmetic. 
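    * (Illustrative sketch of the arithmetic: usub_sat(a, b) behaves like
    * a >= b ? a - b : 0.)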
3443 * 3444 * \sa ::lower_add_sat 3445 */ 3446 bool lower_usub_sat64; 3447 3448 /** 3449 * Should IO be re-vectorized? Some scalar ISAs still operate on vec4's 3450 * for IO purposes and would prefer loads/stores be vectorized. 3451 */ 3452 bool vectorize_io; 3453 bool lower_to_scalar; 3454 3455 /** 3456 * Whether nir_opt_vectorize should only create 16-bit 2D vectors. 3457 */ 3458 bool vectorize_vec2_16bit; 3459 3460 /** 3461 * Should the linker unify inputs_read/outputs_written between adjacent 3462 * shader stages which are linked into a single program? 3463 */ 3464 bool unify_interfaces; 3465 3466 /** 3467 * Should nir_lower_io() create load_interpolated_input intrinsics? 3468 * 3469 * If not, it generates regular load_input intrinsics and interpolation 3470 * information must be inferred from the list of input nir_variables. 3471 */ 3472 bool use_interpolated_input_intrinsics; 3473 3474 3475 /** 3476 * Whether nir_lower_io() will lower interpolateAt functions to 3477 * load_interpolated_input intrinsics. 3478 * 3479 * Unlike use_interpolated_input_intrinsics this will only lower these 3480 * functions and leave input load intrinsics untouched. 3481 */ 3482 bool lower_interpolate_at; 3483 3484 /* Lowers when 32x32->64 bit multiplication is not supported */ 3485 bool lower_mul_2x32_64; 3486 3487 /* Lowers when rotate instruction is not supported */ 3488 bool lower_rotate; 3489 3490 /** 3491 * Backend supports imul24, and would like to use it (when possible) 3492 * for address/offset calculation. If true, driver should call 3493 * nir_lower_amul(). (If not set, amul will automatically be lowered 3494 * to imul.) 3495 */ 3496 bool has_imul24; 3497 3498 /** Backend supports umul24, if not set umul24 will automatically be lowered 3499 * to imul with masked inputs */ 3500 bool has_umul24; 3501 3502 /** Backend supports umad24, if not set umad24 will automatically be lowered 3503 * to imul with masked inputs and iadd */ 3504 bool has_umad24; 3505 3506 /* Whether to generate only scoped_barrier intrinsics instead of the set of 3507 * memory and control barrier intrinsics based on GLSL. 3508 */ 3509 bool use_scoped_barrier; 3510 3511 /** 3512 * Is this the Intel vec4 backend? 3513 * 3514 * Used to inhibit algebraic optimizations that are known to be harmful on 3515 * the Intel vec4 backend. This is generally applicable to any 3516 * optimization that might cause more immediate values to be used in 3517 * 3-source (e.g., ffma and flrp) instructions. 3518 */ 3519 bool intel_vec4; 3520 3521 /** Lower nir_op_ibfe and nir_op_ubfe that have two constant sources. */ 3522 bool lower_bfe_with_two_constants; 3523 3524 /** Whether 8-bit ALU is supported. */ 3525 bool support_8bit_alu; 3526 3527 /** Whether 16-bit ALU is supported. */ 3528 bool support_16bit_alu; 3529 3530 unsigned max_unroll_iterations; 3531 3532 /* For the non-zero value of the enum corresponds multiplier when 3533 * calling lower_uniforms_to_ubo */ 3534 bool lower_uniforms_to_ubo; 3535 3536 nir_lower_int64_options lower_int64_options; 3537 nir_lower_doubles_options lower_doubles_options; 3538 nir_divergence_options divergence_analysis_options; 3539 } nir_shader_compiler_options; 3540 3541 typedef struct nir_shader { 3542 /** list of uniforms (nir_variable) */ 3543 struct exec_list variables; 3544 3545 /** Set of driver-specific options for the shader. 3546 * 3547 * The memory for the options is expected to be kept in a single static 3548 * copy by the driver. 
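    *
    * Illustrative usage sketch (hypothetical driver code; everything other
    * than nir_shader_create() and the option field names is an assumption):
    *
    *    static const nir_shader_compiler_options my_options = {
    *       .lower_fdiv = true,
    *    };
    *    nir_shader *s = nir_shader_create(mem_ctx, MESA_SHADER_FRAGMENT,
    *                                      &my_options, NULL);
    *                                      /* NULL shader_info: assumption */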
3549 */ 3550 const struct nir_shader_compiler_options *options; 3551 3552 /** Various bits of compile-time information about a given shader */ 3553 struct shader_info info; 3554 3555 struct exec_list functions; /** < list of nir_function */ 3556 3557 /** 3558 * The size of the variable space for load_input_*, load_uniform_*, etc. 3559 * intrinsics. This is in back-end specific units which is likely one of 3560 * bytes, dwords, or vec4s depending on context and back-end. 3561 */ 3562 unsigned num_inputs, num_uniforms, num_outputs; 3563 3564 /** Size in bytes of required shared memory */ 3565 unsigned shared_size; 3566 3567 /** Size in bytes of required scratch space */ 3568 unsigned scratch_size; 3569 3570 /** Constant data associated with this shader. 3571 * 3572 * Constant data is loaded through load_constant intrinsics (as compared to 3573 * the NIR load_const instructions which have the constant value inlined 3574 * into them). This is usually generated by nir_opt_large_constants (so 3575 * shaders don't have to load_const into a temporary array when they want 3576 * to indirect on a const array). 3577 */ 3578 void *constant_data; 3579 /** Size of the constant data associated with the shader, in bytes */ 3580 unsigned constant_data_size; 3581 } nir_shader; 3582 3583 #define nir_foreach_function(func, shader) \ 3584 foreach_list_typed(nir_function, func, node, &(shader)->functions) 3585 3586 static inline nir_function_impl * nir_shader_get_entrypoint(nir_shader * shader)3587 nir_shader_get_entrypoint(nir_shader *shader) 3588 { 3589 nir_function *func = NULL; 3590 3591 nir_foreach_function(function, shader) { 3592 assert(func == NULL); 3593 if (function->is_entrypoint) { 3594 func = function; 3595 #ifndef NDEBUG 3596 break; 3597 #endif 3598 } 3599 } 3600 3601 if (!func) 3602 return NULL; 3603 3604 assert(func->num_params == 0); 3605 assert(func->impl); 3606 return func->impl; 3607 } 3608 3609 typedef struct nir_liveness_bounds { 3610 uint32_t start; 3611 uint32_t end; 3612 } nir_liveness_bounds; 3613 3614 typedef struct nir_instr_liveness { 3615 /** 3616 * nir_instr->index for the start and end of a single live interval for SSA 3617 * defs. ssa values last used by a nir_if condition will have an interval 3618 * ending at the first instruction after the last one before the if 3619 * condition. 3620 * 3621 * Indexed by def->index (impl->ssa_alloc elements). 
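    *
    * (Illustrative reading: for a given def, defs[def->index].start and
    * defs[def->index].end bound the nir_instr->index range over which that
    * def is live.)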
3622 */ 3623 struct nir_liveness_bounds *defs; 3624 } nir_instr_liveness; 3625 3626 nir_instr_liveness * 3627 nir_live_ssa_defs_per_instr(nir_function_impl *impl); 3628 3629 nir_shader *nir_shader_create(void *mem_ctx, 3630 gl_shader_stage stage, 3631 const nir_shader_compiler_options *options, 3632 shader_info *si); 3633 3634 nir_register *nir_local_reg_create(nir_function_impl *impl); 3635 3636 void nir_reg_remove(nir_register *reg); 3637 3638 /** Adds a variable to the appropriate list in nir_shader */ 3639 void nir_shader_add_variable(nir_shader *shader, nir_variable *var); 3640 3641 static inline void nir_function_impl_add_variable(nir_function_impl * impl,nir_variable * var)3642 nir_function_impl_add_variable(nir_function_impl *impl, nir_variable *var) 3643 { 3644 assert(var->data.mode == nir_var_function_temp); 3645 exec_list_push_tail(&impl->locals, &var->node); 3646 } 3647 3648 /** creates a variable, sets a few defaults, and adds it to the list */ 3649 nir_variable *nir_variable_create(nir_shader *shader, 3650 nir_variable_mode mode, 3651 const struct glsl_type *type, 3652 const char *name); 3653 /** creates a local variable and adds it to the list */ 3654 nir_variable *nir_local_variable_create(nir_function_impl *impl, 3655 const struct glsl_type *type, 3656 const char *name); 3657 3658 nir_variable *nir_find_variable_with_location(nir_shader *shader, 3659 nir_variable_mode mode, 3660 unsigned location); 3661 3662 nir_variable *nir_find_variable_with_driver_location(nir_shader *shader, 3663 nir_variable_mode mode, 3664 unsigned location); 3665 3666 /** creates a function and adds it to the shader's list of functions */ 3667 nir_function *nir_function_create(nir_shader *shader, const char *name); 3668 3669 nir_function_impl *nir_function_impl_create(nir_function *func); 3670 /** creates a function_impl that isn't tied to any particular function */ 3671 nir_function_impl *nir_function_impl_create_bare(nir_shader *shader); 3672 3673 nir_block *nir_block_create(nir_shader *shader); 3674 nir_if *nir_if_create(nir_shader *shader); 3675 nir_loop *nir_loop_create(nir_shader *shader); 3676 3677 nir_function_impl *nir_cf_node_get_function(nir_cf_node *node); 3678 3679 /** requests that the given pieces of metadata be generated */ 3680 void nir_metadata_require(nir_function_impl *impl, nir_metadata required, ...); 3681 /** dirties all but the preserved metadata */ 3682 void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved); 3683 /** Preserves all metadata for the given shader */ 3684 void nir_shader_preserve_all_metadata(nir_shader *shader); 3685 3686 /** creates an instruction with default swizzle/writemask/etc. 
with NULL registers */
nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op);

nir_deref_instr *nir_deref_instr_create(nir_shader *shader,
                                        nir_deref_type deref_type);

nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type);

nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader,
                                                  unsigned num_components,
                                                  unsigned bit_size);

nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader,
                                                nir_intrinsic_op op);

nir_call_instr *nir_call_instr_create(nir_shader *shader,
                                      nir_function *callee);

nir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs);

nir_phi_instr *nir_phi_instr_create(nir_shader *shader);

nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader);

nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader,
                                                unsigned num_components,
                                                unsigned bit_size);

nir_const_value nir_alu_binop_identity(nir_op binop, unsigned bit_size);

/**
 * NIR Cursors and Instruction Insertion API
 * @{
 *
 * A tiny struct representing a point to insert/extract instructions or
 * control flow nodes.  Helps reduce the combinatorial explosion of possible
 * points to insert/extract.
 *
 * \sa nir_control_flow.h
 */
typedef enum {
   nir_cursor_before_block,
   nir_cursor_after_block,
   nir_cursor_before_instr,
   nir_cursor_after_instr,
} nir_cursor_option;

typedef struct {
   nir_cursor_option option;
   union {
      nir_block *block;
      nir_instr *instr;
   };
} nir_cursor;

static inline nir_block *
nir_cursor_current_block(nir_cursor cursor)
{
   if (cursor.option == nir_cursor_before_instr ||
       cursor.option == nir_cursor_after_instr) {
      return cursor.instr->block;
   } else {
      return cursor.block;
   }
}

bool nir_cursors_equal(nir_cursor a, nir_cursor b);

static inline nir_cursor
nir_before_block(nir_block *block)
{
   nir_cursor cursor;
   cursor.option = nir_cursor_before_block;
   cursor.block = block;
   return cursor;
}

static inline nir_cursor
nir_after_block(nir_block *block)
{
   nir_cursor cursor;
   cursor.option = nir_cursor_after_block;
   cursor.block = block;
   return cursor;
}

static inline nir_cursor
nir_before_instr(nir_instr *instr)
{
   nir_cursor cursor;
   cursor.option = nir_cursor_before_instr;
   cursor.instr = instr;
   return cursor;
}

static inline nir_cursor
nir_after_instr(nir_instr *instr)
{
   nir_cursor cursor;
   cursor.option = nir_cursor_after_instr;
   cursor.instr = instr;
   return cursor;
}

static inline nir_cursor
nir_after_block_before_jump(nir_block *block)
{
   nir_instr *last_instr = nir_block_last_instr(block);
   if (last_instr && last_instr->type == nir_instr_type_jump) {
      return nir_before_instr(last_instr);
   } else {
      return nir_after_block(block);
   }
}

static inline nir_cursor
nir_before_src(nir_src *src, bool is_if_condition)
{
   if (is_if_condition) {
      nir_block *prev_block =
         nir_cf_node_as_block(nir_cf_node_prev(&src->parent_if->cf_node));
      assert(!nir_block_ends_in_jump(prev_block));
      return nir_after_block(prev_block);
   } else if (src->parent_instr->type == nir_instr_type_phi) {
#ifndef NDEBUG
      nir_phi_instr *cond_phi = nir_instr_as_phi(src->parent_instr);
      bool found = false;
      nir_foreach_phi_src(phi_src, cond_phi) {
         if (phi_src->src.ssa == src->ssa) {
            found = true;
            break;
         }
      }
      assert(found);
#endif
      /* The LIST_ENTRY macro is a generic container-of macro, it just happens
       * to have a more specific name.
       */
      nir_phi_src *phi_src = LIST_ENTRY(nir_phi_src, src, src);
      return nir_after_block_before_jump(phi_src->pred);
   } else {
      return nir_before_instr(src->parent_instr);
   }
}

static inline nir_cursor
nir_before_cf_node(nir_cf_node *node)
{
   if (node->type == nir_cf_node_block)
      return nir_before_block(nir_cf_node_as_block(node));

   return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node)));
}

static inline nir_cursor
nir_after_cf_node(nir_cf_node *node)
{
   if (node->type == nir_cf_node_block)
      return nir_after_block(nir_cf_node_as_block(node));

   return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node)));
}

static inline nir_cursor
nir_after_phis(nir_block *block)
{
   nir_foreach_instr(instr, block) {
      if (instr->type != nir_instr_type_phi)
         return nir_before_instr(instr);
   }
   return nir_after_block(block);
}

static inline nir_cursor
nir_after_cf_node_and_phis(nir_cf_node *node)
{
   if (node->type == nir_cf_node_block)
      return nir_after_block(nir_cf_node_as_block(node));

   nir_block *block = nir_cf_node_as_block(nir_cf_node_next(node));

   return nir_after_phis(block);
}

static inline nir_cursor
nir_before_cf_list(struct exec_list *cf_list)
{
   nir_cf_node *first_node = exec_node_data(nir_cf_node,
                                            exec_list_get_head(cf_list), node);
   return nir_before_cf_node(first_node);
}

static inline nir_cursor
nir_after_cf_list(struct exec_list *cf_list)
{
   nir_cf_node *last_node = exec_node_data(nir_cf_node,
                                           exec_list_get_tail(cf_list), node);
   return nir_after_cf_node(last_node);
}

/**
 * Insert a NIR instruction at the given cursor.
 *
 * Note: This does not update the cursor.
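 *
 * For illustration, a couple of typical uses (the instruction and block
 * pointers here are hypothetical):
 *
 *    nir_instr_insert(nir_after_instr(some_instr), new_instr);
 *    nir_instr_insert(nir_after_block_before_jump(some_block), new_instr);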
 */
void nir_instr_insert(nir_cursor cursor, nir_instr *instr);

static inline void
nir_instr_insert_before(nir_instr *instr, nir_instr *before)
{
   nir_instr_insert(nir_before_instr(instr), before);
}

static inline void
nir_instr_insert_after(nir_instr *instr, nir_instr *after)
{
   nir_instr_insert(nir_after_instr(instr), after);
}

static inline void
nir_instr_insert_before_block(nir_block *block, nir_instr *before)
{
   nir_instr_insert(nir_before_block(block), before);
}

static inline void
nir_instr_insert_after_block(nir_block *block, nir_instr *after)
{
   nir_instr_insert(nir_after_block(block), after);
}

static inline void
nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before)
{
   nir_instr_insert(nir_before_cf_node(node), before);
}

static inline void
nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after)
{
   nir_instr_insert(nir_after_cf_node(node), after);
}

static inline void
nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before)
{
   nir_instr_insert(nir_before_cf_list(list), before);
}

static inline void
nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after)
{
   nir_instr_insert(nir_after_cf_list(list), after);
}

void nir_instr_remove_v(nir_instr *instr);

static inline nir_cursor
nir_instr_remove(nir_instr *instr)
{
   nir_cursor cursor;
   nir_instr *prev = nir_instr_prev(instr);
   if (prev) {
      cursor = nir_after_instr(prev);
   } else {
      cursor = nir_before_block(instr->block);
   }
   nir_instr_remove_v(instr);
   return cursor;
}

/** @} */

nir_ssa_def *nir_instr_ssa_def(nir_instr *instr);

typedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state);
typedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state);
typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state);
bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb,
                         void *state);
bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state);
bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state);
bool nir_foreach_phi_src_leaving_block(nir_block *instr,
                                       nir_foreach_src_cb cb,
                                       void *state);

nir_const_value *nir_src_as_const_value(nir_src src);

#define NIR_SRC_AS_(name, c_type, type_enum, cast_macro)                \
static inline c_type *                                                  \
nir_src_as_ ## name (nir_src src)                                       \
{                                                                       \
    return src.is_ssa && src.ssa->parent_instr->type == type_enum       \
           ? \
cast_macro(src.ssa->parent_instr) : NULL; \ 3980 } 3981 3982 NIR_SRC_AS_(alu_instr, nir_alu_instr, nir_instr_type_alu, nir_instr_as_alu) 3983 NIR_SRC_AS_(intrinsic, nir_intrinsic_instr, 3984 nir_instr_type_intrinsic, nir_instr_as_intrinsic) 3985 NIR_SRC_AS_(deref, nir_deref_instr, nir_instr_type_deref, nir_instr_as_deref) 3986 3987 bool nir_src_is_dynamically_uniform(nir_src src); 3988 bool nir_srcs_equal(nir_src src1, nir_src src2); 3989 bool nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2); 3990 void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src); 3991 void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src); 3992 void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src); 3993 void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, 3994 nir_dest new_dest); 3995 3996 void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest, 3997 unsigned num_components, unsigned bit_size, 3998 const char *name); 3999 void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, 4000 unsigned num_components, unsigned bit_size, 4001 const char *name); 4002 static inline void nir_ssa_dest_init_for_type(nir_instr * instr,nir_dest * dest,const struct glsl_type * type,const char * name)4003 nir_ssa_dest_init_for_type(nir_instr *instr, nir_dest *dest, 4004 const struct glsl_type *type, 4005 const char *name) 4006 { 4007 assert(glsl_type_is_vector_or_scalar(type)); 4008 nir_ssa_dest_init(instr, dest, glsl_get_components(type), 4009 glsl_get_bit_size(type), name); 4010 } 4011 void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src); 4012 void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src, 4013 nir_instr *after_me); 4014 4015 nir_component_mask_t nir_ssa_def_components_read(const nir_ssa_def *def); 4016 4017 4018 /** Returns the next block, disregarding structure 4019 * 4020 * The ordering is deterministic but has no guarantees beyond that. In 4021 * particular, it is not guaranteed to be dominance-preserving. 
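 *
 * Typically used via the nir_foreach_block_unstructured() macro defined
 * below, e.g. (impl being some nir_function_impl *):
 *
 *    nir_foreach_block_unstructured(block, impl) {
 *       nir_foreach_instr(instr, block) {
 *          ...
 *       }
 *    }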
4022 */ 4023 nir_block *nir_block_unstructured_next(nir_block *block); 4024 nir_block *nir_unstructured_start_block(nir_function_impl *impl); 4025 4026 #define nir_foreach_block_unstructured(block, impl) \ 4027 for (nir_block *block = nir_unstructured_start_block(impl); block != NULL; \ 4028 block = nir_block_unstructured_next(block)) 4029 4030 #define nir_foreach_block_unstructured_safe(block, impl) \ 4031 for (nir_block *block = nir_unstructured_start_block(impl), \ 4032 *next = nir_block_unstructured_next(block); \ 4033 block != NULL; \ 4034 block = next, next = nir_block_unstructured_next(block)) 4035 4036 /* 4037 * finds the next basic block in source-code order, returns NULL if there is 4038 * none 4039 */ 4040 4041 nir_block *nir_block_cf_tree_next(nir_block *block); 4042 4043 /* Performs the opposite of nir_block_cf_tree_next() */ 4044 4045 nir_block *nir_block_cf_tree_prev(nir_block *block); 4046 4047 /* Gets the first block in a CF node in source-code order */ 4048 4049 nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node); 4050 4051 /* Gets the last block in a CF node in source-code order */ 4052 4053 nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node); 4054 4055 /* Gets the next block after a CF node in source-code order */ 4056 4057 nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node); 4058 4059 /* Macros for loops that visit blocks in source-code order */ 4060 4061 #define nir_foreach_block(block, impl) \ 4062 for (nir_block *block = nir_start_block(impl); block != NULL; \ 4063 block = nir_block_cf_tree_next(block)) 4064 4065 #define nir_foreach_block_safe(block, impl) \ 4066 for (nir_block *block = nir_start_block(impl), \ 4067 *next = nir_block_cf_tree_next(block); \ 4068 block != NULL; \ 4069 block = next, next = nir_block_cf_tree_next(block)) 4070 4071 #define nir_foreach_block_reverse(block, impl) \ 4072 for (nir_block *block = nir_impl_last_block(impl); block != NULL; \ 4073 block = nir_block_cf_tree_prev(block)) 4074 4075 #define nir_foreach_block_reverse_safe(block, impl) \ 4076 for (nir_block *block = nir_impl_last_block(impl), \ 4077 *prev = nir_block_cf_tree_prev(block); \ 4078 block != NULL; \ 4079 block = prev, prev = nir_block_cf_tree_prev(block)) 4080 4081 #define nir_foreach_block_in_cf_node(block, node) \ 4082 for (nir_block *block = nir_cf_node_cf_tree_first(node); \ 4083 block != nir_cf_node_cf_tree_next(node); \ 4084 block = nir_block_cf_tree_next(block)) 4085 4086 /* If the following CF node is an if, this function returns that if. 4087 * Otherwise, it returns NULL. 4088 */ 4089 nir_if *nir_block_get_following_if(nir_block *block); 4090 4091 nir_loop *nir_block_get_following_loop(nir_block *block); 4092 4093 void nir_index_local_regs(nir_function_impl *impl); 4094 void nir_index_ssa_defs(nir_function_impl *impl); 4095 unsigned nir_index_instrs(nir_function_impl *impl); 4096 4097 void nir_index_blocks(nir_function_impl *impl); 4098 4099 unsigned nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes); 4100 unsigned nir_function_impl_index_vars(nir_function_impl *impl); 4101 4102 void nir_print_shader(nir_shader *shader, FILE *fp); 4103 void nir_print_shader_annotated(nir_shader *shader, FILE *fp, struct hash_table *errors); 4104 void nir_print_instr(const nir_instr *instr, FILE *fp); 4105 void nir_print_deref(const nir_deref_instr *deref, FILE *fp); 4106 4107 /** Shallow clone of a single instruction. */ 4108 nir_instr *nir_instr_clone(nir_shader *s, const nir_instr *orig); 4109 4110 /** Shallow clone of a single ALU instruction. 
*/ 4111 nir_alu_instr *nir_alu_instr_clone(nir_shader *s, const nir_alu_instr *orig); 4112 4113 nir_shader *nir_shader_clone(void *mem_ctx, const nir_shader *s); 4114 nir_function_impl *nir_function_impl_clone(nir_shader *shader, 4115 const nir_function_impl *fi); 4116 nir_constant *nir_constant_clone(const nir_constant *c, nir_variable *var); 4117 nir_variable *nir_variable_clone(const nir_variable *c, nir_shader *shader); 4118 4119 void nir_shader_replace(nir_shader *dest, nir_shader *src); 4120 4121 void nir_shader_serialize_deserialize(nir_shader *s); 4122 4123 #ifndef NDEBUG 4124 void nir_validate_shader(nir_shader *shader, const char *when); 4125 void nir_validate_ssa_dominance(nir_shader *shader, const char *when); 4126 void nir_metadata_set_validation_flag(nir_shader *shader); 4127 void nir_metadata_check_validation_flag(nir_shader *shader); 4128 4129 static inline bool should_skip_nir(const char * name)4130 should_skip_nir(const char *name) 4131 { 4132 static const char *list = NULL; 4133 if (!list) { 4134 /* Comma separated list of names to skip. */ 4135 list = getenv("NIR_SKIP"); 4136 if (!list) 4137 list = ""; 4138 } 4139 4140 if (!list[0]) 4141 return false; 4142 4143 return comma_separated_list_contains(list, name); 4144 } 4145 4146 static inline bool should_clone_nir(void)4147 should_clone_nir(void) 4148 { 4149 static int should_clone = -1; 4150 if (should_clone < 0) 4151 should_clone = env_var_as_boolean("NIR_TEST_CLONE", false); 4152 4153 return should_clone; 4154 } 4155 4156 static inline bool should_serialize_deserialize_nir(void)4157 should_serialize_deserialize_nir(void) 4158 { 4159 static int test_serialize = -1; 4160 if (test_serialize < 0) 4161 test_serialize = env_var_as_boolean("NIR_TEST_SERIALIZE", false); 4162 4163 return test_serialize; 4164 } 4165 4166 static inline bool should_print_nir(nir_shader * shader)4167 should_print_nir(nir_shader *shader) 4168 { 4169 static int should_print = -1; 4170 if (should_print < 0) 4171 should_print = env_var_as_unsigned("NIR_PRINT", 0); 4172 4173 if (should_print == 1) 4174 return !shader->info.internal; 4175 4176 return should_print; 4177 } 4178 #else nir_validate_shader(nir_shader * shader,const char * when)4179 static inline void nir_validate_shader(nir_shader *shader, const char *when) { (void) shader; (void)when; } nir_validate_ssa_dominance(nir_shader * shader,const char * when)4180 static inline void nir_validate_ssa_dominance(nir_shader *shader, const char *when) { (void) shader; (void)when; } nir_metadata_set_validation_flag(nir_shader * shader)4181 static inline void nir_metadata_set_validation_flag(nir_shader *shader) { (void) shader; } nir_metadata_check_validation_flag(nir_shader * shader)4182 static inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; } should_skip_nir(UNUSED const char * pass_name)4183 static inline bool should_skip_nir(UNUSED const char *pass_name) { return false; } should_clone_nir(void)4184 static inline bool should_clone_nir(void) { return false; } should_serialize_deserialize_nir(void)4185 static inline bool should_serialize_deserialize_nir(void) { return false; } should_print_nir(nir_shader * shader)4186 static inline bool should_print_nir(nir_shader *shader) { return false; } 4187 #endif /* NDEBUG */ 4188 4189 #define _PASS(pass, nir, do_pass) do { \ 4190 if (should_skip_nir(#pass)) { \ 4191 printf("skipping %s\n", #pass); \ 4192 break; \ 4193 } \ 4194 do_pass \ 4195 if (should_clone_nir()) { \ 4196 nir_shader *clone = nir_shader_clone(ralloc_parent(nir), 
nir);                                             \
      nir_shader_replace(nir, clone);             \
   }                                              \
   if (should_serialize_deserialize_nir()) {      \
      nir_shader_serialize_deserialize(nir);      \
   }                                              \
} while (0)

#define NIR_PASS(progress, nir, pass, ...) _PASS(pass, nir,           \
   nir_metadata_set_validation_flag(nir);                             \
   if (should_print_nir(nir))                                         \
      printf("%s\n", #pass);                                          \
   if (pass(nir, ##__VA_ARGS__)) {                                    \
      nir_validate_shader(nir, "after " #pass);                       \
      progress = true;                                                \
      if (should_print_nir(nir))                                      \
         nir_print_shader(nir, stdout);                               \
      nir_metadata_check_validation_flag(nir);                        \
   }                                                                  \
)

#define NIR_PASS_V(nir, pass, ...) _PASS(pass, nir,                   \
   if (should_print_nir(nir))                                         \
      printf("%s\n", #pass);                                          \
   pass(nir, ##__VA_ARGS__);                                          \
   nir_validate_shader(nir, "after " #pass);                          \
   if (should_print_nir(nir))                                         \
      nir_print_shader(nir, stdout);                                  \
)

#define NIR_SKIP(name) should_skip_nir(#name)

/** An instruction filtering callback
 *
 * Returns true if the instruction should be processed and false otherwise.
 */
typedef bool (*nir_instr_filter_cb)(const nir_instr *, const void *);

/** A simple instruction lowering callback
 *
 * Many instruction lowering passes can be written as a simple function which
 * takes an instruction as its input and returns a sequence of instructions
 * that implement the consumed instruction.  This function type represents
 * such a lowering function.  When called, a function with this prototype
 * should either return NULL indicating that no lowering needs to be done or
 * emit a sequence of instructions using the provided builder (whose cursor
 * will already be placed after the instruction to be lowered) and return the
 * resulting nir_ssa_def.
 */
typedef nir_ssa_def *(*nir_lower_instr_cb)(struct nir_builder *,
                                           nir_instr *, void *);

/**
 * Special return value for nir_lower_instr_cb when some progress occurred
 * (like changing an input to the instr) that didn't result in a replacement
 * SSA def being generated.
 */
#define NIR_LOWER_INSTR_PROGRESS ((nir_ssa_def *)(uintptr_t)1)

/** Iterate over all the instructions in a nir_function_impl and lower them
 * using the provided callbacks
 *
 * This function implements the guts of a standard lowering pass for you.  It
 * iterates over all of the instructions in a nir_function_impl and calls the
 * filter callback on each one.  If the filter callback returns true, it then
 * calls the lowering callback on the instruction.  (Splitting it this way
 * allows us to avoid some save/restore work for instructions we know won't be
 * lowered.)  If the instruction is dead after the lowering is complete, it
 * will be removed.  If new instructions are added, the lowering callback will
 * also be called on them in case multiple lowerings are required.
 *
 * The metadata for the nir_function_impl will also be updated.  If any blocks
 * are added (they cannot be removed), dominance and block indices will be
 * invalidated.
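 *
 * As a sketch (the callback names and the progress variable here are
 * hypothetical), a pass built on this helper usually looks like:
 *
 *    static bool
 *    lower_filter(const nir_instr *instr, const void *data)
 *    {
 *       return instr->type == nir_instr_type_alu;
 *    }
 *
 *    static nir_ssa_def *
 *    lower_instr(struct nir_builder *b, nir_instr *instr, void *data)
 *    {
 *       // Build the replacement with the nir_builder and return its def,
 *       // or return NULL / NIR_LOWER_INSTR_PROGRESS as described above.
 *       return NULL;
 *    }
 *
 * and is then run as
 *
 *    NIR_PASS(progress, shader, nir_shader_lower_instructions,
 *             lower_filter, lower_instr, NULL);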
4270 */ 4271 bool nir_function_impl_lower_instructions(nir_function_impl *impl, 4272 nir_instr_filter_cb filter, 4273 nir_lower_instr_cb lower, 4274 void *cb_data); 4275 bool nir_shader_lower_instructions(nir_shader *shader, 4276 nir_instr_filter_cb filter, 4277 nir_lower_instr_cb lower, 4278 void *cb_data); 4279 4280 void nir_calc_dominance_impl(nir_function_impl *impl); 4281 void nir_calc_dominance(nir_shader *shader); 4282 4283 nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2); 4284 bool nir_block_dominates(nir_block *parent, nir_block *child); 4285 bool nir_block_is_unreachable(nir_block *block); 4286 4287 void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp); 4288 void nir_dump_dom_tree(nir_shader *shader, FILE *fp); 4289 4290 void nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp); 4291 void nir_dump_dom_frontier(nir_shader *shader, FILE *fp); 4292 4293 void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp); 4294 void nir_dump_cfg(nir_shader *shader, FILE *fp); 4295 4296 void nir_gs_count_vertices_and_primitives(const nir_shader *shader, 4297 int *out_vtxcnt, 4298 int *out_prmcnt, 4299 unsigned num_streams); 4300 4301 bool nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes); 4302 bool nir_split_array_vars(nir_shader *shader, nir_variable_mode modes); 4303 bool nir_split_var_copies(nir_shader *shader); 4304 bool nir_split_per_member_structs(nir_shader *shader); 4305 bool nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes); 4306 4307 bool nir_lower_returns_impl(nir_function_impl *impl); 4308 bool nir_lower_returns(nir_shader *shader); 4309 4310 void nir_inline_function_impl(struct nir_builder *b, 4311 const nir_function_impl *impl, 4312 nir_ssa_def **params, 4313 struct hash_table *shader_var_remap); 4314 bool nir_inline_functions(nir_shader *shader); 4315 4316 void nir_find_inlinable_uniforms(nir_shader *shader); 4317 void nir_inline_uniforms(nir_shader *shader, unsigned num_uniforms, 4318 const uint32_t *uniform_values, 4319 const uint16_t *uniform_dw_offsets); 4320 4321 bool nir_propagate_invariant(nir_shader *shader); 4322 4323 void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, nir_shader *shader); 4324 void nir_lower_deref_copy_instr(struct nir_builder *b, 4325 nir_intrinsic_instr *copy); 4326 bool nir_lower_var_copies(nir_shader *shader); 4327 4328 bool nir_opt_memcpy(nir_shader *shader); 4329 bool nir_lower_memcpy(nir_shader *shader); 4330 4331 void nir_fixup_deref_modes(nir_shader *shader); 4332 4333 bool nir_lower_global_vars_to_local(nir_shader *shader); 4334 4335 typedef enum { 4336 nir_lower_direct_array_deref_of_vec_load = (1 << 0), 4337 nir_lower_indirect_array_deref_of_vec_load = (1 << 1), 4338 nir_lower_direct_array_deref_of_vec_store = (1 << 2), 4339 nir_lower_indirect_array_deref_of_vec_store = (1 << 3), 4340 } nir_lower_array_deref_of_vec_options; 4341 4342 bool nir_lower_array_deref_of_vec(nir_shader *shader, nir_variable_mode modes, 4343 nir_lower_array_deref_of_vec_options options); 4344 4345 bool nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes, 4346 uint32_t max_lower_array_len); 4347 4348 bool nir_lower_locals_to_regs(nir_shader *shader); 4349 4350 void nir_lower_io_to_temporaries(nir_shader *shader, 4351 nir_function_impl *entrypoint, 4352 bool outputs, bool inputs); 4353 4354 bool nir_lower_vars_to_scratch(nir_shader *shader, 4355 nir_variable_mode modes, 4356 int size_threshold, 4357 glsl_type_size_align_func size_align); 4358 4359 void 
nir_lower_clip_halfz(nir_shader *shader); 4360 4361 void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint); 4362 4363 void nir_gather_ssa_types(nir_function_impl *impl, 4364 BITSET_WORD *float_types, 4365 BITSET_WORD *int_types); 4366 4367 void nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode, 4368 unsigned *size, 4369 int (*type_size)(const struct glsl_type *, bool)); 4370 4371 /* Some helpers to do very simple linking */ 4372 bool nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer); 4373 bool nir_remove_unused_io_vars(nir_shader *shader, nir_variable_mode mode, 4374 uint64_t *used_by_other_stage, 4375 uint64_t *used_by_other_stage_patches); 4376 void nir_compact_varyings(nir_shader *producer, nir_shader *consumer, 4377 bool default_to_smooth_interp); 4378 void nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer); 4379 bool nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer); 4380 4381 bool nir_lower_amul(nir_shader *shader, 4382 int (*type_size)(const struct glsl_type *, bool)); 4383 4384 bool nir_lower_ubo_vec4(nir_shader *shader); 4385 4386 void nir_assign_io_var_locations(nir_shader *shader, 4387 nir_variable_mode mode, 4388 unsigned *size, 4389 gl_shader_stage stage); 4390 4391 typedef struct { 4392 uint8_t num_linked_io_vars; 4393 uint8_t num_linked_patch_io_vars; 4394 } nir_linked_io_var_info; 4395 4396 nir_linked_io_var_info 4397 nir_assign_linked_io_var_locations(nir_shader *producer, 4398 nir_shader *consumer); 4399 4400 typedef enum { 4401 /* If set, this causes all 64-bit IO operations to be lowered on-the-fly 4402 * to 32-bit operations. This is only valid for nir_var_shader_in/out 4403 * modes. 4404 */ 4405 nir_lower_io_lower_64bit_to_32 = (1 << 0), 4406 4407 /* If set, this forces all non-flat fragment shader inputs to be 4408 * interpolated as if with the "sample" qualifier. This requires 4409 * nir_shader_compiler_options::use_interpolated_input_intrinsics. 4410 */ 4411 nir_lower_io_force_sample_interpolation = (1 << 1), 4412 } nir_lower_io_options; 4413 bool nir_lower_io(nir_shader *shader, 4414 nir_variable_mode modes, 4415 int (*type_size)(const struct glsl_type *, bool), 4416 nir_lower_io_options); 4417 4418 bool nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes); 4419 4420 bool 4421 nir_lower_vars_to_explicit_types(nir_shader *shader, 4422 nir_variable_mode modes, 4423 glsl_type_size_align_func type_info); 4424 4425 bool nir_lower_mem_constant_vars(nir_shader *shader, 4426 glsl_type_size_align_func type_info); 4427 4428 bool nir_lower_vec3_to_vec4(nir_shader *shader, nir_variable_mode modes); 4429 4430 typedef enum { 4431 /** 4432 * An address format which is a simple 32-bit global GPU address. 4433 */ 4434 nir_address_format_32bit_global, 4435 4436 /** 4437 * An address format which is a simple 64-bit global GPU address. 4438 */ 4439 nir_address_format_64bit_global, 4440 4441 /** 4442 * An address format which is a bounds-checked 64-bit global GPU address. 4443 * 4444 * The address is comprised as a 32-bit vec4 where .xy are a uint64_t base 4445 * address stored with the low bits in .x and high bits in .y, .z is a 4446 * size, and .w is an offset. When the final I/O operation is lowered, .w 4447 * is checked against .z and the operation is predicated on the result. 
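    *
    * Put differently (roughly): the effective 64-bit base address is
    * ((uint64_t)addr.y << 32) | addr.x, the access happens at that base plus
    * the offset in addr.w, and it only executes when the bounds check of
    * addr.w against the size in addr.z passes.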
4448 */ 4449 nir_address_format_64bit_bounded_global, 4450 4451 /** 4452 * An address format which is comprised of a vec2 where the first 4453 * component is a buffer index and the second is an offset. 4454 */ 4455 nir_address_format_32bit_index_offset, 4456 4457 /** 4458 * An address format which is a 64-bit value, where the high 32 bits 4459 * are a buffer index, and the low 32 bits are an offset. 4460 */ 4461 nir_address_format_32bit_index_offset_pack64, 4462 4463 /** 4464 * An address format which is comprised of a vec3 where the first two 4465 * components specify the buffer and the third is an offset. 4466 */ 4467 nir_address_format_vec2_index_32bit_offset, 4468 4469 /** 4470 * An address format which represents generic pointers with a 62-bit 4471 * pointer and a 2-bit enum in the top two bits. The top two bits have 4472 * the following meanings: 4473 * 4474 * - 0x0: Global memory 4475 * - 0x1: Shared memory 4476 * - 0x2: Scratch memory 4477 * - 0x3: Global memory 4478 * 4479 * The redundancy between 0x0 and 0x3 is because of Intel sign-extension of 4480 * addresses. Valid global memory addresses may naturally have either 0 or 4481 * ~0 as their high bits. 4482 * 4483 * Shared and scratch pointers are represented as 32-bit offsets with the 4484 * top 32 bits only being used for the enum. This allows us to avoid 4485 * 64-bit address calculations in a bunch of cases. 4486 */ 4487 nir_address_format_62bit_generic, 4488 4489 /** 4490 * An address format which is a simple 32-bit offset. 4491 */ 4492 nir_address_format_32bit_offset, 4493 4494 /** 4495 * An address format which is a simple 32-bit offset cast to 64-bit. 4496 */ 4497 nir_address_format_32bit_offset_as_64bit, 4498 4499 /** 4500 * An address format representing a purely logical addressing model. In 4501 * this model, all deref chains must be complete from the dereference 4502 * operation to the variable. Cast derefs are not allowed. These 4503 * addresses will be 32-bit scalars but the format is immaterial because 4504 * you can always chase the chain. 
4505 */ 4506 nir_address_format_logical, 4507 } nir_address_format; 4508 4509 static inline unsigned nir_address_format_bit_size(nir_address_format addr_format)4510 nir_address_format_bit_size(nir_address_format addr_format) 4511 { 4512 switch (addr_format) { 4513 case nir_address_format_32bit_global: return 32; 4514 case nir_address_format_64bit_global: return 64; 4515 case nir_address_format_64bit_bounded_global: return 32; 4516 case nir_address_format_32bit_index_offset: return 32; 4517 case nir_address_format_32bit_index_offset_pack64: return 64; 4518 case nir_address_format_vec2_index_32bit_offset: return 32; 4519 case nir_address_format_62bit_generic: return 64; 4520 case nir_address_format_32bit_offset: return 32; 4521 case nir_address_format_32bit_offset_as_64bit: return 64; 4522 case nir_address_format_logical: return 32; 4523 } 4524 unreachable("Invalid address format"); 4525 } 4526 4527 static inline unsigned nir_address_format_num_components(nir_address_format addr_format)4528 nir_address_format_num_components(nir_address_format addr_format) 4529 { 4530 switch (addr_format) { 4531 case nir_address_format_32bit_global: return 1; 4532 case nir_address_format_64bit_global: return 1; 4533 case nir_address_format_64bit_bounded_global: return 4; 4534 case nir_address_format_32bit_index_offset: return 2; 4535 case nir_address_format_32bit_index_offset_pack64: return 1; 4536 case nir_address_format_vec2_index_32bit_offset: return 3; 4537 case nir_address_format_62bit_generic: return 1; 4538 case nir_address_format_32bit_offset: return 1; 4539 case nir_address_format_32bit_offset_as_64bit: return 1; 4540 case nir_address_format_logical: return 1; 4541 } 4542 unreachable("Invalid address format"); 4543 } 4544 4545 static inline const struct glsl_type * nir_address_format_to_glsl_type(nir_address_format addr_format)4546 nir_address_format_to_glsl_type(nir_address_format addr_format) 4547 { 4548 unsigned bit_size = nir_address_format_bit_size(addr_format); 4549 assert(bit_size == 32 || bit_size == 64); 4550 return glsl_vector_type(bit_size == 32 ? 
GLSL_TYPE_UINT : GLSL_TYPE_UINT64, 4551 nir_address_format_num_components(addr_format)); 4552 } 4553 4554 const nir_const_value *nir_address_format_null_value(nir_address_format addr_format); 4555 4556 nir_ssa_def *nir_build_addr_ieq(struct nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1, 4557 nir_address_format addr_format); 4558 4559 nir_ssa_def *nir_build_addr_isub(struct nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1, 4560 nir_address_format addr_format); 4561 4562 nir_ssa_def * nir_explicit_io_address_from_deref(struct nir_builder *b, 4563 nir_deref_instr *deref, 4564 nir_ssa_def *base_addr, 4565 nir_address_format addr_format); 4566 4567 bool nir_get_explicit_deref_align(nir_deref_instr *deref, 4568 bool default_to_type_align, 4569 uint32_t *align_mul, 4570 uint32_t *align_offset); 4571 4572 void nir_lower_explicit_io_instr(struct nir_builder *b, 4573 nir_intrinsic_instr *io_instr, 4574 nir_ssa_def *addr, 4575 nir_address_format addr_format); 4576 4577 bool nir_lower_explicit_io(nir_shader *shader, 4578 nir_variable_mode modes, 4579 nir_address_format); 4580 4581 nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr); 4582 nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr); 4583 nir_src *nir_get_shader_call_payload_src(nir_intrinsic_instr *call); 4584 4585 bool nir_is_per_vertex_io(const nir_variable *var, gl_shader_stage stage); 4586 4587 bool nir_lower_regs_to_ssa_impl(nir_function_impl *impl); 4588 bool nir_lower_regs_to_ssa(nir_shader *shader); 4589 bool nir_lower_vars_to_ssa(nir_shader *shader); 4590 4591 bool nir_remove_dead_derefs(nir_shader *shader); 4592 bool nir_remove_dead_derefs_impl(nir_function_impl *impl); 4593 4594 typedef struct nir_remove_dead_variables_options { 4595 bool (*can_remove_var)(nir_variable *var, void *data); 4596 void *can_remove_var_data; 4597 } nir_remove_dead_variables_options; 4598 4599 bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes, 4600 const nir_remove_dead_variables_options *options); 4601 4602 bool nir_lower_variable_initializers(nir_shader *shader, 4603 nir_variable_mode modes); 4604 4605 bool nir_move_vec_src_uses_to_dest(nir_shader *shader); 4606 bool nir_lower_vec_to_movs(nir_shader *shader); 4607 void nir_lower_alpha_test(nir_shader *shader, enum compare_func func, 4608 bool alpha_to_one, 4609 const gl_state_index16 *alpha_ref_state_tokens); 4610 bool nir_lower_alu(nir_shader *shader); 4611 4612 bool nir_lower_flrp(nir_shader *shader, unsigned lowering_mask, 4613 bool always_precise); 4614 4615 bool nir_lower_alu_to_scalar(nir_shader *shader, nir_instr_filter_cb cb, const void *data); 4616 bool nir_lower_bool_to_bitsize(nir_shader *shader); 4617 bool nir_lower_bool_to_float(nir_shader *shader); 4618 bool nir_lower_bool_to_int32(nir_shader *shader); 4619 bool nir_opt_simplify_convert_alu_types(nir_shader *shader); 4620 bool nir_lower_convert_alu_types(nir_shader *shader, 4621 bool (*should_lower)(nir_intrinsic_instr *)); 4622 bool nir_lower_constant_convert_alu_types(nir_shader *shader); 4623 bool nir_lower_alu_conversion_to_intrinsic(nir_shader *shader); 4624 bool nir_lower_int_to_float(nir_shader *shader); 4625 bool nir_lower_load_const_to_scalar(nir_shader *shader); 4626 bool nir_lower_read_invocation_to_scalar(nir_shader *shader); 4627 bool nir_lower_phis_to_scalar(nir_shader *shader); 4628 void nir_lower_io_arrays_to_elements(nir_shader *producer, nir_shader *consumer); 4629 void nir_lower_io_arrays_to_elements_no_indirects(nir_shader *shader, 4630 bool outputs_only); 4631 
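/*
 * For illustration only: nir_lower_alu_to_scalar() above takes a
 * nir_instr_filter_cb, so a backend that wants to keep 16-bit ALU ops
 * vectorized might pass a hypothetical filter along these lines:
 *
 *    static bool
 *    scalarize_filter(const nir_instr *instr, const void *data)
 *    {
 *       if (instr->type != nir_instr_type_alu)
 *          return false;
 *       return nir_instr_as_alu(instr)->dest.dest.ssa.bit_size != 16;
 *    }
 *
 *    nir_lower_alu_to_scalar(shader, scalarize_filter, NULL);
 */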
void nir_lower_io_to_scalar(nir_shader *shader, nir_variable_mode mask); 4632 bool nir_lower_io_to_scalar_early(nir_shader *shader, nir_variable_mode mask); 4633 bool nir_lower_io_to_vector(nir_shader *shader, nir_variable_mode mask); 4634 4635 bool nir_lower_fragcolor(nir_shader *shader); 4636 bool nir_lower_fragcoord_wtrans(nir_shader *shader); 4637 void nir_lower_viewport_transform(nir_shader *shader); 4638 bool nir_lower_uniforms_to_ubo(nir_shader *shader, int multiplier); 4639 4640 typedef struct nir_lower_subgroups_options { 4641 uint8_t subgroup_size; 4642 uint8_t ballot_bit_size; 4643 bool lower_to_scalar:1; 4644 bool lower_vote_trivial:1; 4645 bool lower_vote_eq_to_ballot:1; 4646 bool lower_subgroup_masks:1; 4647 bool lower_shuffle:1; 4648 bool lower_shuffle_to_32bit:1; 4649 bool lower_shuffle_to_swizzle_amd:1; 4650 bool lower_quad:1; 4651 bool lower_quad_broadcast_dynamic:1; 4652 bool lower_quad_broadcast_dynamic_to_const:1; 4653 bool lower_elect:1; 4654 } nir_lower_subgroups_options; 4655 4656 bool nir_lower_subgroups(nir_shader *shader, 4657 const nir_lower_subgroups_options *options); 4658 4659 bool nir_lower_system_values(nir_shader *shader); 4660 4661 typedef struct nir_lower_compute_system_values_options { 4662 bool has_base_global_invocation_id:1; 4663 bool has_base_work_group_id:1; 4664 } nir_lower_compute_system_values_options; 4665 4666 bool nir_lower_compute_system_values(nir_shader *shader, 4667 const nir_lower_compute_system_values_options *options); 4668 4669 enum PACKED nir_lower_tex_packing { 4670 nir_lower_tex_packing_none = 0, 4671 /* The sampler returns up to 2 32-bit words of half floats or 16-bit signed 4672 * or unsigned ints based on the sampler type 4673 */ 4674 nir_lower_tex_packing_16, 4675 /* The sampler returns 1 32-bit word of 4x8 unorm */ 4676 nir_lower_tex_packing_8, 4677 }; 4678 4679 typedef struct nir_lower_tex_options { 4680 /** 4681 * bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which 4682 * sampler types a texture projector is lowered. 4683 */ 4684 unsigned lower_txp; 4685 4686 /** 4687 * If true, lower away nir_tex_src_offset for all texelfetch instructions. 4688 */ 4689 bool lower_txf_offset; 4690 4691 /** 4692 * If true, lower away nir_tex_src_offset for all rect textures. 4693 */ 4694 bool lower_rect_offset; 4695 4696 /** 4697 * If true, lower rect textures to 2D, using txs to fetch the 4698 * texture dimensions and dividing the texture coords by the 4699 * texture dims to normalize. 4700 */ 4701 bool lower_rect; 4702 4703 /** 4704 * If true, convert yuv to rgb. 4705 */ 4706 unsigned lower_y_uv_external; 4707 unsigned lower_y_u_v_external; 4708 unsigned lower_yx_xuxv_external; 4709 unsigned lower_xy_uxvx_external; 4710 unsigned lower_ayuv_external; 4711 unsigned lower_xyuv_external; 4712 unsigned lower_yuv_external; 4713 unsigned bt709_external; 4714 unsigned bt2020_external; 4715 4716 /** 4717 * To emulate certain texture wrap modes, this can be used 4718 * to saturate the specified tex coord to [0.0, 1.0]. The 4719 * bits are according to sampler #, ie. if, for example: 4720 * 4721 * (conf->saturate_s & (1 << n)) 4722 * 4723 * is true, then the s coord for sampler n is saturated. 4724 * 4725 * Note that clamping must happen *after* projector lowering 4726 * so any projected texture sample instruction with a clamped 4727 * coordinate gets automatically lowered, regardless of the 4728 * 'lower_txp' setting. 
4729 */ 4730 unsigned saturate_s; 4731 unsigned saturate_t; 4732 unsigned saturate_r; 4733 4734 /* Bitmask of textures that need swizzling. 4735 * 4736 * If (swizzle_result & (1 << texture_index)), then the swizzle in 4737 * swizzles[texture_index] is applied to the result of the texturing 4738 * operation. 4739 */ 4740 unsigned swizzle_result; 4741 4742 /* A swizzle for each texture. Values 0-3 represent x, y, z, or w swizzles 4743 * while 4 and 5 represent 0 and 1 respectively. 4744 */ 4745 uint8_t swizzles[32][4]; 4746 4747 /* Can be used to scale sampled values in range required by the format. */ 4748 float scale_factors[32]; 4749 4750 /** 4751 * Bitmap of textures that need srgb to linear conversion. If 4752 * (lower_srgb & (1 << texture_index)) then the rgb (xyz) components 4753 * of the texture are lowered to linear. 4754 */ 4755 unsigned lower_srgb; 4756 4757 /** 4758 * If true, lower nir_texop_tex on shaders that doesn't support implicit 4759 * LODs to nir_texop_txl. 4760 */ 4761 bool lower_tex_without_implicit_lod; 4762 4763 /** 4764 * If true, lower nir_texop_txd on cube maps with nir_texop_txl. 4765 */ 4766 bool lower_txd_cube_map; 4767 4768 /** 4769 * If true, lower nir_texop_txd on 3D surfaces with nir_texop_txl. 4770 */ 4771 bool lower_txd_3d; 4772 4773 /** 4774 * If true, lower nir_texop_txd on shadow samplers (except cube maps) 4775 * with nir_texop_txl. Notice that cube map shadow samplers are lowered 4776 * with lower_txd_cube_map. 4777 */ 4778 bool lower_txd_shadow; 4779 4780 /** 4781 * If true, lower nir_texop_txd on all samplers to a nir_texop_txl. 4782 * Implies lower_txd_cube_map and lower_txd_shadow. 4783 */ 4784 bool lower_txd; 4785 4786 /** 4787 * If true, lower nir_texop_txb that try to use shadow compare and min_lod 4788 * at the same time to a nir_texop_lod, some math, and nir_texop_tex. 4789 */ 4790 bool lower_txb_shadow_clamp; 4791 4792 /** 4793 * If true, lower nir_texop_txd on shadow samplers when it uses min_lod 4794 * with nir_texop_txl. This includes cube maps. 4795 */ 4796 bool lower_txd_shadow_clamp; 4797 4798 /** 4799 * If true, lower nir_texop_txd on when it uses both offset and min_lod 4800 * with nir_texop_txl. This includes cube maps. 4801 */ 4802 bool lower_txd_offset_clamp; 4803 4804 /** 4805 * If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the 4806 * sampler is bindless. 4807 */ 4808 bool lower_txd_clamp_bindless_sampler; 4809 4810 /** 4811 * If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the 4812 * sampler index is not statically determinable to be less than 16. 4813 */ 4814 bool lower_txd_clamp_if_sampler_index_not_lt_16; 4815 4816 /** 4817 * If true, lower nir_texop_txs with a non-0-lod into nir_texop_txs with 4818 * 0-lod followed by a nir_ishr. 4819 */ 4820 bool lower_txs_lod; 4821 4822 /** 4823 * If true, apply a .bagr swizzle on tg4 results to handle Broadcom's 4824 * mixed-up tg4 locations. 
4825 */ 4826 bool lower_tg4_broadcom_swizzle; 4827 4828 /** 4829 * If true, lowers tg4 with 4 constant offsets to 4 tg4 calls 4830 */ 4831 bool lower_tg4_offsets; 4832 4833 enum nir_lower_tex_packing lower_tex_packing[32]; 4834 } nir_lower_tex_options; 4835 4836 bool nir_lower_tex(nir_shader *shader, 4837 const nir_lower_tex_options *options); 4838 4839 bool nir_lower_cl_images_to_tex(nir_shader *shader); 4840 4841 enum nir_lower_non_uniform_access_type { 4842 nir_lower_non_uniform_ubo_access = (1 << 0), 4843 nir_lower_non_uniform_ssbo_access = (1 << 1), 4844 nir_lower_non_uniform_texture_access = (1 << 2), 4845 nir_lower_non_uniform_image_access = (1 << 3), 4846 }; 4847 4848 bool nir_lower_non_uniform_access(nir_shader *shader, 4849 enum nir_lower_non_uniform_access_type); 4850 4851 enum nir_lower_idiv_path { 4852 /* This path is based on NV50LegalizeSSA::handleDIV(). It is the faster of 4853 * the two but it is not exact in some cases (for example, 1091317713u / 4854 * 1034u gives 5209173 instead of 1055432) */ 4855 nir_lower_idiv_fast, 4856 /* This path is based on AMDGPUTargetLowering::LowerUDIVREM() and 4857 * AMDGPUTargetLowering::LowerSDIVREM(). It requires more instructions than 4858 * the nv50 path and many of them are integer multiplications, so it is 4859 * probably slower. It should always return the correct result, though. */ 4860 nir_lower_idiv_precise, 4861 }; 4862 4863 bool nir_lower_idiv(nir_shader *shader, enum nir_lower_idiv_path path); 4864 4865 typedef struct nir_input_attachment_options { 4866 bool use_fragcoord_sysval; 4867 bool use_layer_id_sysval; 4868 bool use_view_id_for_layer; 4869 } nir_input_attachment_options; 4870 4871 bool nir_lower_input_attachments(nir_shader *shader, 4872 const nir_input_attachment_options *options); 4873 4874 bool nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables, 4875 bool use_vars, 4876 bool use_clipdist_array, 4877 const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]); 4878 bool nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables, 4879 bool use_clipdist_array, 4880 const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]); 4881 bool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables, 4882 bool use_clipdist_array); 4883 bool nir_lower_clip_cull_distance_arrays(nir_shader *nir); 4884 bool nir_lower_clip_disable(nir_shader *shader, unsigned clip_plane_enable); 4885 4886 void nir_lower_point_size_mov(nir_shader *shader, 4887 const gl_state_index16 *pointsize_state_tokens); 4888 4889 bool nir_lower_frexp(nir_shader *nir); 4890 4891 void nir_lower_two_sided_color(nir_shader *shader, bool face_sysval); 4892 4893 bool nir_lower_clamp_color_outputs(nir_shader *shader); 4894 4895 bool nir_lower_flatshade(nir_shader *shader); 4896 4897 void nir_lower_passthrough_edgeflags(nir_shader *shader); 4898 bool nir_lower_patch_vertices(nir_shader *nir, unsigned static_count, 4899 const gl_state_index16 *uniform_state_tokens); 4900 4901 typedef struct nir_lower_wpos_ytransform_options { 4902 gl_state_index16 state_tokens[STATE_LENGTH]; 4903 bool fs_coord_origin_upper_left :1; 4904 bool fs_coord_origin_lower_left :1; 4905 bool fs_coord_pixel_center_integer :1; 4906 bool fs_coord_pixel_center_half_integer :1; 4907 } nir_lower_wpos_ytransform_options; 4908 4909 bool nir_lower_wpos_ytransform(nir_shader *shader, 4910 const nir_lower_wpos_ytransform_options *options); 4911 bool nir_lower_wpos_center(nir_shader *shader, const bool for_sample_shading); 4912 4913 bool nir_lower_pntc_ytransform(nir_shader *shader, 4914 const 
gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]); 4915 4916 bool nir_lower_wrmasks(nir_shader *shader, nir_instr_filter_cb cb, const void *data); 4917 4918 bool nir_lower_fb_read(nir_shader *shader); 4919 4920 typedef struct nir_lower_drawpixels_options { 4921 gl_state_index16 texcoord_state_tokens[STATE_LENGTH]; 4922 gl_state_index16 scale_state_tokens[STATE_LENGTH]; 4923 gl_state_index16 bias_state_tokens[STATE_LENGTH]; 4924 unsigned drawpix_sampler; 4925 unsigned pixelmap_sampler; 4926 bool pixel_maps :1; 4927 bool scale_and_bias :1; 4928 } nir_lower_drawpixels_options; 4929 4930 void nir_lower_drawpixels(nir_shader *shader, 4931 const nir_lower_drawpixels_options *options); 4932 4933 typedef struct nir_lower_bitmap_options { 4934 unsigned sampler; 4935 bool swizzle_xxxx; 4936 } nir_lower_bitmap_options; 4937 4938 void nir_lower_bitmap(nir_shader *shader, const nir_lower_bitmap_options *options); 4939 4940 bool nir_lower_atomics_to_ssbo(nir_shader *shader); 4941 4942 typedef enum { 4943 nir_lower_int_source_mods = 1 << 0, 4944 nir_lower_float_source_mods = 1 << 1, 4945 nir_lower_64bit_source_mods = 1 << 2, 4946 nir_lower_triop_abs = 1 << 3, 4947 nir_lower_all_source_mods = (1 << 4) - 1 4948 } nir_lower_to_source_mods_flags; 4949 4950 4951 bool nir_lower_to_source_mods(nir_shader *shader, nir_lower_to_source_mods_flags options); 4952 4953 typedef enum { 4954 nir_lower_gs_intrinsics_per_stream = 1 << 0, 4955 nir_lower_gs_intrinsics_count_primitives = 1 << 1, 4956 nir_lower_gs_intrinsics_count_vertices_per_primitive = 1 << 2, 4957 nir_lower_gs_intrinsics_overwrite_incomplete = 1 << 3, 4958 } nir_lower_gs_intrinsics_flags; 4959 4960 bool nir_lower_gs_intrinsics(nir_shader *shader, nir_lower_gs_intrinsics_flags options); 4961 4962 typedef unsigned (*nir_lower_bit_size_callback)(const nir_instr *, void *); 4963 4964 bool nir_lower_bit_size(nir_shader *shader, 4965 nir_lower_bit_size_callback callback, 4966 void *callback_data); 4967 bool nir_lower_64bit_phis(nir_shader *shader); 4968 4969 nir_lower_int64_options nir_lower_int64_op_to_options_mask(nir_op opcode); 4970 bool nir_lower_int64(nir_shader *shader); 4971 4972 nir_lower_doubles_options nir_lower_doubles_op_to_options_mask(nir_op opcode); 4973 bool nir_lower_doubles(nir_shader *shader, const nir_shader *softfp64, 4974 nir_lower_doubles_options options); 4975 bool nir_lower_pack(nir_shader *shader); 4976 4977 void nir_lower_mediump_outputs(nir_shader *nir); 4978 4979 bool nir_lower_point_size(nir_shader *shader, float min, float max); 4980 4981 typedef enum { 4982 nir_lower_interpolation_at_sample = (1 << 1), 4983 nir_lower_interpolation_at_offset = (1 << 2), 4984 nir_lower_interpolation_centroid = (1 << 3), 4985 nir_lower_interpolation_pixel = (1 << 4), 4986 nir_lower_interpolation_sample = (1 << 5), 4987 } nir_lower_interpolation_options; 4988 4989 bool nir_lower_interpolation(nir_shader *shader, 4990 nir_lower_interpolation_options options); 4991 4992 bool nir_lower_discard_to_demote(nir_shader *shader); 4993 4994 bool nir_lower_memory_model(nir_shader *shader); 4995 4996 bool nir_lower_goto_ifs(nir_shader *shader); 4997 4998 bool nir_shader_uses_view_index(nir_shader *shader); 4999 bool nir_can_lower_multiview(nir_shader *shader); 5000 bool nir_lower_multiview(nir_shader *shader, uint32_t view_mask); 5001 5002 bool nir_normalize_cubemap_coords(nir_shader *shader); 5003 5004 void nir_live_ssa_defs_impl(nir_function_impl *impl); 5005 5006 void nir_loop_analyze_impl(nir_function_impl *impl, 5007 nir_variable_mode 
indirect_mask); 5008 5009 bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b); 5010 5011 bool nir_repair_ssa_impl(nir_function_impl *impl); 5012 bool nir_repair_ssa(nir_shader *shader); 5013 5014 void nir_convert_loop_to_lcssa(nir_loop *loop); 5015 bool nir_convert_to_lcssa(nir_shader *shader, bool skip_invariants, bool skip_bool_invariants); 5016 void nir_divergence_analysis(nir_shader *shader); 5017 bool nir_update_instr_divergence(nir_shader *shader, nir_instr *instr); 5018 5019 /* If phi_webs_only is true, only convert SSA values involved in phi nodes to 5020 * registers. If false, convert all values (even those not involved in a phi 5021 * node) to registers. 5022 */ 5023 bool nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only); 5024 5025 bool nir_lower_phis_to_regs_block(nir_block *block); 5026 bool nir_lower_ssa_defs_to_regs_block(nir_block *block); 5027 bool nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl); 5028 5029 bool nir_lower_samplers(nir_shader *shader); 5030 bool nir_lower_ssbo(nir_shader *shader); 5031 5032 /* This is here for unit tests. */ 5033 bool nir_opt_comparison_pre_impl(nir_function_impl *impl); 5034 5035 bool nir_opt_comparison_pre(nir_shader *shader); 5036 5037 bool nir_opt_access(nir_shader *shader); 5038 bool nir_opt_algebraic(nir_shader *shader); 5039 bool nir_opt_algebraic_before_ffma(nir_shader *shader); 5040 bool nir_opt_algebraic_late(nir_shader *shader); 5041 bool nir_opt_algebraic_distribute_src_mods(nir_shader *shader); 5042 bool nir_opt_constant_folding(nir_shader *shader); 5043 5044 /* Try to combine a and b into a. Return true if combination was possible, 5045 * which will result in b being removed by the pass. Return false if 5046 * combination wasn't possible. 5047 */ 5048 typedef bool (*nir_combine_memory_barrier_cb)( 5049 nir_intrinsic_instr *a, nir_intrinsic_instr *b, void *data); 5050 5051 bool nir_opt_combine_memory_barriers(nir_shader *shader, 5052 nir_combine_memory_barrier_cb combine_cb, 5053 void *data); 5054 5055 bool nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes); 5056 5057 bool nir_copy_prop(nir_shader *shader); 5058 5059 bool nir_opt_copy_prop_vars(nir_shader *shader); 5060 5061 bool nir_opt_cse(nir_shader *shader); 5062 5063 bool nir_opt_dce(nir_shader *shader); 5064 5065 bool nir_opt_dead_cf(nir_shader *shader); 5066 5067 bool nir_opt_dead_write_vars(nir_shader *shader); 5068 5069 bool nir_opt_deref_impl(nir_function_impl *impl); 5070 bool nir_opt_deref(nir_shader *shader); 5071 5072 bool nir_opt_find_array_copies(nir_shader *shader); 5073 5074 bool nir_opt_gcm(nir_shader *shader, bool value_number); 5075 5076 bool nir_opt_idiv_const(nir_shader *shader, unsigned min_bit_size); 5077 5078 bool nir_opt_if(nir_shader *shader, bool aggressive_last_continue); 5079 5080 bool nir_opt_intrinsics(nir_shader *shader); 5081 5082 bool nir_opt_large_constants(nir_shader *shader, 5083 glsl_type_size_align_func size_align, 5084 unsigned threshold); 5085 5086 bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask); 5087 5088 typedef enum { 5089 nir_move_const_undef = (1 << 0), 5090 nir_move_load_ubo = (1 << 1), 5091 nir_move_load_input = (1 << 2), 5092 nir_move_comparisons = (1 << 3), 5093 nir_move_copies = (1 << 4), 5094 } nir_move_options; 5095 5096 bool nir_can_move_instr(nir_instr *instr, nir_move_options options); 5097 5098 bool nir_opt_sink(nir_shader *shader, nir_move_options options); 5099 5100 bool nir_opt_move(nir_shader *shader, nir_move_options options); 5101 
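/*
 * For example (flag choice purely illustrative), a backend that benefits from
 * sinking UBO loads and comparisons toward their uses might run:
 *
 *    nir_opt_sink(shader, nir_move_load_ubo | nir_move_comparisons);
 *    nir_opt_move(shader, nir_move_load_ubo | nir_move_comparisons);
 */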
5102 bool nir_opt_peephole_select(nir_shader *shader, unsigned limit, 5103 bool indirect_load_ok, bool expensive_alu_ok); 5104 5105 bool nir_opt_rematerialize_compares(nir_shader *shader); 5106 5107 bool nir_opt_remove_phis(nir_shader *shader); 5108 bool nir_opt_remove_phis_block(nir_block *block); 5109 5110 bool nir_opt_shrink_vectors(nir_shader *shader); 5111 5112 bool nir_opt_trivial_continues(nir_shader *shader); 5113 5114 bool nir_opt_undef(nir_shader *shader); 5115 5116 bool nir_opt_uniform_atomics(nir_shader *shader); 5117 5118 typedef bool (*nir_opt_vectorize_cb)(const nir_instr *a, const nir_instr *b, 5119 void *data); 5120 bool nir_opt_vectorize(nir_shader *shader, nir_opt_vectorize_cb filter, 5121 void *data); 5122 5123 bool nir_opt_conditional_discard(nir_shader *shader); 5124 5125 typedef bool (*nir_should_vectorize_mem_func)(unsigned align_mul, 5126 unsigned align_offset, 5127 unsigned bit_size, 5128 unsigned num_components, 5129 nir_intrinsic_instr *low, nir_intrinsic_instr *high); 5130 5131 bool nir_opt_load_store_vectorize(nir_shader *shader, nir_variable_mode modes, 5132 nir_should_vectorize_mem_func callback, 5133 nir_variable_mode robust_modes); 5134 5135 void nir_sweep(nir_shader *shader); 5136 5137 void nir_remap_dual_slot_attributes(nir_shader *shader, 5138 uint64_t *dual_slot_inputs); 5139 uint64_t nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot); 5140 5141 nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val); 5142 gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin); 5143 5144 static inline bool nir_variable_is_in_ubo(const nir_variable * var)5145 nir_variable_is_in_ubo(const nir_variable *var) 5146 { 5147 return (var->data.mode == nir_var_mem_ubo && 5148 var->interface_type != NULL); 5149 } 5150 5151 static inline bool nir_variable_is_in_ssbo(const nir_variable * var)5152 nir_variable_is_in_ssbo(const nir_variable *var) 5153 { 5154 return (var->data.mode == nir_var_mem_ssbo && 5155 var->interface_type != NULL); 5156 } 5157 5158 static inline bool nir_variable_is_in_block(const nir_variable * var)5159 nir_variable_is_in_block(const nir_variable *var) 5160 { 5161 return nir_variable_is_in_ubo(var) || nir_variable_is_in_ssbo(var); 5162 } 5163 5164 typedef struct nir_unsigned_upper_bound_config { 5165 unsigned min_subgroup_size; 5166 unsigned max_subgroup_size; 5167 unsigned max_work_group_invocations; 5168 unsigned max_work_group_count[3]; 5169 unsigned max_work_group_size[3]; 5170 5171 uint32_t vertex_attrib_max[32]; 5172 } nir_unsigned_upper_bound_config; 5173 5174 uint32_t 5175 nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht, 5176 nir_ssa_scalar scalar, 5177 const nir_unsigned_upper_bound_config *config); 5178 5179 bool 5180 nir_addition_might_overflow(nir_shader *shader, struct hash_table *range_ht, 5181 nir_ssa_scalar ssa, unsigned const_val, 5182 const nir_unsigned_upper_bound_config *config); 5183 5184 #ifdef __cplusplus 5185 } /* extern "C" */ 5186 #endif 5187 5188 #endif /* NIR_H */ 5189