1 /* 2 * Copyright 2024 Intel Corporation 3 * SPDX-License-Identifier: MIT 4 */ 5 6 #include "elk_nir_options.h" 7 8 #define COMMON_OPTIONS \ 9 .has_uclz = true, \ 10 .lower_fdiv = true, \ 11 .lower_scmp = true, \ 12 .lower_flrp16 = true, \ 13 .lower_fmod = true, \ 14 .lower_ufind_msb = true, \ 15 .lower_uadd_carry = true, \ 16 .lower_usub_borrow = true, \ 17 .lower_flrp64 = true, \ 18 .lower_fisnormal = true, \ 19 .lower_isign = true, \ 20 .lower_ldexp = true, \ 21 .lower_bitfield_extract = true, \ 22 .lower_bitfield_insert = true, \ 23 .lower_device_index_to_zero = true, \ 24 .vectorize_io = true, \ 25 .vectorize_tess_levels = true, \ 26 .use_interpolated_input_intrinsics = true, \ 27 .lower_insert_byte = true, \ 28 .lower_insert_word = true, \ 29 .vertex_id_zero_based = true, \ 30 .lower_base_vertex = true, \ 31 .support_16bit_alu = true, \ 32 .lower_uniforms_to_ubo = true 33 34 #define COMMON_SCALAR_OPTIONS \ 35 .lower_to_scalar = true, \ 36 .lower_pack_half_2x16 = true, \ 37 .lower_pack_snorm_2x16 = true, \ 38 .lower_pack_snorm_4x8 = true, \ 39 .lower_pack_unorm_2x16 = true, \ 40 .lower_pack_unorm_4x8 = true, \ 41 .lower_unpack_half_2x16 = true, \ 42 .lower_unpack_snorm_2x16 = true, \ 43 .lower_unpack_snorm_4x8 = true, \ 44 .lower_unpack_unorm_2x16 = true, \ 45 .lower_unpack_unorm_4x8 = true, \ 46 .lower_hadd64 = true, \ 47 .avoid_ternary_with_two_constants = true, \ 48 .has_pack_32_4x8 = true, \ 49 .max_unroll_iterations = 32, \ 50 .force_indirect_unrolling = nir_var_function_temp, \ 51 .divergence_analysis_options = \ 52 (nir_divergence_single_patch_per_tcs_subgroup | \ 53 nir_divergence_single_patch_per_tes_subgroup | \ 54 nir_divergence_shader_record_ptr_uniform) 55 56 const struct nir_shader_compiler_options elk_scalar_nir_options = { 57 COMMON_OPTIONS, 58 COMMON_SCALAR_OPTIONS, 59 }; 60 61 const struct nir_shader_compiler_options elk_vector_nir_options = { 62 COMMON_OPTIONS, 63 64 /* In the vec4 backend, our dpN instruction replicates its result to all the 65 * components of a vec4. We would like NIR to give us replicated fdot 66 * instructions because it can optimize better for us. 67 */ 68 .fdot_replicates = true, 69 70 .lower_usub_sat = true, 71 .lower_pack_snorm_2x16 = true, 72 .lower_pack_unorm_2x16 = true, 73 .lower_unpack_snorm_2x16 = true, 74 .lower_unpack_unorm_2x16 = true, 75 .lower_extract_byte = true, 76 .lower_extract_word = true, 77 .intel_vec4 = true, 78 .max_unroll_iterations = 32, 79 }; 80