/*
 * Copyright © 2025 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include "brw_nir.h"
#include "intel_nir.h"

#include "compiler/nir/nir_builder.h"
#include "compiler/spirv/nir_spirv.h"
#include "compiler/spirv/spirv_info.h"
#include "dev/intel_debug.h"
#include "util/u_dynarray.h"

/* Standard NIR optimization loop: keep re-running the passes until none of
 * them reports progress.
 */
static void
optimize(nir_shader *nir)
{
   bool progress;
   do {
      progress = false;
      NIR_PASS(progress, nir, nir_split_var_copies);
      NIR_PASS(progress, nir, nir_split_struct_vars, nir_var_function_temp);
      NIR_PASS(progress, nir, nir_lower_var_copies);
      NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
      NIR_PASS(progress, nir, nir_copy_prop);
      NIR_PASS(progress, nir, nir_opt_remove_phis);
      NIR_PASS(progress, nir, nir_lower_phis_to_scalar, true);
      NIR_PASS(progress, nir, nir_opt_dce);
      NIR_PASS(progress, nir, nir_opt_dead_cf);
      NIR_PASS(progress, nir, nir_opt_cse);
      NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
      NIR_PASS(progress, nir, nir_opt_phi_precision);
      NIR_PASS(progress, nir, nir_opt_algebraic);
      NIR_PASS(progress, nir, nir_opt_constant_folding);
      NIR_PASS(progress, nir, nir_opt_deref);
      NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
      NIR_PASS(progress, nir, nir_opt_undef);
      NIR_PASS(progress, nir, nir_lower_undef_to_zero);
      NIR_PASS(progress, nir, nir_opt_shrink_vectors, true);
      NIR_PASS(progress, nir, nir_opt_loop_unroll);
   } while (progress);
}

nir_shader *
brw_nir_from_spirv(void *mem_ctx, const uint32_t *spirv, size_t spirv_size)
{
   static const struct spirv_capabilities spirv_caps = {
      .Addresses = true,
      .Float16 = true,
      .Float64 = true,
      .Groups = true,
      .StorageImageWriteWithoutFormat = true,
      .Int8 = true,
      .Int16 = true,
      .Int64 = true,
      .Int64Atomics = true,
      .Kernel = true,
      .Linkage = true, /* We receive a linked kernel from clc */
      .DenormFlushToZero = true,
      .DenormPreserve = true,
      .SignedZeroInfNanPreserve = true,
      .RoundingModeRTE = true,
      .RoundingModeRTZ = true,
      .GenericPointer = true,
      .GroupNonUniform = true,
      .GroupNonUniformArithmetic = true,
      .GroupNonUniformClustered = true,
      .GroupNonUniformBallot = true,
      .GroupNonUniformQuad = true,
      .GroupNonUniformShuffle = true,
      .GroupNonUniformVote = true,
      .SubgroupDispatch = true,
   };

   struct spirv_to_nir_options spirv_options = {
      .environment = NIR_SPIRV_OPENCL,
      .capabilities = &spirv_caps,
      .printf = true,
      .shared_addr_format = nir_address_format_62bit_generic,
      .global_addr_format = nir_address_format_62bit_generic,
      .temp_addr_format = nir_address_format_62bit_generic,
      .constant_addr_format = nir_address_format_64bit_global,
      .create_library = true,
   };

   /* spirv_size is in bytes; spirv_to_nir() takes a count of 32-bit words. */
   assert(spirv_size % 4 == 0);

   const nir_shader_compiler_options *nir_options = &brw_scalar_nir_options;

   nir_shader *nir =
      spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL,
                   "library", &spirv_options, nir_options);
   nir_validate_shader(nir, "after spirv_to_nir");
   nir_validate_ssa_dominance(nir, "after spirv_to_nir");
   ralloc_steal(mem_ctx, nir);
   nir->info.name = ralloc_strdup(nir, "library");

   nir_fixup_is_exported(nir);

   NIR_PASS(_, nir, nir_lower_system_values);
   NIR_PASS(_, nir, nir_lower_calls_to_builtins);

   NIR_PASS_V(nir, nir_lower_printf, &(const struct nir_lower_printf_options) {
      .ptr_bit_size = 64,
      .use_printf_base_identifier = true,
   });

   NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS(_, nir, nir_lower_returns);
   NIR_PASS(_, nir, nir_inline_functions);
   //nir_remove_non_exported(nir);
   NIR_PASS(_, nir, nir_copy_prop);
   NIR_PASS(_, nir, nir_opt_deref);

   /* We can't deal with constant data, get rid of it */
   nir_lower_constant_to_temp(nir);
   /* We can go ahead and lower the rest of the constant initializers.  We do
    * this here so that nir_remove_dead_variables and split_per_member_structs
    * below see the corresponding stores.
    */
   NIR_PASS(_, nir, nir_lower_variable_initializers, ~0);

   /* LLVM loves to take advantage of the fact that vec3s in OpenCL are 16B
    * aligned and so it can just read/write them as vec4s.  This results in a
    * LOT of vec4->vec3 casts on loads and stores.  One solution to this
    * problem is to get rid of all vec3 variables.
    */
   NIR_PASS(_, nir, nir_lower_vec3_to_vec4,
            nir_var_shader_temp | nir_var_function_temp |
            nir_var_mem_shared | nir_var_mem_global |
            nir_var_mem_constant);

   /* We assign explicit types early so that the optimizer can take advantage
    * of that information and hopefully get rid of some of our memcpys.
    */
   NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
            nir_var_uniform | nir_var_shader_temp | nir_var_function_temp |
            nir_var_mem_shared | nir_var_mem_global,
            glsl_get_cl_type_size_align);

   optimize(nir);

   NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_all, NULL);

   /* Lower again, this time after dead-variables to get more compact variable
    * layouts.
    */
   NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
            nir_var_shader_temp | nir_var_function_temp |
            nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant,
            glsl_get_cl_type_size_align);
   assert(nir->constant_data_size == 0);

   NIR_PASS(_, nir, nir_lower_memcpy);

   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_constant,
            nir_address_format_64bit_global);

   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_uniform,
            nir_address_format_64bit_global);

   /* Note: we cannot lower explicit I/O here, because we need derefs intact
    * for function calls into the library to work.
    */

   NIR_PASS(_, nir, nir_lower_convert_alu_types, NULL);
   NIR_PASS(_, nir, nir_opt_if, 0);
   NIR_PASS(_, nir, nir_opt_idiv_const, 16);

   optimize(nir);

   return nir;
}
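
/* Minimal usage sketch (not part of the build, kept under #if 0): it only
 * illustrates the calling convention of brw_nir_from_spirv() as defined in
 * this file.  The load_spirv_words() helper and the way the ralloc context is
 * created here are hypothetical stand-ins for however a real caller obtains
 * its SPIR-V blob and manages memory.
 */
#if 0
static nir_shader *
example_build_library(void)
{
   void *mem_ctx = ralloc_context(NULL);

   /* Hypothetical helper: returns the SPIR-V module as 32-bit words and
    * reports its size in bytes, which must be a multiple of 4 (one SPIR-V
    * word) as asserted in brw_nir_from_spirv().
    */
   size_t spirv_size_bytes;
   const uint32_t *spirv = load_spirv_words(&spirv_size_bytes);

   /* The returned nir_shader is ralloc_steal()'d onto mem_ctx, so freeing
    * mem_ctx later also frees the shader.
    */
   nir_shader *library = brw_nir_from_spirv(mem_ctx, spirv, spirv_size_bytes);
   return library;
}
#endif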