/*
 * Copyright © 2025 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include "brw_nir.h"
#include "intel_nir.h"

#include "compiler/nir/nir_builder.h"
#include "compiler/spirv/nir_spirv.h"
#include "compiler/spirv/spirv_info.h"
#include "dev/intel_debug.h"
#include "util/u_dynarray.h"

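/* Run the usual NIR cleanup and optimization passes in a loop until none of
 * them reports any further progress.
 */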
static void
optimize(nir_shader *nir)
{
   bool progress;
   do {
      progress = false;

      NIR_PASS(progress, nir, nir_split_var_copies);
      NIR_PASS(progress, nir, nir_split_struct_vars, nir_var_function_temp);
      NIR_PASS(progress, nir, nir_lower_var_copies);
      NIR_PASS(progress, nir, nir_lower_vars_to_ssa);

      NIR_PASS(progress, nir, nir_copy_prop);
      NIR_PASS(progress, nir, nir_opt_remove_phis);
      NIR_PASS(progress, nir, nir_lower_phis_to_scalar, true);
      NIR_PASS(progress, nir, nir_opt_dce);
      NIR_PASS(progress, nir, nir_opt_dead_cf);
      NIR_PASS(progress, nir, nir_opt_cse);
      NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
      NIR_PASS(progress, nir, nir_opt_phi_precision);
      NIR_PASS(progress, nir, nir_opt_algebraic);
      NIR_PASS(progress, nir, nir_opt_constant_folding);

      NIR_PASS(progress, nir, nir_opt_deref);
      NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
      NIR_PASS(progress, nir, nir_opt_undef);
      NIR_PASS(progress, nir, nir_lower_undef_to_zero);

      NIR_PASS(progress, nir, nir_opt_shrink_vectors, true);
      NIR_PASS(progress, nir, nir_opt_loop_unroll);

   } while (progress);
}

nir_shader *
brw_nir_from_spirv(void *mem_ctx, const uint32_t *spirv, size_t spirv_size)
{
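   /* SPIR-V capabilities accepted here; the input is an OpenCL kernel that
    * has already been compiled and linked by clc.
    */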
   static const struct spirv_capabilities spirv_caps = {
      .Addresses = true,
      .Float16 = true,
      .Float64 = true,
      .Groups = true,
      .StorageImageWriteWithoutFormat = true,
      .Int8 = true,
      .Int16 = true,
      .Int64 = true,
      .Int64Atomics = true,
      .Kernel = true,
      .Linkage = true, /* We receive a linked kernel from clc */
      .DenormFlushToZero = true,
      .DenormPreserve = true,
      .SignedZeroInfNanPreserve = true,
      .RoundingModeRTE = true,
      .RoundingModeRTZ = true,
      .GenericPointer = true,
      .GroupNonUniform = true,
      .GroupNonUniformArithmetic = true,
      .GroupNonUniformClustered = true,
      .GroupNonUniformBallot = true,
      .GroupNonUniformQuad = true,
      .GroupNonUniformShuffle = true,
      .GroupNonUniformVote = true,
      .SubgroupDispatch = true,
   };
   struct spirv_to_nir_options spirv_options = {
      .environment = NIR_SPIRV_OPENCL,
      .capabilities = &spirv_caps,
      .printf = true,
      .shared_addr_format = nir_address_format_62bit_generic,
      .global_addr_format = nir_address_format_62bit_generic,
      .temp_addr_format = nir_address_format_62bit_generic,
      .constant_addr_format = nir_address_format_64bit_global,
      .create_library = true,
   };

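   /* spirv_to_nir() consumes the binary as 32-bit words, so the size must be
    * a whole number of words.
    */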
   assert(spirv_size % 4 == 0);

   const nir_shader_compiler_options *nir_options = &brw_scalar_nir_options;

   nir_shader *nir =
      spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL,
                   "library", &spirv_options, nir_options);
   nir_validate_shader(nir, "after spirv_to_nir");
   nir_validate_ssa_dominance(nir, "after spirv_to_nir");
   ralloc_steal(mem_ctx, nir);
   nir->info.name = ralloc_strdup(nir, "library");

   nir_fixup_is_exported(nir);

   NIR_PASS(_, nir, nir_lower_system_values);
   NIR_PASS(_, nir, nir_lower_calls_to_builtins);

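   /* Lower printf() calls.  The options below request 64-bit pointers and a
    * per-shader printf base identifier.
    */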
   NIR_PASS_V(nir, nir_lower_printf, &(const struct nir_lower_printf_options) {
         .ptr_bit_size               = 64,
         .use_printf_base_identifier = true,
      });

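   /* Lower variable initializers and returns, inline function calls, and
    * clean up the copies and derefs this produces.
    */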
   NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS(_, nir, nir_lower_returns);
   NIR_PASS(_, nir, nir_inline_functions);
   //nir_remove_non_exported(nir);
   NIR_PASS(_, nir, nir_copy_prop);
   NIR_PASS(_, nir, nir_opt_deref);

   /* We can't deal with constant data, so get rid of it. */
   nir_lower_constant_to_temp(nir);

   /* We can go ahead and lower the rest of the constant initializers.  We do
    * this here so that nir_remove_dead_variables and split_per_member_structs
    * below see the corresponding stores.
    */
   NIR_PASS(_, nir, nir_lower_variable_initializers, ~0);

   /* LLVM loves to take advantage of the fact that vec3s in OpenCL are 16B
    * aligned and so it can just read/write them as vec4s.  This results in a
    * LOT of vec4->vec3 casts on loads and stores.  One solution to this
    * problem is to get rid of all vec3 variables.
    */
   NIR_PASS(_, nir, nir_lower_vec3_to_vec4,
            nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
               nir_var_mem_global | nir_var_mem_constant);

   /* We assign explicit types early so that the optimizer can take advantage
    * of that information and hopefully get rid of some of our memcpys.
    */
   NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
            nir_var_uniform | nir_var_shader_temp | nir_var_function_temp |
               nir_var_mem_shared | nir_var_mem_global,
            glsl_get_cl_type_size_align);

   optimize(nir);

   NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_all, NULL);

   /* Lower again, this time after dead-variables to get more compact variable
    * layouts.
    */
   NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
            nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
               nir_var_mem_global | nir_var_mem_constant,
            glsl_get_cl_type_size_align);
   assert(nir->constant_data_size == 0);

   NIR_PASS(_, nir, nir_lower_memcpy);

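   /* Constant and uniform derefs can be lowered to 64-bit global addresses
    * now; the remaining modes keep their derefs (see the note below).
    */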
   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_constant,
            nir_address_format_64bit_global);

   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_uniform,
            nir_address_format_64bit_global);

   /* Note: we cannot lower explicit I/O here, because we need derefs intact
    * for function calls into the library to work.
    */

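   /* Final lowering: conversion ALU ops, if-statement cleanup, and integer
    * division by constants, followed by one more optimization round.
    */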
   NIR_PASS(_, nir, nir_lower_convert_alu_types, NULL);
   NIR_PASS(_, nir, nir_opt_if, 0);
   NIR_PASS(_, nir, nir_opt_idiv_const, 16);

   optimize(nir);

   return nir;
}