/*
 * Copyright © 2025 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include "brw_nir.h"
#include "intel_nir.h"

#include "compiler/nir/nir_builder.h"
#include "compiler/spirv/nir_spirv.h"
#include "compiler/spirv/spirv_info.h"
#include "dev/intel_debug.h"
#include "util/u_dynarray.h"

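/* Run the core NIR optimization passes repeatedly until none of them makes
 * any further progress.
 */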
static void
optimize(nir_shader *nir)
{
   bool progress;
   do {
      progress = false;

      NIR_PASS(progress, nir, nir_split_var_copies);
      NIR_PASS(progress, nir, nir_split_struct_vars, nir_var_function_temp);
      NIR_PASS(progress, nir, nir_lower_var_copies);
      NIR_PASS(progress, nir, nir_lower_vars_to_ssa);

      NIR_PASS(progress, nir, nir_copy_prop);
      NIR_PASS(progress, nir, nir_opt_remove_phis);
      NIR_PASS(progress, nir, nir_lower_phis_to_scalar, true);
      NIR_PASS(progress, nir, nir_opt_dce);
      NIR_PASS(progress, nir, nir_opt_dead_cf);
      NIR_PASS(progress, nir, nir_opt_cse);
      NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
      NIR_PASS(progress, nir, nir_opt_phi_precision);
      NIR_PASS(progress, nir, nir_opt_algebraic);
      NIR_PASS(progress, nir, nir_opt_constant_folding);

      NIR_PASS(progress, nir, nir_opt_deref);
      NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
      NIR_PASS(progress, nir, nir_opt_undef);
      NIR_PASS(progress, nir, nir_lower_undef_to_zero);

      NIR_PASS(progress, nir, nir_opt_shrink_vectors, true);
      NIR_PASS(progress, nir, nir_opt_loop_unroll);

   } while (progress);
}

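/* Turn an OpenCL SPIR-V module produced by clc into a NIR shader library.
 * The returned shader is parented to mem_ctx and is built with
 * create_library so that its functions can later be linked into individual
 * kernels.
 */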
nir_shader *
brw_nir_from_spirv(void *mem_ctx, const uint32_t *spirv, size_t spirv_size)
{
   static const struct spirv_capabilities spirv_caps = {
      .Addresses = true,
      .Float16 = true,
      .Float64 = true,
      .Groups = true,
      .StorageImageWriteWithoutFormat = true,
      .Int8 = true,
      .Int16 = true,
      .Int64 = true,
      .Int64Atomics = true,
      .Kernel = true,
      .Linkage = true, /* We receive a linked kernel from clc */
      .DenormFlushToZero = true,
      .DenormPreserve = true,
      .SignedZeroInfNanPreserve = true,
      .RoundingModeRTE = true,
      .RoundingModeRTZ = true,
      .GenericPointer = true,
      .GroupNonUniform = true,
      .GroupNonUniformArithmetic = true,
      .GroupNonUniformClustered = true,
      .GroupNonUniformBallot = true,
      .GroupNonUniformQuad = true,
      .GroupNonUniformShuffle = true,
      .GroupNonUniformVote = true,
      .SubgroupDispatch = true,
   };
   struct spirv_to_nir_options spirv_options = {
      .environment = NIR_SPIRV_OPENCL,
      .capabilities = &spirv_caps,
      .printf = true,
      .shared_addr_format = nir_address_format_62bit_generic,
      .global_addr_format = nir_address_format_62bit_generic,
      .temp_addr_format = nir_address_format_62bit_generic,
      .constant_addr_format = nir_address_format_64bit_global,
      .create_library = true,
   };

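   /* spirv_to_nir() consumes the module as 32-bit words, so the byte size
    * must be a multiple of four.
    */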
   assert(spirv_size % 4 == 0);

   const nir_shader_compiler_options *nir_options = &brw_scalar_nir_options;

   nir_shader *nir =
      spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL,
                   "library", &spirv_options, nir_options);
   nir_validate_shader(nir, "after spirv_to_nir");
   nir_validate_ssa_dominance(nir, "after spirv_to_nir");
   ralloc_steal(mem_ctx, nir);
   nir->info.name = ralloc_strdup(nir, "library");

   nir_fixup_is_exported(nir);

   NIR_PASS(_, nir, nir_lower_system_values);
   NIR_PASS(_, nir, nir_lower_calls_to_builtins);

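   /* Lower printf calls into stores of the format-string identifier and the
    * packed arguments into a printf buffer.
    */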
   NIR_PASS_V(nir, nir_lower_printf, &(const struct nir_lower_printf_options) {
      .ptr_bit_size = 64,
      .use_printf_base_identifier = true,
   });

   NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS(_, nir, nir_lower_returns);
   NIR_PASS(_, nir, nir_inline_functions);
   //nir_remove_non_exported(nir);
   NIR_PASS(_, nir, nir_copy_prop);
   NIR_PASS(_, nir, nir_opt_deref);

   /* We can't deal with constant data, so get rid of it. */
   nir_lower_constant_to_temp(nir);

   /* We can go ahead and lower the rest of the constant initializers.  We do
    * this here so that nir_remove_dead_variables below sees the corresponding
    * stores.
    */
   NIR_PASS(_, nir, nir_lower_variable_initializers, ~0);

   /* LLVM loves to take advantage of the fact that vec3s in OpenCL are 16B
    * aligned and so it can just read/write them as vec4s.  This results in a
    * LOT of vec4->vec3 casts on loads and stores.  One solution to this
    * problem is to get rid of all vec3 variables.
    */
   NIR_PASS(_, nir, nir_lower_vec3_to_vec4,
            nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
            nir_var_mem_global | nir_var_mem_constant);

   /* We assign explicit types early so that the optimizer can take advantage
    * of that information and hopefully get rid of some of our memcpys.
    */
   NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
            nir_var_uniform | nir_var_shader_temp | nir_var_function_temp |
            nir_var_mem_shared | nir_var_mem_global,
            glsl_get_cl_type_size_align);

   optimize(nir);

   NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_all, NULL);

   /* Lower again, this time after dead-variables to get more compact variable
    * layouts.
    */
   NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
            nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
            nir_var_mem_global | nir_var_mem_constant,
            glsl_get_cl_type_size_align);
   assert(nir->constant_data_size == 0);

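   /* With explicit layouts assigned, any remaining memcpys can be lowered to
    * plain loads and stores.
    */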
   NIR_PASS(_, nir, nir_lower_memcpy);

   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_constant,
            nir_address_format_64bit_global);

   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_uniform,
            nir_address_format_64bit_global);

   /* Note: we cannot lower the remaining explicit I/O here, because we need
    * derefs intact for function calls into the library to work.
    */

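   /* Lower conversion intrinsics to ALU ops, simplify control flow, and
    * strength-reduce integer division by constants before a final
    * optimization round.
    */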
   NIR_PASS(_, nir, nir_lower_convert_alu_types, NULL);
   NIR_PASS(_, nir, nir_opt_if, 0);
   NIR_PASS(_, nir, nir_opt_idiv_const, 16);

   optimize(nir);

   return nir;
}