/*
 * Copyright 2023 Alyssa Rosenzweig
 * Copyright 2020 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include "asahi_clc.h"
#include "asahi/compiler/agx_compile.h"
#include "asahi/compiler/agx_nir.h"
#include "compiler/glsl_types.h"
#include "compiler/spirv/nir_spirv.h"
#include "nir.h"
#include "nir_builder.h"
#include "nir_builder_opcodes.h"
#include "nir_intrinsics.h"
#include "nir_precompiled.h"
#include "shader_enums.h"

#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include "util/macros.h"

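/* Hardware targets to generate binaries for. Within this tool, g13x (the
 * multi-cluster G13 parts) differs from g13g only in how coherent global
 * atomics are handled; see remap_variant() and needs_g13x_coherency below.
 */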
const char *targets[] = {"g13g", "g13x"};

#define foreach_target(target)                                                 \
   for (const char **target = &targets[0];                                     \
        target < &targets[ARRAY_SIZE(targets)]; ++target)

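/* nir_address_format_62bit_generic encodes the memory space in the top two
 * bits of a 64-bit pointer, so generic OpenCL pointers can be resolved at
 * runtime. Constant pointers use plain 64-bit global addressing (see the
 * nir_var_mem_constant lowering in compile() below).
 */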
static const struct spirv_to_nir_options spirv_options = {
   .environment = NIR_SPIRV_OPENCL,
   .shared_addr_format = nir_address_format_62bit_generic,
   .global_addr_format = nir_address_format_62bit_generic,
   .temp_addr_format = nir_address_format_62bit_generic,
   .constant_addr_format = nir_address_format_64bit_global,
   .create_library = true,
   .printf = true,
};

/* Standard optimization loop */
static void
optimize(nir_shader *nir)
{
   bool progress;
   do {
      progress = false;

      NIR_PASS(progress, nir, nir_split_var_copies);
      NIR_PASS(progress, nir, nir_split_struct_vars, nir_var_function_temp);
      NIR_PASS(progress, nir, nir_lower_var_copies);
      NIR_PASS(progress, nir, nir_lower_vars_to_ssa);

      NIR_PASS(progress, nir, nir_copy_prop);
      NIR_PASS(progress, nir, nir_opt_remove_phis);
      NIR_PASS(progress, nir, nir_lower_phis_to_scalar, true);
      NIR_PASS(progress, nir, nir_opt_dce);
      NIR_PASS(progress, nir, nir_opt_dead_cf);
      NIR_PASS(progress, nir, nir_opt_cse);
      NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
      NIR_PASS(progress, nir, nir_opt_phi_precision);
      NIR_PASS(progress, nir, nir_opt_algebraic);
      NIR_PASS(progress, nir, nir_opt_constant_folding);

      NIR_PASS(progress, nir, nir_opt_deref);
      NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
      NIR_PASS(progress, nir, nir_opt_undef);
      NIR_PASS(progress, nir, nir_lower_undef_to_zero);

      NIR_PASS(progress, nir, nir_opt_shrink_vectors, true);
      NIR_PASS(progress, nir, nir_opt_loop_unroll);

   } while (progress);
}

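/* Translate the SPIR-V library into NIR and run the target-independent
 * lowering here, so that the per-target loop in main() only has to do final
 * code generation.
 */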
static nir_shader *
compile(void *memctx, const uint32_t *spirv, size_t spirv_size)
{
   const nir_shader_compiler_options *nir_options = &agx_nir_options;

   assert(spirv_size % 4 == 0);
   nir_shader *nir =
      spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL,
                   "library", &spirv_options, nir_options);
   nir_validate_shader(nir, "after spirv_to_nir");
   ralloc_steal(memctx, nir);

   nir_fixup_is_exported(nir);

   NIR_PASS(_, nir, nir_lower_system_values);
   NIR_PASS(_, nir, nir_lower_calls_to_builtins);

   nir_lower_compute_system_values_options cs = {.global_id_is_32bit = true};
   NIR_PASS(_, nir, nir_lower_compute_system_values, &cs);

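   /* Reference printf format strings by 32-bit hash rather than by index
    * into this shader's string table, presumably so references stay stable
    * when library code is linked into other shaders (see agx_link_libagx
    * below).
    */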
   NIR_PASS(_, nir, nir_lower_printf,
            &(const struct nir_lower_printf_options){
               .hash_format_strings = true,
            });

   /* We have to lower away local constant initializers right before we
    * inline functions. That way they get properly initialized at the top
    * of the function and not at the top of its caller.
    */
   NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS(_, nir, nir_lower_returns);
   NIR_PASS(_, nir, nir_inline_functions);
   nir_remove_non_exported(nir);
   NIR_PASS(_, nir, nir_copy_prop);
   NIR_PASS(_, nir, nir_opt_deref);

   /* We can't deal with constant data, so get rid of it. */
   nir_lower_constant_to_temp(nir);

   /* We can go ahead and lower the rest of the constant initializers. We do
    * this here so that nir_remove_dead_variables and split_per_member_structs
    * below see the corresponding stores.
    */
   NIR_PASS(_, nir, nir_lower_variable_initializers, ~0);

   /* LLVM loves to take advantage of the fact that vec3s in OpenCL are 16B
    * aligned and so it can just read/write them as vec4s. This results in a
    * LOT of vec4->vec3 casts on loads and stores. One solution to this
    * problem is to get rid of all vec3 variables.
    */
   NIR_PASS(_, nir, nir_lower_vec3_to_vec4,
            nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
               nir_var_mem_global | nir_var_mem_constant);

   /* We assign explicit types early so that the optimizer can take advantage
    * of that information and hopefully get rid of some of our memcpys.
    */
   NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
            nir_var_uniform | nir_var_shader_temp | nir_var_function_temp |
               nir_var_mem_shared | nir_var_mem_global,
            glsl_get_cl_type_size_align);

   optimize(nir);

   NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_all, NULL);

   /* Lower again, this time after dead-variables to get more compact variable
    * layouts.
    */
   NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
            nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
               nir_var_mem_global | nir_var_mem_constant,
            glsl_get_cl_type_size_align);
   assert(nir->constant_data_size == 0);

   NIR_PASS(_, nir, nir_lower_memcpy);

   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_constant,
            nir_address_format_64bit_global);

   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_uniform,
            nir_address_format_32bit_offset_as_64bit);

   /* Note: we cannot lower explicit I/O for the remaining modes here,
    * because we need derefs intact for function calls into the library to
    * work.
    */

   NIR_PASS(_, nir, nir_lower_convert_alu_types, NULL);
   NIR_PASS(_, nir, nir_opt_if, 0);
   NIR_PASS(_, nir, nir_opt_idiv_const, 16);

   NIR_PASS(_, nir, agx_nir_lower_texture_early, false /* support_lod_bias */);
   NIR_PASS(_, nir, agx_nir_lower_texture);
   NIR_PASS(_, nir, agx_nir_lower_multisampled_image_store);

   optimize(nir);

   return nir;
}

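/* Emit one compiled variant as a C blob: the compacted kernel info header
 * immediately followed by the machine code, padded to a multiple of 4 bytes.
 */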
static void
print_shader(FILE *fp, const char *name, const char *suffix, uint32_t variant,
             struct agx_shader_part *p)
{
   struct agx_precompiled_kernel_info info = agx_compact_kernel_info(&p->info);
   size_t sz_B = sizeof(info) + p->info.binary_size;
   size_t sz_el = DIV_ROUND_UP(sz_B, 4);
   uint32_t *mem = calloc(sz_el, 4);

   memcpy(mem, &info, sizeof(info));
   memcpy((uint8_t *)mem + sizeof(info), p->binary, p->info.binary_size);

   nir_precomp_print_blob(fp, name, suffix, variant, mem, sz_B, true);
   free(mem);
}

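/* Intrinsics-walk callback that records whether the shader performs any
 * global or generic atomic. It only gathers information and never modifies
 * the IR, hence the unconditional "return false".
 */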
static bool
gather_atomic_info(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   bool *any = data;

   switch (intr->intrinsic) {
   case nir_intrinsic_global_atomic:
   case nir_intrinsic_global_atomic_agx:
   case nir_intrinsic_deref_atomic:
   case nir_intrinsic_global_atomic_swap:
   case nir_intrinsic_global_atomic_swap_agx:
   case nir_intrinsic_deref_atomic_swap:
      *any = true;
      return false;
   default:
      return false;
   }
}

/* Here, g13x differs from g13g only in atomic coherency handling, so
 * variants that use no atomics are remapped to the g13g binary instead of
 * being compiled twice.
 */
static const char *
remap_variant(nir_function *func, unsigned variant, const char *target)
{
   bool has_atomic = func->pass_flags & BITFIELD_BIT(variant);

   if (!has_atomic && !strcmp(target, "g13x"))
      return "g13g";
   else
      return target;
}

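/* Kernel arguments live in the preamble (uniform) file, which load_preamble
 * addresses in 16-bit units; hence the byte offset is halved and must be
 * 2-byte aligned.
 */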
static nir_def *
load_kernel_input(nir_builder *b, unsigned num_components, unsigned bit_size,
                  unsigned offset_B)
{
   assert((offset_B & 1) == 0 && "inputs must be 2-byte aligned");
   return nir_load_preamble(b, num_components, bit_size, .base = offset_B / 2);
}

int
main(int argc, char **argv)
{
   if (argc != 4) {
      fprintf(stderr, "Usage: %s [input spir-v] [output header] [output C]\n",
              argv[0]);
      return 1;
   }

   const char *infile = argv[1];
   const char *outh_file = argv[2];
   const char *outc_file = argv[3];

   void *mem_ctx = ralloc_context(NULL);

   int fd = open(infile, O_RDONLY);
   if (fd < 0) {
      fprintf(stderr, "Failed to open %s\n", infile);
      ralloc_free(mem_ctx);
      return 1;
   }

   off_t spirv_len = lseek(fd, 0, SEEK_END);
   const void *spirv_map = mmap(NULL, spirv_len, PROT_READ, MAP_PRIVATE, fd, 0);
   close(fd);
   if (spirv_map == MAP_FAILED) {
      fprintf(stderr, "Failed to mmap the file: errno=%d, %s\n", errno,
              strerror(errno));
      ralloc_free(mem_ctx);
      return 1;
   }

   FILE *fp_h = fopen(outh_file, "w");
   FILE *fp_c = fopen(outc_file, "w");
   glsl_type_singleton_init_or_ref();

   nir_precomp_print_header(fp_c, fp_h, "The Asahi Linux Contributors",
                            "libagx_shaders.h");

   nir_shader *nir = compile(mem_ctx, spirv_map, spirv_len);

   /* load_preamble works at 16-bit granularity */
   struct nir_precomp_opts opt = {.arg_align_B = 2};
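
   /* For each exported kernel, derive its preamble argument layout, then
    * build and compile every variant for every hardware target.
    */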
   nir_foreach_entrypoint(libfunc, nir) {
      libfunc->pass_flags = 0;
      struct nir_precomp_layout layout =
         nir_precomp_derive_layout(&opt, libfunc);
      unsigned nr_vars = nir_precomp_nr_variants(libfunc);

      nir_precomp_print_layout_struct(fp_h, &opt, libfunc);

      for (unsigned v = 0; v < nr_vars; ++v) {
         nir_shader *s = nir_precompiled_build_variant(
            libfunc, v, &agx_nir_options, &opt, load_kernel_input);

         agx_link_libagx(s, nir);

         NIR_PASS(_, s, nir_lower_vars_to_explicit_types, nir_var_mem_shared,
                  glsl_get_cl_type_size_align);

         NIR_PASS(_, s, nir_lower_explicit_io, nir_var_mem_shared,
                  nir_address_format_62bit_generic);

         /* Unroll loops before lowering indirects */
         bool progress = false;
         do {
            progress = false;
            NIR_PASS(progress, s, nir_opt_loop);
         } while (progress);

         agx_preprocess_nir(s, NULL);

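         /* Record which variants use atomics; remap_variant() reads these
          * flags when the binary maps are emitted below.
          */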
         bool has_atomic = false;
         nir_shader_intrinsics_pass(s, gather_atomic_info, nir_metadata_all,
                                    &has_atomic);
         if (has_atomic) {
            libfunc->pass_flags |= BITFIELD_BIT(v);
         }

         foreach_target(target)
         {
            /* Skip variants that were remapped to another target's binary */
            if (strcmp(*target, remap_variant(libfunc, v, *target)))
               continue;

            struct agx_shader_part compiled;
            bool is_helper = !strcmp(libfunc->name, "libagx_helper");
            struct agx_shader_key key = {
               .libagx = nir,
               .promote_constants = !is_helper,
               .reserved_preamble = layout.size_B / 2,
               .is_helper = is_helper,
            };

            if (has_atomic) {
               key.dev.needs_g13x_coherency =
                  u_tristate_make(!strcmp(*target, "g13x"));
            }

            nir_shader *clone = nir_shader_clone(NULL, s);
            agx_compile_shader_nir(clone, &key, NULL, &compiled);
            print_shader(fp_c, libfunc->name, *target, v, &compiled);

            assert(compiled.info.scratch_size == 0 &&
                   "internal shaders do not spill");

            assert(compiled.info.preamble_scratch_size == 0 &&
                   "internal shader preambles do not spill");

            free(compiled.binary);
            ralloc_free(clone);
         }

         ralloc_free(s);
      }
   }

   nir_precomp_print_program_enum(fp_h, nir, "libagx");
   nir_precomp_print_dispatch_macros(fp_h, &opt, nir);

   /* For each target, generate a table mapping programs to binaries */
   foreach_target(target)
   {
      nir_precomp_print_extern_binary_map(fp_h, "libagx", *target);
      nir_precomp_print_binary_map(fp_c, nir, "libagx", *target, remap_variant);
   }

   /* Remove the NIR functions we compiled to binaries to save memory */
   nir_remove_entrypoints(nir);

   nir_precomp_print_nir(fp_c, fp_h, nir, "libagx", "nir");

   glsl_type_singleton_decref();
   fclose(fp_c);
   fclose(fp_h);
   ralloc_free(mem_ctx);
   return 0;
}