• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright 2023 Alyssa Rosenzweig
 * Copyright 2020 Intel Corporation
 * SPDX-License-Identifier: MIT
 */
6 
#include "asahi_clc.h"
#include "asahi/compiler/agx_compile.h"
#include "asahi/compiler/agx_nir.h"
#include "compiler/glsl_types.h"
#include "compiler/spirv/nir_spirv.h"
#include "nir.h"
#include "nir_builder.h"
#include "nir_builder_opcodes.h"
#include "nir_intrinsics.h"
#include "nir_precompiled.h"
#include "shader_enums.h"

#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include "util/macros.h"
26 
27 const char *targets[] = {"g13g", "g13x"};
28 
29 #define foreach_target(target)                                                 \
30    for (const char **target = &targets[0];                                     \
31         target < &targets[ARRAY_SIZE(targets)]; ++target)
32 
33 static const struct spirv_to_nir_options spirv_options = {
34    .environment = NIR_SPIRV_OPENCL,
35    .shared_addr_format = nir_address_format_62bit_generic,
36    .global_addr_format = nir_address_format_62bit_generic,
37    .temp_addr_format = nir_address_format_62bit_generic,
38    .constant_addr_format = nir_address_format_64bit_global,
39    .create_library = true,
40    .printf = true,
41 };
42 
43 /* Standard optimization loop */
44 static void
optimize(nir_shader * nir)45 optimize(nir_shader *nir)
46 {
47    bool progress;
48    do {
49       progress = false;
50 
51       NIR_PASS(progress, nir, nir_split_var_copies);
52       NIR_PASS(progress, nir, nir_split_struct_vars, nir_var_function_temp);
53       NIR_PASS(progress, nir, nir_lower_var_copies);
54       NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
55 
56       NIR_PASS(progress, nir, nir_copy_prop);
57       NIR_PASS(progress, nir, nir_opt_remove_phis);
58       NIR_PASS(progress, nir, nir_lower_phis_to_scalar, true);
59       NIR_PASS(progress, nir, nir_opt_dce);
60       NIR_PASS(progress, nir, nir_opt_dead_cf);
61       NIR_PASS(progress, nir, nir_opt_cse);
62       NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
63       NIR_PASS(progress, nir, nir_opt_phi_precision);
64       NIR_PASS(progress, nir, nir_opt_algebraic);
65       NIR_PASS(progress, nir, nir_opt_constant_folding);
66 
67       NIR_PASS(progress, nir, nir_opt_deref);
68       NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
69       NIR_PASS(progress, nir, nir_opt_undef);
70       NIR_PASS(progress, nir, nir_lower_undef_to_zero);
71 
72       NIR_PASS(progress, nir, nir_opt_shrink_vectors, true);
73       NIR_PASS(progress, nir, nir_opt_loop_unroll);
74 
75    } while (progress);
76 }
77 
78 static nir_shader *
compile(void * memctx,const uint32_t * spirv,size_t spirv_size)79 compile(void *memctx, const uint32_t *spirv, size_t spirv_size)
80 {
81    const nir_shader_compiler_options *nir_options = &agx_nir_options;
82 
83    assert(spirv_size % 4 == 0);
84    nir_shader *nir =
85       spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL,
86                    "library", &spirv_options, nir_options);
87    nir_validate_shader(nir, "after spirv_to_nir");
88    ralloc_steal(memctx, nir);
89 
90    nir_fixup_is_exported(nir);
91 
92    NIR_PASS(_, nir, nir_lower_system_values);
93    NIR_PASS(_, nir, nir_lower_calls_to_builtins);
94 
95    nir_lower_compute_system_values_options cs = {.global_id_is_32bit = true};
96    NIR_PASS(_, nir, nir_lower_compute_system_values, &cs);
97 
98    NIR_PASS(_, nir, nir_lower_printf,
99             &(const struct nir_lower_printf_options){
100                .hash_format_strings = true,
101             });
102 
103    /* We have to lower away local constant initializers right before we
104     * inline functions.  That way they get properly initialized at the top
105     * of the function and not at the top of its caller.
106     */
107    NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp);
108    NIR_PASS(_, nir, nir_lower_returns);
109    NIR_PASS(_, nir, nir_inline_functions);
110    nir_remove_non_exported(nir);
111    NIR_PASS(_, nir, nir_copy_prop);
112    NIR_PASS(_, nir, nir_opt_deref);
113 
114    /* We can't deal with constant data, get rid of it */
115    nir_lower_constant_to_temp(nir);
116 
117    /* We can go ahead and lower the rest of the constant initializers.  We do
118     * this here so that nir_remove_dead_variables and split_per_member_structs
119     * below see the corresponding stores.
120     */
121    NIR_PASS(_, nir, nir_lower_variable_initializers, ~0);
122 
123    /* LLVM loves take advantage of the fact that vec3s in OpenCL are 16B
124     * aligned and so it can just read/write them as vec4s.  This results in a
125     * LOT of vec4->vec3 casts on loads and stores.  One solution to this
126     * problem is to get rid of all vec3 variables.
127     */
128    NIR_PASS(_, nir, nir_lower_vec3_to_vec4,
129             nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
130                nir_var_mem_global | nir_var_mem_constant);
131 
132    /* We assign explicit types early so that the optimizer can take advantage
133     * of that information and hopefully get rid of some of our memcpys.
134     */
135    NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
136             nir_var_uniform | nir_var_shader_temp | nir_var_function_temp |
137                nir_var_mem_shared | nir_var_mem_global,
138             glsl_get_cl_type_size_align);
139 
140    optimize(nir);
141 
142    NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_all, NULL);
143 
144    /* Lower again, this time after dead-variables to get more compact variable
145     * layouts.
146     */
147    NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
148             nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
149                nir_var_mem_global | nir_var_mem_constant,
150             glsl_get_cl_type_size_align);
151    assert(nir->constant_data_size == 0);
152 
153    NIR_PASS(_, nir, nir_lower_memcpy);
154 
155    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_constant,
156             nir_address_format_64bit_global);
157 
158    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_uniform,
159             nir_address_format_32bit_offset_as_64bit);
160 
161    /* Note: we cannot lower explicit I/O here, because we need derefs in tact
162     * for function calls into the library to work.
163     */
164 
165    NIR_PASS(_, nir, nir_lower_convert_alu_types, NULL);
166    NIR_PASS(_, nir, nir_opt_if, 0);
167    NIR_PASS(_, nir, nir_opt_idiv_const, 16);
168 
169    NIR_PASS(_, nir, agx_nir_lower_texture_early, false /* support_lod_bias */);
170    NIR_PASS(_, nir, agx_nir_lower_texture);
171    NIR_PASS(_, nir, agx_nir_lower_multisampled_image_store);
172 
173    optimize(nir);
174 
175    return nir;
176 }
177 
178 static void
print_shader(FILE * fp,const char * name,const char * suffix,uint32_t variant,struct agx_shader_part * p)179 print_shader(FILE *fp, const char *name, const char *suffix, uint32_t variant,
180              struct agx_shader_part *p)
181 {
182    struct agx_precompiled_kernel_info info = agx_compact_kernel_info(&p->info);
183    size_t sz_B = sizeof(info) + p->info.binary_size;
184    size_t sz_el = DIV_ROUND_UP(sz_B, 4);
185    uint32_t *mem = calloc(sz_el, 4);
186 
187    memcpy(mem, &info, sizeof(info));
188    memcpy((uint8_t *)mem + sizeof(info), p->binary, p->info.binary_size);
189 
190    nir_precomp_print_blob(fp, name, suffix, variant, mem, sz_B, true);
191    free(mem);
192 }
193 
194 static bool
gather_atomic_info(nir_builder * b,nir_intrinsic_instr * intr,void * data)195 gather_atomic_info(nir_builder *b, nir_intrinsic_instr *intr, void *data)
196 {
197    bool *any = data;
198 
199    switch (intr->intrinsic) {
200    case nir_intrinsic_global_atomic:
201    case nir_intrinsic_global_atomic_agx:
202    case nir_intrinsic_deref_atomic:
203    case nir_intrinsic_global_atomic_swap:
204    case nir_intrinsic_global_atomic_swap_agx:
205    case nir_intrinsic_deref_atomic_swap:
206       *any = true;
207       return false;
208    default:
209       return false;
210    }
211 }
212 
213 /* G13X variants are only compiled when atomics are used */
214 static const char *
remap_variant(nir_function * func,unsigned variant,const char * target)215 remap_variant(nir_function *func, unsigned variant, const char *target)
216 {
217    bool has_atomic = func->pass_flags & BITFIELD_BIT(variant);
218 
219    if (!has_atomic && !strcmp(target, "g13x"))
220       return "g13g";
221    else
222       return target;
223 }
224 
225 static nir_def *
load_kernel_input(nir_builder * b,unsigned num_components,unsigned bit_size,unsigned offset_B)226 load_kernel_input(nir_builder *b, unsigned num_components, unsigned bit_size,
227                   unsigned offset_B)
228 {
229    assert((offset_B & 1) == 0 && "half-aligned");
230    return nir_load_preamble(b, num_components, bit_size, .base = offset_B / 2);
231 }
232 
233 int
main(int argc,char ** argv)234 main(int argc, char **argv)
235 {
236    if (argc != 4) {
237       fprintf(stderr, "Usage: %s [input spir-v] [output header] [output C]\n",
238               argv[0]);
239       return 1;
240    }
241 
242    const char *infile = argv[1];
243    const char *outh_file = argv[2];
244    const char *outc_file = argv[3];
245 
246    void *mem_ctx = ralloc_context(NULL);
247 
248    int fd = open(infile, O_RDONLY);
249    if (fd < 0) {
250       fprintf(stderr, "Failed to open %s\n", infile);
251       ralloc_free(mem_ctx);
252       return 1;
253    }
254 
255    off_t spirv_len = lseek(fd, 0, SEEK_END);
256    const void *spirv_map = mmap(NULL, spirv_len, PROT_READ, MAP_PRIVATE, fd, 0);
257    close(fd);
258    if (spirv_map == MAP_FAILED) {
259       fprintf(stderr, "Failed to mmap the file: errno=%d, %s\n", errno,
260               strerror(errno));
261       ralloc_free(mem_ctx);
262       return 1;
263    }
264 
265    FILE *fp_h = fopen(outh_file, "w");
266    FILE *fp_c = fopen(outc_file, "w");
267    glsl_type_singleton_init_or_ref();
268 
269    nir_precomp_print_header(fp_c, fp_h, "The Asahi Linux Contributors",
270                             "libagx_shaders.h");
271 
272    nir_shader *nir = compile(mem_ctx, spirv_map, spirv_len);
273 
274    /* load_preamble works at 16-bit granularity */
275    struct nir_precomp_opts opt = {.arg_align_B = 2};
276 
277    nir_foreach_entrypoint(libfunc, nir) {
278       libfunc->pass_flags = 0;
279       struct nir_precomp_layout layout =
280          nir_precomp_derive_layout(&opt, libfunc);
281       unsigned nr_vars = nir_precomp_nr_variants(libfunc);
282 
283       nir_precomp_print_layout_struct(fp_h, &opt, libfunc);
284 
285       for (unsigned v = 0; v < nr_vars; ++v) {
286          nir_shader *s = nir_precompiled_build_variant(
287             libfunc, v, &agx_nir_options, &opt, load_kernel_input);
288 
289          agx_link_libagx(s, nir);
290 
291          NIR_PASS(_, s, nir_lower_vars_to_explicit_types, nir_var_mem_shared,
292                   glsl_get_cl_type_size_align);
293 
294          NIR_PASS(_, s, nir_lower_explicit_io, nir_var_mem_shared,
295                   nir_address_format_62bit_generic);
296 
297          /* Unroll loops before lowering indirects */
298          bool progress = false;
299          do {
300             progress = false;
301             NIR_PASS(progress, s, nir_opt_loop);
302          } while (progress);
303 
304          agx_preprocess_nir(s, NULL);
305 
306          bool has_atomic = false;
307          nir_shader_intrinsics_pass(s, gather_atomic_info, nir_metadata_all,
308                                     &has_atomic);
309          if (has_atomic) {
310             libfunc->pass_flags |= BITFIELD_BIT(v);
311          }
312 
313          foreach_target(target)
314          {
315             /* Skip unused variants */
316             if (strcmp(*target, remap_variant(libfunc, v, *target)))
317                continue;
318 
319             struct agx_shader_part compiled;
320             bool is_helper = !strcmp(libfunc->name, "libagx_helper");
321             struct agx_shader_key key = {
322                .libagx = nir,
323                .promote_constants = !is_helper,
324                .reserved_preamble = layout.size_B / 2,
325                .is_helper = is_helper,
326             };
327 
328             if (has_atomic) {
329                key.dev.needs_g13x_coherency =
330                   u_tristate_make(!strcmp(*target, "g13x"));
331             }
332 
333             nir_shader *clone = nir_shader_clone(NULL, s);
334             agx_compile_shader_nir(clone, &key, NULL, &compiled);
335             print_shader(fp_c, libfunc->name, *target, v, &compiled);
336             free(compiled.binary);
337             ralloc_free(clone);
338 
339             assert(compiled.info.scratch_size == 0 &&
340                    "internal shaders do not spill");
341 
342             assert(compiled.info.preamble_scratch_size == 0 &&
343                    "internal shader preambles do not spill");
344          }
345 
346          ralloc_free(s);
347       }
348    }
349 
350    nir_precomp_print_program_enum(fp_h, nir, "libagx");
351    nir_precomp_print_dispatch_macros(fp_h, &opt, nir);
352 
353    /* For each target, generate a table mapping programs to binaries */
354    foreach_target(target)
355    {
356       nir_precomp_print_extern_binary_map(fp_h, "libagx", *target);
357       nir_precomp_print_binary_map(fp_c, nir, "libagx", *target, remap_variant);
358    }
359 
360    /* Remove the NIR functions we compiled to binaries to save memory */
361    nir_remove_entrypoints(nir);
362 
363    nir_precomp_print_nir(fp_c, fp_h, nir, "libagx", "nir");
364 
365    glsl_type_singleton_decref();
366    fclose(fp_c);
367    fclose(fp_h);
368    ralloc_free(mem_ctx);
369    return 0;
370 }
371