/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23 
24 #include "brw_kernel.h"
25 #include "brw_nir.h"
26 #include "elk/elk_nir_options.h"
27 #include "intel_nir.h"
28 
29 #include "intel_nir.h"
30 #include "nir_clc_helpers.h"
31 #include "compiler/nir/nir_builder.h"
32 #include "compiler/spirv/nir_spirv.h"
33 #include "dev/intel_debug.h"
34 #include "util/u_atomic.h"
35 #include "util/u_dynarray.h"
36 
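/* Returns the libclc helper shader as NIR, loading it on first use.  Several
 * threads may race to build it; the compare-and-swap below keeps exactly one
 * copy (owned by the compiler) and the losing thread frees its own.
 */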
static const nir_shader *
load_clc_shader(struct brw_compiler *compiler, struct disk_cache *disk_cache,
                const nir_shader_compiler_options *nir_options,
                const struct spirv_to_nir_options *spirv_options)
{
   if (compiler->clc_shader)
      return compiler->clc_shader;

   nir_shader *nir = nir_load_libclc_shader(64, disk_cache,
                                            spirv_options, nir_options,
                                            disk_cache != NULL);
   if (nir == NULL)
      return NULL;

   const nir_shader *old_nir =
      p_atomic_cmpxchg(&compiler->clc_shader, NULL, nir);
   if (old_nir == NULL) {
      /* We won the race */
      ralloc_steal(compiler, nir);
      return nir;
   } else {
      /* Someone else built the shader first */
      ralloc_free(nir);
      return old_nir;
   }
}

static nir_builder
builder_init_new_impl(nir_function *func)
{
   nir_function_impl *impl = nir_function_impl_create(func);
   return nir_builder_at(nir_before_impl(impl));
}

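/* Fills in the body of a libclc-declared atomic builtin.  Parameter 0 is a
 * pointer to the return slot; the remaining parameters are the sources of
 * the atomic.  We emit a single nir_intrinsic_deref_atomic, casting the
 * first source to a deref in the requested mode, and store the result back
 * through the return pointer.
 */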
static void
implement_atomic_builtin(nir_function *func, nir_atomic_op atomic_op,
                         enum glsl_base_type data_base_type,
                         nir_variable_mode mode)
{
   nir_builder b = builder_init_new_impl(func);
   const struct glsl_type *data_type = glsl_scalar_type(data_base_type);

   unsigned p = 0;

   nir_deref_instr *ret = NULL;
   ret = nir_build_deref_cast(&b, nir_load_param(&b, p++),
                              nir_var_function_temp, data_type, 0);

   nir_intrinsic_op op = nir_intrinsic_deref_atomic;
   nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b.shader, op);
   nir_intrinsic_set_atomic_op(atomic, atomic_op);

   for (unsigned i = 0; i < nir_intrinsic_infos[op].num_srcs; i++) {
      nir_def *src = nir_load_param(&b, p++);
      if (i == 0) {
         /* The first source is our deref */
         assert(nir_intrinsic_infos[op].src_components[i] == -1);
         src = &nir_build_deref_cast(&b, src, mode, data_type, 0)->def;
      }
      atomic->src[i] = nir_src_for_ssa(src);
   }

   nir_def_init_for_type(&atomic->instr, &atomic->def, data_type);

   nir_builder_instr_insert(&b, &atomic->instr);
   nir_store_deref(&b, ret, &atomic->def, ~0);
}

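/* Implements intel_sub_group_ballot(): emits nir_intrinsic_ballot on the
 * predicate parameter and stores the resulting 32-bit mask through the
 * return pointer.
 */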
static void
implement_sub_group_ballot_builtin(nir_function *func)
{
   nir_builder b = builder_init_new_impl(func);
   nir_deref_instr *ret =
      nir_build_deref_cast(&b, nir_load_param(&b, 0),
                           nir_var_function_temp, glsl_uint_type(), 0);
   nir_def *cond = nir_load_param(&b, 1);

   nir_intrinsic_instr *ballot =
      nir_intrinsic_instr_create(b.shader, nir_intrinsic_ballot);
   ballot->src[0] = nir_src_for_ssa(cond);
   ballot->num_components = 1;
   nir_def_init(&ballot->instr, &ballot->def, 1, 32);
   nir_builder_instr_insert(&b, &ballot->instr);

   nir_store_deref(&b, ret, &ballot->def, ~0);
}

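/* Provides NIR bodies for the Intel-specific builtins that libclc only
 * declares.  The _Z-prefixed names are the mangled OpenCL C symbols; in this
 * mangling, AS1 is the __global address space and AS3 is __local.
 */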
static bool
implement_intel_builtins(nir_shader *nir)
{
   bool progress = false;

   nir_foreach_function(func, nir) {
      if (strcmp(func->name, "_Z10atomic_minPU3AS1Vff") == 0) {
         /* float atom_min(__global float volatile *p, float val) */
         implement_atomic_builtin(func, nir_atomic_op_fmin,
                                  GLSL_TYPE_FLOAT, nir_var_mem_global);
         progress = true;
      } else if (strcmp(func->name, "_Z10atomic_maxPU3AS1Vff") == 0) {
         /* float atom_max(__global float volatile *p, float val) */
         implement_atomic_builtin(func, nir_atomic_op_fmax,
                                  GLSL_TYPE_FLOAT, nir_var_mem_global);
         progress = true;
      } else if (strcmp(func->name, "_Z10atomic_minPU3AS3Vff") == 0) {
         /* float atomic_min(__shared float volatile *, float) */
         implement_atomic_builtin(func, nir_atomic_op_fmin,
                                  GLSL_TYPE_FLOAT, nir_var_mem_shared);
         progress = true;
      } else if (strcmp(func->name, "_Z10atomic_maxPU3AS3Vff") == 0) {
         /* float atomic_max(__shared float volatile *, float) */
         implement_atomic_builtin(func, nir_atomic_op_fmax,
                                  GLSL_TYPE_FLOAT, nir_var_mem_shared);
         progress = true;
      } else if (strcmp(func->name, "intel_sub_group_ballot") == 0) {
         implement_sub_group_ballot_builtin(func);
         progress = true;
      }
   }

   nir_shader_preserve_all_metadata(nir);

   return progress;
}

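/* Rewrites kernel-specific intrinsics (kernel inputs, num_workgroups and the
 * constant-data base pointer) into uniform loads and relocations.  The
 * uniform layout places struct brw_kernel_sysvals at offset 0 with the
 * kernel arguments right after it, which is why num_uniforms grows by
 * sizeof(struct brw_kernel_sysvals) below.
 */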
static bool
lower_kernel_intrinsics(nir_shader *nir)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   bool progress = false;

   unsigned kernel_sysvals_start = 0;
   unsigned kernel_arg_start = sizeof(struct brw_kernel_sysvals);
   nir->num_uniforms += kernel_arg_start;

   nir_builder b = nir_builder_create(impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_kernel_input: {
            b.cursor = nir_instr_remove(&intrin->instr);

            nir_intrinsic_instr *load =
               nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
            load->num_components = intrin->num_components;
            load->src[0] = nir_src_for_ssa(nir_u2u32(&b, intrin->src[0].ssa));
            nir_intrinsic_set_base(load, kernel_arg_start);
            nir_intrinsic_set_range(load, nir->num_uniforms);
            nir_def_init(&load->instr, &load->def,
                         intrin->def.num_components,
                         intrin->def.bit_size);
            nir_builder_instr_insert(&b, &load->instr);

            nir_def_rewrite_uses(&intrin->def, &load->def);
            progress = true;
            break;
         }

         case nir_intrinsic_load_constant_base_ptr: {
            b.cursor = nir_instr_remove(&intrin->instr);
            nir_def *const_data_base_addr = nir_pack_64_2x32_split(&b,
               nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW),
               nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH));
            nir_def_rewrite_uses(&intrin->def, const_data_base_addr);
            progress = true;
            break;
         }

         case nir_intrinsic_load_num_workgroups: {
            b.cursor = nir_instr_remove(&intrin->instr);

            nir_intrinsic_instr *load =
               nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
            load->num_components = 3;
            load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
            nir_intrinsic_set_base(load, kernel_sysvals_start +
               offsetof(struct brw_kernel_sysvals, num_work_groups));
            nir_intrinsic_set_range(load, 3 * 4);
            nir_def_init(&load->instr, &load->def, 3, 32);
            nir_builder_instr_insert(&b, &load->instr);
            nir_def_rewrite_uses(&intrin->def, &load->def);
            progress = true;
            break;
         }

         default:
            break;
         }
      }
   }

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}

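/* Compiles an OpenCL kernel from SPIR-V down to Intel ISA.  On success,
 * kernel->code points at the compiled binary (allocated on mem_ctx) and the
 * argument layout is described by kernel->args, arg_count and args_size.
 */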
bool
brw_kernel_from_spirv(struct brw_compiler *compiler,
                      struct disk_cache *disk_cache,
                      struct brw_kernel *kernel,
                      void *log_data, void *mem_ctx,
                      const uint32_t *spirv, size_t spirv_size,
                      const char *entrypoint_name,
                      char **error_str)
{
   const struct intel_device_info *devinfo = compiler->devinfo;
   const nir_shader_compiler_options *nir_options =
      compiler->nir_options[MESA_SHADER_KERNEL];

   struct spirv_to_nir_options spirv_options = {
      .environment = NIR_SPIRV_OPENCL,
      .caps = {
         .address = true,
         .float16 = true,
         .float64 = true,
         .groups = true,
         .image_write_without_format = true,
         .int8 = true,
         .int16 = true,
         .int64 = true,
         .int64_atomics = true,
         .kernel = true,
         .linkage = true, /* We receive a linked kernel from clc */
         .float_controls = true,
         .generic_pointers = true,
         .storage_8bit = true,
         .storage_16bit = true,
         .subgroup_arithmetic = true,
         .subgroup_basic = true,
         .subgroup_ballot = true,
         .subgroup_dispatch = true,
         .subgroup_quad = true,
         .subgroup_shuffle = true,
         .subgroup_vote = true,

         .intel_subgroup_shuffle = true,
         .intel_subgroup_buffer_block_io = true,
      },
      .shared_addr_format = nir_address_format_62bit_generic,
      .global_addr_format = nir_address_format_62bit_generic,
      .temp_addr_format = nir_address_format_62bit_generic,
      .constant_addr_format = nir_address_format_64bit_global,
   };
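
   /* Generic (shared/global/temp) pointers use
    * nir_address_format_62bit_generic: a 64-bit value whose top two bits
    * select which region (global, shared or scratch) the pointer currently
    * refers to, leaving 62 bits of address.
    */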

   spirv_options.clc_shader = load_clc_shader(compiler, disk_cache,
                                              nir_options, &spirv_options);
   if (spirv_options.clc_shader == NULL) {
      fprintf(stderr, "ERROR: libclc shader missing."
              " Consider installing the libclc package\n");
      abort();
   }

   assert(spirv_size % 4 == 0);
   nir_shader *nir =
      spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL,
                   entrypoint_name, &spirv_options, nir_options);
   nir_validate_shader(nir, "after spirv_to_nir");
   nir_validate_ssa_dominance(nir, "after spirv_to_nir");
   ralloc_steal(mem_ctx, nir);
   nir->info.name = ralloc_strdup(nir, entrypoint_name);

   if (INTEL_DEBUG(DEBUG_CS)) {
      /* Re-index SSA defs so we print more sensible numbers. */
      nir_foreach_function_impl(impl, nir) {
         nir_index_ssa_defs(impl);
      }

      fprintf(stderr, "NIR (from SPIR-V) for kernel\n");
      nir_print_shader(nir, stderr);
   }

   NIR_PASS_V(nir, implement_intel_builtins);
   NIR_PASS_V(nir, nir_link_shader_functions, spirv_options.clc_shader);

   /* We have to lower away local constant initializers right before we
    * inline functions.  That way they get properly initialized at the top
    * of the function and not at the top of its caller.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_deref);

   /* Pick off the single entrypoint that we want */
   nir_remove_non_entrypoints(nir);

   /* Now that we've deleted all but the main function, we can go ahead and
    * lower the rest of the constant initializers.  We do this here so that
    * nir_remove_dead_variables and split_per_member_structs below see the
    * corresponding stores.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);

   /* LLVM loves to take advantage of the fact that vec3s in OpenCL are
    * 16B-aligned and so it can just read/write them as vec4s.  This results
    * in a LOT of vec4->vec3 casts on loads and stores.  One solution to this
    * problem is to get rid of all vec3 variables.
    */
   NIR_PASS_V(nir, nir_lower_vec3_to_vec4,
              nir_var_shader_temp | nir_var_function_temp |
              nir_var_mem_shared | nir_var_mem_global |
              nir_var_mem_constant);

   /* We assign explicit types early so that the optimizer can take advantage
    * of that information and hopefully get rid of some of our memcpys.
    */
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_uniform |
              nir_var_shader_temp | nir_var_function_temp |
              nir_var_mem_shared | nir_var_mem_global,
              glsl_get_cl_type_size_align);

   struct brw_nir_compiler_opts opts = {};
   brw_preprocess_nir(compiler, nir, &opts);

   int max_arg_idx = -1;
   nir_foreach_uniform_variable(var, nir) {
      assert(var->data.location < 256);
      max_arg_idx = MAX2(max_arg_idx, var->data.location);
   }

   kernel->args_size = nir->num_uniforms;
   kernel->arg_count = max_arg_idx + 1;

   /* No bindings */
   struct brw_kernel_arg_desc *args =
      rzalloc_array(mem_ctx, struct brw_kernel_arg_desc, kernel->arg_count);
   kernel->args = args;

   nir_foreach_uniform_variable(var, nir) {
      struct brw_kernel_arg_desc arg_desc = {
         .offset = var->data.driver_location,
         .size = glsl_get_explicit_size(var->type, false),
      };
      assert(arg_desc.offset + arg_desc.size <= nir->num_uniforms);

      assert(var->data.location >= 0);
      args[var->data.location] = arg_desc;
   }

   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_all, NULL);

   /* Lower again, this time after dead-variables to get more compact variable
    * layouts.
    */
   nir->global_mem_size = 0;
   nir->scratch_size = 0;
   nir->info.shared_size = 0;
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_shader_temp | nir_var_function_temp |
              nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant,
              glsl_get_cl_type_size_align);
   if (nir->constant_data_size > 0) {
      assert(nir->constant_data == NULL);
      nir->constant_data = rzalloc_size(nir, nir->constant_data_size);
      nir_gather_explicit_io_initializers(nir, nir->constant_data,
                                          nir->constant_data_size,
                                          nir_var_mem_constant);
   }

   if (INTEL_DEBUG(DEBUG_CS)) {
      /* Re-index SSA defs so we print more sensible numbers. */
      nir_foreach_function_impl(impl, nir) {
         nir_index_ssa_defs(impl);
      }

      fprintf(stderr, "NIR (before I/O lowering) for kernel\n");
      nir_print_shader(nir, stderr);
   }

   NIR_PASS_V(nir, nir_lower_memcpy);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant,
              nir_address_format_64bit_global);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_uniform,
              nir_address_format_32bit_offset_as_64bit);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_shader_temp | nir_var_function_temp |
              nir_var_mem_shared | nir_var_mem_global,
              nir_address_format_62bit_generic);

   NIR_PASS_V(nir, nir_lower_convert_alu_types, NULL);

   NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics, devinfo, NULL);
   NIR_PASS_V(nir, lower_kernel_intrinsics);

   struct brw_cs_prog_key key = { };

   memset(&kernel->prog_data, 0, sizeof(kernel->prog_data));
   kernel->prog_data.base.nr_params = DIV_ROUND_UP(nir->num_uniforms, 4);

   struct brw_compile_cs_params params = {
      .base = {
         .nir = nir,
         .stats = kernel->stats,
         .log_data = log_data,
         .mem_ctx = mem_ctx,
      },
      .key = &key,
      .prog_data = &kernel->prog_data,
   };

   kernel->code = brw_compile_cs(compiler, &params);

   if (error_str)
      *error_str = params.base.error_str;

   return kernel->code != NULL;
}
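
/* A minimal usage sketch (hypothetical caller, not part of this file):
 *
 *    struct brw_kernel kernel = {0};
 *    char *error = NULL;
 *    if (!brw_kernel_from_spirv(compiler, disk_cache, &kernel, NULL, mem_ctx,
 *                               words, size_in_bytes, "my_kernel", &error))
 *       fprintf(stderr, "kernel compile failed: %s\n", error);
 */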
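/* Finds a constant-offset scratch store whose bytes fully cover the range
 * being reloaded and returns the stored SSA value so the load can be
 * replaced with it.
 */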
static nir_def *
rebuild_value_from_store(struct util_dynarray *stores,
                         nir_def *value, unsigned read_offset)
{
   unsigned read_size = value->num_components * value->bit_size / 8;

   util_dynarray_foreach(stores, nir_intrinsic_instr *, _store) {
      nir_intrinsic_instr *store = *_store;

      unsigned write_offset = nir_src_as_uint(store->src[1]);
      unsigned write_size = nir_src_num_components(store->src[0]) *
                            nir_src_bit_size(store->src[0]) / 8;
      if (write_offset <= read_offset &&
          (write_offset + write_size) >= (read_offset + read_size)) {
         assert(nir_block_dominates(store->instr.block, value->parent_instr->block));
         assert(write_size == read_size);
         return store->src[0].ssa;
      }
   }
   unreachable("Matching scratch store not found");
}

/**
 * Remove temporary variables that are stored to scratch only to be
 * immediately reloaded.  Remap each load to the stored SSA value.
 *
 * This workaround is only meant to be applied to shaders in
 * src/intel/shaders where we know there should be no issue.  More complex
 * cases might not work with this approach.
 */
static bool
nir_remove_llvm17_scratch(nir_shader *nir)
{
   struct util_dynarray scratch_stores;
   void *mem_ctx = ralloc_context(NULL);

   util_dynarray_init(&scratch_stores, mem_ctx);

   nir_foreach_function_impl(func, nir) {
      nir_foreach_block(block, func) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

            if (intrin->intrinsic != nir_intrinsic_store_scratch)
               continue;

            nir_const_value *offset = nir_src_as_const_value(intrin->src[1]);
            if (offset != NULL) {
               util_dynarray_append(&scratch_stores, nir_intrinsic_instr *, intrin);
            }
         }
      }
   }

   bool progress = false;
   if (util_dynarray_num_elements(&scratch_stores, nir_intrinsic_instr *) > 0) {
      nir_foreach_function_impl(func, nir) {
         nir_foreach_block(block, func) {
            nir_foreach_instr_safe(instr, block) {
               if (instr->type != nir_instr_type_intrinsic)
                  continue;

               nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

               if (intrin->intrinsic != nir_intrinsic_load_scratch)
                  continue;

               nir_const_value *offset = nir_src_as_const_value(intrin->src[0]);
               if (offset == NULL)
                  continue;

               nir_def_rewrite_uses(&intrin->def,
                                    rebuild_value_from_store(
                                       &scratch_stores, &intrin->def,
                                       nir_src_as_uint(intrin->src[0])));
               nir_instr_remove(instr);

               progress = true;
            }
         }
      }
   }

   util_dynarray_foreach(&scratch_stores, nir_intrinsic_instr *, _store) {
      nir_intrinsic_instr *store = *_store;
      nir_instr_remove(&store->instr);
   }

   /* Quick sanity check */
   assert(util_dynarray_num_elements(&scratch_stores, nir_intrinsic_instr *) == 0 ||
          progress);

   ralloc_free(mem_ctx);

   return progress;
}

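/* Runs a copy-prop/DCE/constant-folding loop before and after the scratch
 * removal: first so that scratch offsets become constants the pass above can
 * match, then to clean up the address math the removal leaves dead.
 */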
static void
cleanup_llvm17_scratch(nir_shader *nir)
{
   {
      bool progress;
      do {
         progress = false;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
         NIR_PASS(progress, nir, nir_opt_algebraic);
      } while (progress);
   }

   nir_remove_llvm17_scratch(nir);

   {
      bool progress;
      do {
         progress = false;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
         NIR_PASS(progress, nir, nir_opt_algebraic);
      } while (progress);
   }
}

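/* Translates a SPIR-V module into a NIR library without compiling it to ISA,
 * e.g. for the internal shaders in src/intel/shaders.  gfx_version selects
 * between the brw (Gfx9+) and elk (pre-Gfx9) compiler options.
 */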
nir_shader *
brw_nir_from_spirv(void *mem_ctx, unsigned gfx_version, const uint32_t *spirv,
                   size_t spirv_size, bool llvm17_wa)
{
   struct spirv_to_nir_options spirv_options = {
      .environment = NIR_SPIRV_OPENCL,
      .caps = {
         .address = true,
         .groups = true,
         .image_write_without_format = true,
         .int8 = true,
         .int16 = true,
         .int64 = true,
         .int64_atomics = true,
         .kernel = true,
         .linkage = true, /* We receive a linked kernel from clc */
         .float_controls = true,
         .generic_pointers = true,
         .storage_8bit = true,
         .storage_16bit = true,
         .subgroup_arithmetic = true,
         .subgroup_basic = true,
         .subgroup_ballot = true,
         .subgroup_dispatch = true,
         .subgroup_quad = true,
         .subgroup_shuffle = true,
         .subgroup_vote = true,

         .intel_subgroup_shuffle = true,
         .intel_subgroup_buffer_block_io = true,
      },
      .shared_addr_format = nir_address_format_62bit_generic,
      .global_addr_format = nir_address_format_62bit_generic,
      .temp_addr_format = nir_address_format_62bit_generic,
      .constant_addr_format = nir_address_format_64bit_global,
      .create_library = true,
   };
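
   /* Unlike brw_kernel_from_spirv(), create_library above tells spirv_to_nir
    * to keep every function rather than a single entrypoint, so the result
    * can later be linked into other kernels.
    */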

   assert(spirv_size % 4 == 0);

   assert(gfx_version);
   const nir_shader_compiler_options *nir_options =
      gfx_version >= 9 ? &brw_scalar_nir_options
                       : &elk_scalar_nir_options;

   nir_shader *nir =
      spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL,
                   "library", &spirv_options, nir_options);
   nir_validate_shader(nir, "after spirv_to_nir");
   nir_validate_ssa_dominance(nir, "after spirv_to_nir");
   ralloc_steal(mem_ctx, nir);
   nir->info.name = ralloc_strdup(nir, "library");

   if (INTEL_DEBUG(DEBUG_CS)) {
      /* Re-index SSA defs so we print more sensible numbers. */
      nir_foreach_function_impl(impl, nir) {
         nir_index_ssa_defs(impl);
      }

      fprintf(stderr, "NIR (from SPIR-V) for kernel\n");
      nir_print_shader(nir, stderr);
   }

   NIR_PASS_V(nir, implement_intel_builtins);
   NIR_PASS_V(nir, nir_link_shader_functions, spirv_options.clc_shader);

   /* We have to lower away local constant initializers right before we
    * inline functions.  That way they get properly initialized at the top
    * of the function and not at the top of its caller.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~(nir_var_shader_temp |
                                                      nir_var_function_temp));
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo |
              nir_var_mem_constant | nir_var_function_temp | nir_var_image, NULL);
   {
      bool progress;
      do {
         progress = false;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_opt_deref);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_undef);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_opt_algebraic);
      } while (progress);
   }

   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);

   assert(nir->scratch_size == 0);
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp,
              glsl_get_cl_type_size_align);

   {
      bool progress;
      do {
         progress = false;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_opt_deref);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_undef);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
         NIR_PASS(progress, nir, nir_split_var_copies);
         NIR_PASS(progress, nir, nir_lower_var_copies);
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_opt_algebraic);
         NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_optimize_phi_true_false);
         NIR_PASS(progress, nir, nir_opt_dead_cf);
         NIR_PASS(progress, nir, nir_opt_remove_phis);
         NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
         NIR_PASS(progress, nir, nir_lower_vec3_to_vec4,
                  nir_var_mem_generic | nir_var_uniform);
         NIR_PASS(progress, nir, nir_opt_memcpy);
      } while (progress);
   }

   NIR_PASS_V(nir, nir_scale_fdiv);

   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo |
              nir_var_mem_constant | nir_var_function_temp | nir_var_image, NULL);

   NIR_PASS_V(nir, nir_remove_dead_variables,
              nir_var_mem_shared | nir_var_function_temp, NULL);

   nir->scratch_size = 0;
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_mem_shared | nir_var_function_temp | nir_var_shader_temp |
              nir_var_mem_global | nir_var_mem_constant,
              glsl_get_cl_type_size_align);

   // Lower memcpy - needs to wait until types are sized
   {
      bool progress;
      do {
         progress = false;
         NIR_PASS(progress, nir, nir_opt_memcpy);
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_opt_deref);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_split_var_copies);
         NIR_PASS(progress, nir, nir_lower_var_copies);
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
      } while (progress);
   }
   NIR_PASS_V(nir, nir_lower_memcpy);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_shared | nir_var_function_temp | nir_var_shader_temp |
              nir_var_uniform,
              nir_address_format_32bit_offset_as_64bit);

   NIR_PASS_V(nir, nir_lower_system_values);

   /* Hopefully we can drop this once lower_vars_to_ssa has improved to not
    * lower everything to scratch.
    */
   if (llvm17_wa)
      cleanup_llvm17_scratch(nir);

   /* Lower again, this time after dead-variables to get more compact variable
    * layouts.
    */
   nir->global_mem_size = 0;
   nir->scratch_size = 0;
   nir->info.shared_size = 0;
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant,
              glsl_get_cl_type_size_align);
   if (nir->constant_data_size > 0) {
      assert(nir->constant_data == NULL);
      nir->constant_data = rzalloc_size(nir, nir->constant_data_size);
      nir_gather_explicit_io_initializers(nir, nir->constant_data,
                                          nir->constant_data_size,
                                          nir_var_mem_constant);
   }

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant,
              nir_address_format_64bit_global);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_uniform,
              nir_address_format_32bit_offset_as_64bit);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_shader_temp | nir_var_function_temp |
              nir_var_mem_shared | nir_var_mem_global,
              nir_address_format_62bit_generic);

   if (INTEL_DEBUG(DEBUG_CS)) {
      /* Re-index SSA defs so we print more sensible numbers. */
      nir_foreach_function_impl(impl, nir) {
         nir_index_ssa_defs(impl);
      }

      fprintf(stderr, "NIR (before I/O lowering) for kernel\n");
      nir_print_shader(nir, stderr);
   }

   return nir;
}