/*
 * Copyright © 2020 Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_kernel.h"
#include "brw_nir.h"
#include "elk/elk_nir_options.h"
#include "intel_nir.h"
#include "nir_clc_helpers.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/spirv/nir_spirv.h"
#include "dev/intel_debug.h"
#include "util/u_atomic.h"
#include "util/u_dynarray.h"

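/* Returns the libclc NIR shader, loading it (and caching it on the compiler)
 * on first use.  Multiple threads may race to load it; the winner publishes
 * its shader with an atomic compare-exchange and the losers free their copy.
 */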
static const nir_shader *
load_clc_shader(struct brw_compiler *compiler, struct disk_cache *disk_cache,
                const nir_shader_compiler_options *nir_options,
                const struct spirv_to_nir_options *spirv_options)
{
   if (compiler->clc_shader)
      return compiler->clc_shader;

   nir_shader *nir = nir_load_libclc_shader(64, disk_cache,
                                            spirv_options, nir_options,
                                            disk_cache != NULL);
   if (nir == NULL)
      return NULL;

   const nir_shader *old_nir =
      p_atomic_cmpxchg(&compiler->clc_shader, NULL, nir);
   if (old_nir == NULL) {
      /* We won the race */
      ralloc_steal(compiler, nir);
      return nir;
   } else {
      /* Someone else built the shader first */
      ralloc_free(nir);
      return old_nir;
   }
}

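/* Creates a fresh nir_function_impl for the function and returns a builder
 * positioned at its start.
 */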
static nir_builder
builder_init_new_impl(nir_function *func)
{
   nir_function_impl *impl = nir_function_impl_create(func);
   return nir_builder_at(nir_before_impl(impl));
}

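/* Gives an empty builtin function a body that forwards its parameters to a
 * deref atomic intrinsic.  Parameter 0 is a pointer through which the
 * atomic's result is returned; the next parameter is the memory pointer,
 * cast to a deref in the given mode; any remaining parameters become the
 * atomic's data sources.
 */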
static void
implement_atomic_builtin(nir_function *func, nir_atomic_op atomic_op,
                         enum glsl_base_type data_base_type,
                         nir_variable_mode mode)
{
   nir_builder b = builder_init_new_impl(func);
   const struct glsl_type *data_type = glsl_scalar_type(data_base_type);

   unsigned p = 0;

   nir_deref_instr *ret = NULL;
   ret = nir_build_deref_cast(&b, nir_load_param(&b, p++),
                              nir_var_function_temp, data_type, 0);

   nir_intrinsic_op op = nir_intrinsic_deref_atomic;
   nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b.shader, op);
   nir_intrinsic_set_atomic_op(atomic, atomic_op);

   for (unsigned i = 0; i < nir_intrinsic_infos[op].num_srcs; i++) {
      nir_def *src = nir_load_param(&b, p++);
      if (i == 0) {
         /* The first source is our deref */
         assert(nir_intrinsic_infos[op].src_components[i] == -1);
         src = &nir_build_deref_cast(&b, src, mode, data_type, 0)->def;
      }
      atomic->src[i] = nir_src_for_ssa(src);
   }

   nir_def_init_for_type(&atomic->instr, &atomic->def, data_type);

   nir_builder_instr_insert(&b, &atomic->instr);
   nir_store_deref(&b, ret, &atomic->def, ~0);
}

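/* Implements intel_sub_group_ballot(): emits a ballot intrinsic on the
 * condition in parameter 1 and writes the 32-bit mask through the return
 * pointer in parameter 0.
 */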
static void
implement_sub_group_ballot_builtin(nir_function *func)
{
   nir_builder b = builder_init_new_impl(func);
   nir_deref_instr *ret =
      nir_build_deref_cast(&b, nir_load_param(&b, 0),
                           nir_var_function_temp, glsl_uint_type(), 0);
   nir_def *cond = nir_load_param(&b, 1);

   nir_intrinsic_instr *ballot =
      nir_intrinsic_instr_create(b.shader, nir_intrinsic_ballot);
   ballot->src[0] = nir_src_for_ssa(cond);
   ballot->num_components = 1;
   nir_def_init(&ballot->instr, &ballot->def, 1, 32);
   nir_builder_instr_insert(&b, &ballot->instr);

   nir_store_deref(&b, ret, &ballot->def, ~0);
}

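/* Replaces the bodies of known Intel-specific OpenCL builtins, matched by
 * their mangled names, with native NIR implementations.
 */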
static bool
implement_intel_builtins(nir_shader *nir)
{
   bool progress = false;

   nir_foreach_function(func, nir) {
      if (strcmp(func->name, "_Z10atomic_minPU3AS1Vff") == 0) {
         /* float atom_min(__global float volatile *p, float val) */
         implement_atomic_builtin(func, nir_atomic_op_fmin,
                                  GLSL_TYPE_FLOAT, nir_var_mem_global);
         progress = true;
      } else if (strcmp(func->name, "_Z10atomic_maxPU3AS1Vff") == 0) {
         /* float atom_max(__global float volatile *p, float val) */
         implement_atomic_builtin(func, nir_atomic_op_fmax,
                                  GLSL_TYPE_FLOAT, nir_var_mem_global);
         progress = true;
      } else if (strcmp(func->name, "_Z10atomic_minPU3AS3Vff") == 0) {
         /* float atomic_min(__shared float volatile *, float) */
         implement_atomic_builtin(func, nir_atomic_op_fmin,
                                  GLSL_TYPE_FLOAT, nir_var_mem_shared);
         progress = true;
      } else if (strcmp(func->name, "_Z10atomic_maxPU3AS3Vff") == 0) {
         /* float atomic_max(__shared float volatile *, float) */
         implement_atomic_builtin(func, nir_atomic_op_fmax,
                                  GLSL_TYPE_FLOAT, nir_var_mem_shared);
         progress = true;
      } else if (strcmp(func->name, "intel_sub_group_ballot") == 0) {
         implement_sub_group_ballot_builtin(func);
         progress = true;
      }
   }

   nir_shader_preserve_all_metadata(nir);

   return progress;
}

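/* Lowers kernel-level intrinsics to uniform loads.  The uniform (push
 * constant) space is laid out with struct brw_kernel_sysvals at offset 0
 * and the kernel arguments immediately after it, so load_kernel_input
 * becomes a load_uniform based at kernel_arg_start and load_num_workgroups
 * reads from the sysvals.
 */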
static bool
lower_kernel_intrinsics(nir_shader *nir)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   bool progress = false;

   unsigned kernel_sysvals_start = 0;
   unsigned kernel_arg_start = sizeof(struct brw_kernel_sysvals);
   nir->num_uniforms += kernel_arg_start;

   nir_builder b = nir_builder_create(impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_kernel_input: {
            b.cursor = nir_instr_remove(&intrin->instr);

            nir_intrinsic_instr *load =
               nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
            load->num_components = intrin->num_components;
            load->src[0] = nir_src_for_ssa(nir_u2u32(&b, intrin->src[0].ssa));
            nir_intrinsic_set_base(load, kernel_arg_start);
            nir_intrinsic_set_range(load, nir->num_uniforms);
            nir_def_init(&load->instr, &load->def,
                         intrin->def.num_components,
                         intrin->def.bit_size);
            nir_builder_instr_insert(&b, &load->instr);

            nir_def_rewrite_uses(&intrin->def, &load->def);
            progress = true;
            break;
         }

         case nir_intrinsic_load_constant_base_ptr: {
            b.cursor = nir_instr_remove(&intrin->instr);
            nir_def *const_data_base_addr = nir_pack_64_2x32_split(&b,
               nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW),
               nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH));
            nir_def_rewrite_uses(&intrin->def, const_data_base_addr);
            progress = true;
            break;
         }

         case nir_intrinsic_load_num_workgroups: {
            b.cursor = nir_instr_remove(&intrin->instr);

            nir_intrinsic_instr *load =
               nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
            load->num_components = 3;
            load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
            nir_intrinsic_set_base(load, kernel_sysvals_start +
               offsetof(struct brw_kernel_sysvals, num_work_groups));
            nir_intrinsic_set_range(load, 3 * 4);
            nir_def_init(&load->instr, &load->def, 3, 32);
            nir_builder_instr_insert(&b, &load->instr);
            nir_def_rewrite_uses(&intrin->def, &load->def);
            progress = true;
            break;
         }

         default:
            break;
         }
      }
   }

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}

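/* Compiles an OpenCL kernel from SPIR-V into a brw_kernel.  Runs
 * spirv_to_nir, links against libclc, inlines and lowers everything down to
 * a single entrypoint, fills out the kernel argument table, and finally
 * hands the NIR to brw_compile_cs.  Returns true on success.
 */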
bool
brw_kernel_from_spirv(struct brw_compiler *compiler,
                      struct disk_cache *disk_cache,
                      struct brw_kernel *kernel,
                      void *log_data, void *mem_ctx,
                      const uint32_t *spirv, size_t spirv_size,
                      const char *entrypoint_name,
                      char **error_str)
{
   const struct intel_device_info *devinfo = compiler->devinfo;
   const nir_shader_compiler_options *nir_options =
      compiler->nir_options[MESA_SHADER_KERNEL];

   struct spirv_to_nir_options spirv_options = {
      .environment = NIR_SPIRV_OPENCL,
      .caps = {
         .address = true,
         .float16 = true,
         .float64 = true,
         .groups = true,
         .image_write_without_format = true,
         .int8 = true,
         .int16 = true,
         .int64 = true,
         .int64_atomics = true,
         .kernel = true,
         .linkage = true, /* We receive a linked kernel from clc */
         .float_controls = true,
         .generic_pointers = true,
         .storage_8bit = true,
         .storage_16bit = true,
         .subgroup_arithmetic = true,
         .subgroup_basic = true,
         .subgroup_ballot = true,
         .subgroup_dispatch = true,
         .subgroup_quad = true,
         .subgroup_shuffle = true,
         .subgroup_vote = true,

         .intel_subgroup_shuffle = true,
         .intel_subgroup_buffer_block_io = true,
      },
      .shared_addr_format = nir_address_format_62bit_generic,
      .global_addr_format = nir_address_format_62bit_generic,
      .temp_addr_format = nir_address_format_62bit_generic,
      .constant_addr_format = nir_address_format_64bit_global,
   };

   spirv_options.clc_shader = load_clc_shader(compiler, disk_cache,
                                              nir_options, &spirv_options);
   if (spirv_options.clc_shader == NULL) {
      fprintf(stderr, "ERROR: libclc shader missing."
                      " Consider installing the libclc package\n");
      abort();
   }

   assert(spirv_size % 4 == 0);
   nir_shader *nir =
      spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL,
                   entrypoint_name, &spirv_options, nir_options);
   nir_validate_shader(nir, "after spirv_to_nir");
   nir_validate_ssa_dominance(nir, "after spirv_to_nir");
   ralloc_steal(mem_ctx, nir);
   nir->info.name = ralloc_strdup(nir, entrypoint_name);

   if (INTEL_DEBUG(DEBUG_CS)) {
      /* Re-index SSA defs so we print more sensible numbers. */
      nir_foreach_function_impl(impl, nir) {
         nir_index_ssa_defs(impl);
      }

      fprintf(stderr, "NIR (from SPIR-V) for kernel\n");
      nir_print_shader(nir, stderr);
   }

   NIR_PASS_V(nir, implement_intel_builtins);
   NIR_PASS_V(nir, nir_link_shader_functions, spirv_options.clc_shader);

   /* We have to lower away local constant initializers right before we
    * inline functions.  That way they get properly initialized at the top
    * of the function and not at the top of its caller.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_deref);

   /* Pick off the single entrypoint that we want */
   nir_remove_non_entrypoints(nir);

   /* Now that we've deleted all but the main function, we can go ahead and
    * lower the rest of the constant initializers.  We do this here so that
    * nir_remove_dead_variables and split_per_member_structs below see the
    * corresponding stores.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);

   /* LLVM loves to take advantage of the fact that vec3s in OpenCL are 16B
    * aligned and so it can just read/write them as vec4s.  This results in a
    * LOT of vec4->vec3 casts on loads and stores.  One solution to this
    * problem is to get rid of all vec3 variables.
    */
   NIR_PASS_V(nir, nir_lower_vec3_to_vec4,
              nir_var_shader_temp | nir_var_function_temp |
              nir_var_mem_shared | nir_var_mem_global |
              nir_var_mem_constant);

   /* We assign explicit types early so that the optimizer can take advantage
    * of that information and hopefully get rid of some of our memcpys.
    */
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_uniform |
              nir_var_shader_temp | nir_var_function_temp |
              nir_var_mem_shared | nir_var_mem_global,
              glsl_get_cl_type_size_align);

   struct brw_nir_compiler_opts opts = {};
   brw_preprocess_nir(compiler, nir, &opts);

   int max_arg_idx = -1;
   nir_foreach_uniform_variable(var, nir) {
      assert(var->data.location < 256);
      max_arg_idx = MAX2(max_arg_idx, var->data.location);
   }

   kernel->args_size = nir->num_uniforms;
   kernel->arg_count = max_arg_idx + 1;

   /* No bindings */
   struct brw_kernel_arg_desc *args =
      rzalloc_array(mem_ctx, struct brw_kernel_arg_desc, kernel->arg_count);
   kernel->args = args;

   nir_foreach_uniform_variable(var, nir) {
      struct brw_kernel_arg_desc arg_desc = {
         .offset = var->data.driver_location,
         .size = glsl_get_explicit_size(var->type, false),
      };
      assert(arg_desc.offset + arg_desc.size <= nir->num_uniforms);

      assert(var->data.location >= 0);
      args[var->data.location] = arg_desc;
   }

   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_all, NULL);

   /* Lower again, this time after dead-variables to get more compact
    * variable layouts.
    */
   nir->global_mem_size = 0;
   nir->scratch_size = 0;
   nir->info.shared_size = 0;
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_shader_temp | nir_var_function_temp |
              nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant,
              glsl_get_cl_type_size_align);
   if (nir->constant_data_size > 0) {
      assert(nir->constant_data == NULL);
      nir->constant_data = rzalloc_size(nir, nir->constant_data_size);
      nir_gather_explicit_io_initializers(nir, nir->constant_data,
                                          nir->constant_data_size,
                                          nir_var_mem_constant);
   }

   if (INTEL_DEBUG(DEBUG_CS)) {
      /* Re-index SSA defs so we print more sensible numbers. */
      nir_foreach_function_impl(impl, nir) {
         nir_index_ssa_defs(impl);
      }

      fprintf(stderr, "NIR (before I/O lowering) for kernel\n");
      nir_print_shader(nir, stderr);
   }

   NIR_PASS_V(nir, nir_lower_memcpy);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant,
              nir_address_format_64bit_global);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_uniform,
              nir_address_format_32bit_offset_as_64bit);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_shader_temp | nir_var_function_temp |
              nir_var_mem_shared | nir_var_mem_global,
              nir_address_format_62bit_generic);

   NIR_PASS_V(nir, nir_lower_convert_alu_types, NULL);

   NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics, devinfo, NULL);
   NIR_PASS_V(nir, lower_kernel_intrinsics);

   struct brw_cs_prog_key key = { };

   memset(&kernel->prog_data, 0, sizeof(kernel->prog_data));
   kernel->prog_data.base.nr_params = DIV_ROUND_UP(nir->num_uniforms, 4);

   struct brw_compile_cs_params params = {
      .base = {
         .nir = nir,
         .stats = kernel->stats,
         .log_data = log_data,
         .mem_ctx = mem_ctx,
      },
      .key = &key,
      .prog_data = &kernel->prog_data,
   };

   kernel->code = brw_compile_cs(compiler, &params);

   if (error_str)
      *error_str = params.base.error_str;

   return kernel->code != NULL;
}

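/* Finds, among the recorded constant-offset scratch stores, one whose byte
 * range covers the [read_offset, read_offset + read_size) range of the
 * given loaded value and returns the SSA value that was stored there.
 */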
static nir_def *
rebuild_value_from_store(struct util_dynarray *stores,
                         nir_def *value, unsigned read_offset)
{
   unsigned read_size = value->num_components * value->bit_size / 8;

   util_dynarray_foreach(stores, nir_intrinsic_instr *, _store) {
      nir_intrinsic_instr *store = *_store;

      unsigned write_offset = nir_src_as_uint(store->src[1]);
      unsigned write_size = nir_src_num_components(store->src[0]) *
                            nir_src_bit_size(store->src[0]) / 8;
      if (write_offset <= read_offset &&
          (write_offset + write_size) >= (read_offset + read_size)) {
         assert(nir_block_dominates(store->instr.block, value->parent_instr->block));
         assert(write_size == read_size);
         return store->src[0].ssa;
      }
   }

   unreachable("Matching scratch store not found");
}

/**
 * Remove temporary variables that are stored to scratch only to be
 * immediately reloaded, remapping each load to the stored SSA value.
 *
 * This workaround is only meant to be applied to shaders in
 * src/intel/shaders where we know there should be no issue.  More complex
 * cases might not work with this approach.
 */
static bool
nir_remove_llvm17_scratch(nir_shader *nir)
{
   struct util_dynarray scratch_stores;
   void *mem_ctx = ralloc_context(NULL);

   util_dynarray_init(&scratch_stores, mem_ctx);

   /* Gather all scratch stores whose offset is a constant. */
   nir_foreach_function_impl(func, nir) {
      nir_foreach_block(block, func) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

            if (intrin->intrinsic != nir_intrinsic_store_scratch)
               continue;

            nir_const_value *offset = nir_src_as_const_value(intrin->src[1]);
            if (offset != NULL) {
               util_dynarray_append(&scratch_stores, nir_intrinsic_instr *, intrin);
            }
         }
      }
   }

   bool progress = false;
   if (util_dynarray_num_elements(&scratch_stores, nir_intrinsic_instr *) > 0) {
      /* Rewrite every constant-offset scratch load to use the matching
       * stored value directly and delete the load.
       */
      nir_foreach_function_impl(func, nir) {
         nir_foreach_block(block, func) {
            nir_foreach_instr_safe(instr, block) {
               if (instr->type != nir_instr_type_intrinsic)
                  continue;

               nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

               if (intrin->intrinsic != nir_intrinsic_load_scratch)
                  continue;

               nir_const_value *offset = nir_src_as_const_value(intrin->src[0]);
               if (offset == NULL)
                  continue;

               nir_def_rewrite_uses(&intrin->def,
                                    rebuild_value_from_store(
                                       &scratch_stores, &intrin->def,
                                       nir_src_as_uint(intrin->src[0])));
               nir_instr_remove(instr);

               progress = true;
            }
         }
      }
   }

   /* The recorded stores are no longer needed; remove them. */
   util_dynarray_foreach(&scratch_stores, nir_intrinsic_instr *, _store) {
      nir_intrinsic_instr *store = *_store;
      nir_instr_remove(&store->instr);
   }

   /* Quick sanity check */
   assert(util_dynarray_num_elements(&scratch_stores, nir_intrinsic_instr *) == 0 ||
          progress);

   ralloc_free(mem_ctx);

   return progress;
}

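/* Wraps nir_remove_llvm17_scratch() in optimization loops: the first helps
 * scratch offsets fold to constants so the pass can match stores to loads,
 * and the second cleans up whatever the pass leaves behind.
 */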
static void
cleanup_llvm17_scratch(nir_shader *nir)
{
   {
      bool progress;
      do {
         progress = false;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
         NIR_PASS(progress, nir, nir_opt_algebraic);
      } while (progress);
   }

   nir_remove_llvm17_scratch(nir);

   {
      bool progress;
      do {
         progress = false;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
         NIR_PASS(progress, nir, nir_opt_algebraic);
      } while (progress);
   }
}

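/* Translates a SPIR-V library (as opposed to a single kernel entrypoint)
 * into NIR, lowered far enough to be consumed later by the brw or elk
 * compilers.  gfx_version selects between the two sets of NIR options and
 * llvm17_wa enables the scratch-removal workaround above.
 */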
nir_shader *
brw_nir_from_spirv(void *mem_ctx, unsigned gfx_version, const uint32_t *spirv,
                   size_t spirv_size, bool llvm17_wa)
{
   struct spirv_to_nir_options spirv_options = {
      .environment = NIR_SPIRV_OPENCL,
      .caps = {
         .address = true,
         .groups = true,
         .image_write_without_format = true,
         .int8 = true,
         .int16 = true,
         .int64 = true,
         .int64_atomics = true,
         .kernel = true,
         .linkage = true, /* We receive a linked kernel from clc */
         .float_controls = true,
         .generic_pointers = true,
         .storage_8bit = true,
         .storage_16bit = true,
         .subgroup_arithmetic = true,
         .subgroup_basic = true,
         .subgroup_ballot = true,
         .subgroup_dispatch = true,
         .subgroup_quad = true,
         .subgroup_shuffle = true,
         .subgroup_vote = true,

         .intel_subgroup_shuffle = true,
         .intel_subgroup_buffer_block_io = true,
      },
      .shared_addr_format = nir_address_format_62bit_generic,
      .global_addr_format = nir_address_format_62bit_generic,
      .temp_addr_format = nir_address_format_62bit_generic,
      .constant_addr_format = nir_address_format_64bit_global,
      .create_library = true,
   };

   assert(spirv_size % 4 == 0);

   assert(gfx_version);
   const nir_shader_compiler_options *nir_options =
      gfx_version >= 9 ? &brw_scalar_nir_options
                       : &elk_scalar_nir_options;

   nir_shader *nir =
      spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL,
                   "library", &spirv_options, nir_options);
   nir_validate_shader(nir, "after spirv_to_nir");
   nir_validate_ssa_dominance(nir, "after spirv_to_nir");
   ralloc_steal(mem_ctx, nir);
   nir->info.name = ralloc_strdup(nir, "library");

   if (INTEL_DEBUG(DEBUG_CS)) {
      /* Re-index SSA defs so we print more sensible numbers. */
      nir_foreach_function_impl(impl, nir) {
         nir_index_ssa_defs(impl);
      }

      fprintf(stderr, "NIR (from SPIR-V) for kernel\n");
      nir_print_shader(nir, stderr);
   }

   NIR_PASS_V(nir, implement_intel_builtins);
   NIR_PASS_V(nir, nir_link_shader_functions, spirv_options.clc_shader);

   /* We have to lower away local constant initializers right before we
    * inline functions.  That way they get properly initialized at the top
    * of the function and not at the top of its caller.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~(nir_var_shader_temp |
                                                      nir_var_function_temp));
   NIR_PASS_V(nir, nir_remove_dead_variables,
              nir_var_uniform | nir_var_mem_ubo | nir_var_mem_constant |
              nir_var_function_temp | nir_var_image, NULL);

   {
      bool progress;
      do {
         progress = false;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_opt_deref);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_undef);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_opt_algebraic);
      } while (progress);
   }

   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);

   assert(nir->scratch_size == 0);
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp,
              glsl_get_cl_type_size_align);

   {
      bool progress;
      do {
         progress = false;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_opt_deref);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_undef);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
         NIR_PASS(progress, nir, nir_split_var_copies);
         NIR_PASS(progress, nir, nir_lower_var_copies);
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_opt_algebraic);
         NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_optimize_phi_true_false);
         NIR_PASS(progress, nir, nir_opt_dead_cf);
         NIR_PASS(progress, nir, nir_opt_remove_phis);
         NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
         NIR_PASS(progress, nir, nir_lower_vec3_to_vec4,
                  nir_var_mem_generic | nir_var_uniform);
         NIR_PASS(progress, nir, nir_opt_memcpy);
      } while (progress);
   }

   NIR_PASS_V(nir, nir_scale_fdiv);

   NIR_PASS_V(nir, nir_remove_dead_variables,
              nir_var_uniform | nir_var_mem_ubo | nir_var_mem_constant |
              nir_var_function_temp | nir_var_image, NULL);

   NIR_PASS_V(nir, nir_remove_dead_variables,
              nir_var_mem_shared | nir_var_function_temp, NULL);

   nir->scratch_size = 0;
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_mem_shared | nir_var_function_temp | nir_var_shader_temp |
              nir_var_mem_global | nir_var_mem_constant,
              glsl_get_cl_type_size_align);

   /* Lower memcpy - this needs to wait until types are sized */
   {
      bool progress;
      do {
         progress = false;
         NIR_PASS(progress, nir, nir_opt_memcpy);
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_opt_deref);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_split_var_copies);
         NIR_PASS(progress, nir, nir_lower_var_copies);
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
      } while (progress);
   }
   NIR_PASS_V(nir, nir_lower_memcpy);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_shared | nir_var_function_temp |
              nir_var_shader_temp | nir_var_uniform,
              nir_address_format_32bit_offset_as_64bit);

   NIR_PASS_V(nir, nir_lower_system_values);

   /* Hopefully we can drop this once lower_vars_to_ssa has improved to not
    * lower everything to scratch.
    */
   if (llvm17_wa)
      cleanup_llvm17_scratch(nir);

   /* Lower again, this time after dead-variables to get more compact
    * variable layouts.
    */
   nir->global_mem_size = 0;
   nir->scratch_size = 0;
   nir->info.shared_size = 0;
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant,
              glsl_get_cl_type_size_align);
   if (nir->constant_data_size > 0) {
      assert(nir->constant_data == NULL);
      nir->constant_data = rzalloc_size(nir, nir->constant_data_size);
      nir_gather_explicit_io_initializers(nir, nir->constant_data,
                                          nir->constant_data_size,
                                          nir_var_mem_constant);
   }

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant,
              nir_address_format_64bit_global);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_uniform,
              nir_address_format_32bit_offset_as_64bit);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_shader_temp | nir_var_function_temp |
              nir_var_mem_shared | nir_var_mem_global,
              nir_address_format_62bit_generic);

   if (INTEL_DEBUG(DEBUG_CS)) {
      /* Re-index SSA defs so we print more sensible numbers. */
      nir_foreach_function_impl(impl, nir) {
         nir_index_ssa_defs(impl);
      }

      fprintf(stderr, "NIR (before I/O lowering) for kernel\n");
      nir_print_shader(nir, stderr);
   }

   return nir;
}