/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_kernel.h"
#include "brw_nir.h"
#include "elk/elk_nir_options.h"
#include "intel_nir.h"

#include "nir_clc_helpers.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/spirv/nir_spirv.h"
#include "compiler/spirv/spirv_info.h"
#include "dev/intel_debug.h"
#include "util/u_atomic.h"
#include "util/u_dynarray.h"

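/* Lazily load the libclc NIR library and share it through the compiler
 * object.  Multiple threads may race to build it; the atomic cmpxchg below
 * keeps a single winner and frees the losing copy.
 */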
static const nir_shader *
load_clc_shader(struct brw_compiler *compiler, struct disk_cache *disk_cache,
                const nir_shader_compiler_options *nir_options,
                const struct spirv_to_nir_options *spirv_options)
{
   if (compiler->clc_shader)
      return compiler->clc_shader;

   nir_shader *nir = nir_load_libclc_shader(64, disk_cache,
                                            spirv_options, nir_options,
                                            disk_cache != NULL);
   if (nir == NULL)
      return NULL;

   const nir_shader *old_nir =
      p_atomic_cmpxchg(&compiler->clc_shader, NULL, nir);
   if (old_nir == NULL) {
      /* We won the race */
      ralloc_steal(compiler, nir);
      return nir;
   } else {
      /* Someone else built the shader first */
      ralloc_free(nir);
      return old_nir;
   }
}

static nir_builder
builder_init_new_impl(nir_function *func)
{
   nir_function_impl *impl = nir_function_impl_create(func);
   return nir_builder_at(nir_before_impl(impl));
}

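/* Emit a NIR body for one of the OpenCL float atomic builtins.  Parameter 0
 * of the function is a pointer used as the return slot; the remaining
 * parameters feed the deref_atomic intrinsic (pointer first, then data).
 */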
static void
implement_atomic_builtin(nir_function *func, nir_atomic_op atomic_op,
                         enum glsl_base_type data_base_type,
                         nir_variable_mode mode)
{
   nir_builder b = builder_init_new_impl(func);
   const struct glsl_type *data_type = glsl_scalar_type(data_base_type);

   unsigned p = 0;

   nir_deref_instr *ret = NULL;
   ret = nir_build_deref_cast(&b, nir_load_param(&b, p++),
                              nir_var_function_temp, data_type, 0);

   nir_intrinsic_op op = nir_intrinsic_deref_atomic;
   nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b.shader, op);
   nir_intrinsic_set_atomic_op(atomic, atomic_op);

   for (unsigned i = 0; i < nir_intrinsic_infos[op].num_srcs; i++) {
      nir_def *src = nir_load_param(&b, p++);
      if (i == 0) {
         /* The first source is our deref */
         assert(nir_intrinsic_infos[op].src_components[i] == -1);
         src = &nir_build_deref_cast(&b, src, mode, data_type, 0)->def;
      }
      atomic->src[i] = nir_src_for_ssa(src);
   }

   nir_def_init_for_type(&atomic->instr, &atomic->def, data_type);

   nir_builder_instr_insert(&b, &atomic->instr);
   nir_store_deref(&b, ret, &atomic->def, ~0);
}

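/* Emit a NIR body for intel_sub_group_ballot(): a ballot intrinsic on the
 * predicate parameter, with the resulting 32-bit mask written to the return
 * slot (parameter 0).
 */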
static void
implement_sub_group_ballot_builtin(nir_function *func)
{
   nir_builder b = builder_init_new_impl(func);
   nir_deref_instr *ret =
      nir_build_deref_cast(&b, nir_load_param(&b, 0),
                           nir_var_function_temp, glsl_uint_type(), 0);
   nir_def *cond = nir_load_param(&b, 1);

   nir_intrinsic_instr *ballot =
      nir_intrinsic_instr_create(b.shader, nir_intrinsic_ballot);
   ballot->src[0] = nir_src_for_ssa(cond);
   ballot->num_components = 1;
   nir_def_init(&ballot->instr, &ballot->def, 1, 32);
   nir_builder_instr_insert(&b, &ballot->instr);

   nir_store_deref(&b, ret, &ballot->def, ~0);
}

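/* Provide NIR implementations for the OpenCL builtins (matched here by their
 * mangled names) that are not covered by libclc.
 */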
static bool
implement_intel_builtins(nir_shader *nir)
{
   bool progress = false;

   nir_foreach_function(func, nir) {
      if (strcmp(func->name, "_Z10atomic_minPU3AS1Vff") == 0) {
         /* float atom_min(__global float volatile *p, float val) */
         implement_atomic_builtin(func, nir_atomic_op_fmin,
                                  GLSL_TYPE_FLOAT, nir_var_mem_global);
         progress = true;
      } else if (strcmp(func->name, "_Z10atomic_maxPU3AS1Vff") == 0) {
         /* float atom_max(__global float volatile *p, float val) */
         implement_atomic_builtin(func, nir_atomic_op_fmax,
                                  GLSL_TYPE_FLOAT, nir_var_mem_global);
         progress = true;
      } else if (strcmp(func->name, "_Z10atomic_minPU3AS3Vff") == 0) {
         /* float atomic_min(__shared float volatile *, float) */
         implement_atomic_builtin(func, nir_atomic_op_fmin,
                                  GLSL_TYPE_FLOAT, nir_var_mem_shared);
         progress = true;
      } else if (strcmp(func->name, "_Z10atomic_maxPU3AS3Vff") == 0) {
         /* float atomic_max(__shared float volatile *, float) */
         implement_atomic_builtin(func, nir_atomic_op_fmax,
                                  GLSL_TYPE_FLOAT, nir_var_mem_shared);
         progress = true;
      } else if (strcmp(func->name, "intel_sub_group_ballot") == 0) {
         implement_sub_group_ballot_builtin(func);
         progress = true;
      }
   }

   nir_shader_preserve_all_metadata(nir);

   return progress;
}

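/* Rewrite kernel-level intrinsics in terms of the uniform layout used here:
 * struct brw_kernel_sysvals sits at offset 0 and the kernel arguments follow
 * immediately after it.
 */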
static bool
lower_kernel_intrinsics(nir_shader *nir)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   bool progress = false;

   unsigned kernel_sysvals_start = 0;
   unsigned kernel_arg_start = sizeof(struct brw_kernel_sysvals);
   nir->num_uniforms += kernel_arg_start;

   nir_builder b = nir_builder_create(impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_kernel_input: {
            b.cursor = nir_instr_remove(&intrin->instr);

            nir_intrinsic_instr *load =
               nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
            load->num_components = intrin->num_components;
            load->src[0] = nir_src_for_ssa(nir_u2u32(&b, intrin->src[0].ssa));
            nir_intrinsic_set_base(load, kernel_arg_start);
            nir_intrinsic_set_range(load, nir->num_uniforms);
            nir_def_init(&load->instr, &load->def,
                         intrin->def.num_components,
                         intrin->def.bit_size);
            nir_builder_instr_insert(&b, &load->instr);

            nir_def_rewrite_uses(&intrin->def, &load->def);
            progress = true;
            break;
         }

         case nir_intrinsic_load_constant_base_ptr: {
            b.cursor = nir_instr_remove(&intrin->instr);
            nir_def *const_data_base_addr = nir_pack_64_2x32_split(&b,
               nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW),
               nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH));
            nir_def_rewrite_uses(&intrin->def, const_data_base_addr);
            progress = true;
            break;
         }

         case nir_intrinsic_load_num_workgroups: {
            b.cursor = nir_instr_remove(&intrin->instr);

            nir_intrinsic_instr *load =
               nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
            load->num_components = 3;
            load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
            nir_intrinsic_set_base(load, kernel_sysvals_start +
               offsetof(struct brw_kernel_sysvals, num_work_groups));
            nir_intrinsic_set_range(load, 3 * 4);
            nir_def_init(&load->instr, &load->def, 3, 32);
            nir_builder_instr_insert(&b, &load->instr);
            nir_def_rewrite_uses(&intrin->def, &load->def);
            progress = true;
            break;
         }

         default:
            break;
         }
      }
   }

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_control_flow);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}

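/* SPIR-V capabilities accepted for OpenCL-style kernels on this backend. */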
static const struct spirv_capabilities spirv_caps = {
   .Addresses = true,
   .Float16 = true,
   .Float64 = true,
   .Groups = true,
   .StorageImageWriteWithoutFormat = true,
   .Int8 = true,
   .Int16 = true,
   .Int64 = true,
   .Int64Atomics = true,
   .Kernel = true,
   .Linkage = true, /* We receive a linked kernel from clc */
   .DenormFlushToZero = true,
   .DenormPreserve = true,
   .SignedZeroInfNanPreserve = true,
   .RoundingModeRTE = true,
   .RoundingModeRTZ = true,
   .GenericPointer = true,
   .GroupNonUniform = true,
   .GroupNonUniformArithmetic = true,
   .GroupNonUniformClustered = true,
   .GroupNonUniformBallot = true,
   .GroupNonUniformQuad = true,
   .GroupNonUniformShuffle = true,
   .GroupNonUniformVote = true,
   .SubgroupDispatch = true,

   .SubgroupShuffleINTEL = true,
   .SubgroupBufferBlockIOINTEL = true,
};

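/* Compile a single OpenCL kernel entry point from a SPIR-V binary into
 * native code and fill out *kernel.
 *
 * A minimal usage sketch (illustrative only; spirv_words, spirv_size_bytes
 * and "my_kernel" are placeholders supplied by the caller, and spirv_size is
 * in bytes):
 *
 *    struct brw_kernel kernel = {};
 *    char *error = NULL;
 *    void *mem_ctx = ralloc_context(NULL);
 *    if (!brw_kernel_from_spirv(compiler, disk_cache, &kernel, NULL, mem_ctx,
 *                               spirv_words, spirv_size_bytes, "my_kernel",
 *                               &error))
 *       fprintf(stderr, "kernel compile failed: %s\n", error);
 */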
bool
brw_kernel_from_spirv(struct brw_compiler *compiler,
                      struct disk_cache *disk_cache,
                      struct brw_kernel *kernel,
                      void *log_data, void *mem_ctx,
                      const uint32_t *spirv, size_t spirv_size,
                      const char *entrypoint_name,
                      char **error_str)
{
   const struct intel_device_info *devinfo = compiler->devinfo;
   const nir_shader_compiler_options *nir_options =
      compiler->nir_options[MESA_SHADER_KERNEL];

   struct spirv_to_nir_options spirv_options = {
      .environment = NIR_SPIRV_OPENCL,
      .capabilities = &spirv_caps,
      .printf = true,
      .shared_addr_format = nir_address_format_62bit_generic,
      .global_addr_format = nir_address_format_62bit_generic,
      .temp_addr_format = nir_address_format_62bit_generic,
      .constant_addr_format = nir_address_format_64bit_global,
   };

   spirv_options.clc_shader = load_clc_shader(compiler, disk_cache,
                                              nir_options, &spirv_options);
   if (spirv_options.clc_shader == NULL) {
      fprintf(stderr, "ERROR: libclc shader missing."
              " Consider installing the libclc package\n");
      abort();
   }

   assert(spirv_size % 4 == 0);
   nir_shader *nir =
      spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL,
                   entrypoint_name, &spirv_options, nir_options);
   nir_validate_shader(nir, "after spirv_to_nir");
   nir_validate_ssa_dominance(nir, "after spirv_to_nir");
   ralloc_steal(mem_ctx, nir);
   nir->info.name = ralloc_strdup(nir, entrypoint_name);

   if (INTEL_DEBUG(DEBUG_CS)) {
      /* Re-index SSA defs so we print more sensible numbers. */
      nir_foreach_function_impl(impl, nir) {
         nir_index_ssa_defs(impl);
      }

      fprintf(stderr, "NIR (from SPIR-V) for kernel\n");
      nir_print_shader(nir, stderr);
   }

   nir_lower_printf_options printf_opts = {
      .ptr_bit_size = 64,
      .max_buffer_size = 1024 * 1024,
      .use_printf_base_identifier = true,
   };
   NIR_PASS_V(nir, nir_lower_printf, &printf_opts);

   NIR_PASS_V(nir, implement_intel_builtins);
   NIR_PASS_V(nir, nir_link_shader_functions, spirv_options.clc_shader);

   /* We have to lower away local constant initializers right before we
    * inline functions. That way they get properly initialized at the top
    * of the function and not at the top of its caller.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_deref);

   /* Pick off the single entrypoint that we want */
   nir_remove_non_entrypoints(nir);

   /* Now that we've deleted all but the main function, we can go ahead and
    * lower the rest of the constant initializers. We do this here so that
    * nir_remove_dead_variables and split_per_member_structs below see the
    * corresponding stores.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);

   /* LLVM loves to take advantage of the fact that vec3s in OpenCL are 16B
    * aligned, so it can just read/write them as vec4s. This results in a
    * LOT of vec4->vec3 casts on loads and stores. One solution to this
    * problem is to get rid of all vec3 variables.
    */
   NIR_PASS_V(nir, nir_lower_vec3_to_vec4,
              nir_var_shader_temp | nir_var_function_temp |
              nir_var_mem_shared | nir_var_mem_global |
              nir_var_mem_constant);

   /* We assign explicit types early so that the optimizer can take advantage
    * of that information and hopefully get rid of some of our memcpys.
    */
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_uniform |
              nir_var_shader_temp | nir_var_function_temp |
              nir_var_mem_shared | nir_var_mem_global,
              glsl_get_cl_type_size_align);

   struct brw_nir_compiler_opts opts = {};
   brw_preprocess_nir(compiler, nir, &opts);

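   /* Build the kernel argument table: each uniform variable is one kernel
    * argument, with var->data.driver_location giving its byte offset within
    * the argument buffer and var->data.location its argument index.
    */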
   int max_arg_idx = -1;
   nir_foreach_uniform_variable(var, nir) {
      assert(var->data.location < 256);
      max_arg_idx = MAX2(max_arg_idx, var->data.location);
   }

   kernel->args_size = nir->num_uniforms;
   kernel->arg_count = max_arg_idx + 1;

   /* No bindings */
   struct brw_kernel_arg_desc *args =
      rzalloc_array(mem_ctx, struct brw_kernel_arg_desc, kernel->arg_count);
   kernel->args = args;

   nir_foreach_uniform_variable(var, nir) {
      struct brw_kernel_arg_desc arg_desc = {
         .offset = var->data.driver_location,
         .size = glsl_get_explicit_size(var->type, false),
      };
      assert(arg_desc.offset + arg_desc.size <= nir->num_uniforms);

      assert(var->data.location >= 0);
      args[var->data.location] = arg_desc;
   }

   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_all, NULL);

   /* Lower again, this time after dead-variables to get more compact variable
    * layouts.
    */
   nir->global_mem_size = 0;
   nir->scratch_size = 0;
   nir->info.shared_size = 0;
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_shader_temp | nir_var_function_temp |
              nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant,
              glsl_get_cl_type_size_align);
   if (nir->constant_data_size > 0) {
      assert(nir->constant_data == NULL);
      nir->constant_data = rzalloc_size(nir, nir->constant_data_size);
      nir_gather_explicit_io_initializers(nir, nir->constant_data,
                                          nir->constant_data_size,
                                          nir_var_mem_constant);
   }

   if (INTEL_DEBUG(DEBUG_CS)) {
      /* Re-index SSA defs so we print more sensible numbers. */
      nir_foreach_function_impl(impl, nir) {
         nir_index_ssa_defs(impl);
      }

      fprintf(stderr, "NIR (before I/O lowering) for kernel\n");
      nir_print_shader(nir, stderr);
   }

   NIR_PASS_V(nir, nir_lower_memcpy);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant,
              nir_address_format_64bit_global);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_uniform,
              nir_address_format_32bit_offset_as_64bit);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_shader_temp | nir_var_function_temp |
              nir_var_mem_shared | nir_var_mem_global,
              nir_address_format_62bit_generic);

   NIR_PASS_V(nir, nir_lower_convert_alu_types, NULL);

   NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics, devinfo, NULL);
   NIR_PASS_V(nir, lower_kernel_intrinsics);

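   /* Compile as a compute shader with a default key. nr_params is the number
    * of 32-bit uniform slots covering the sysvals plus the kernel arguments.
    */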
   struct brw_cs_prog_key key = { };

   memset(&kernel->prog_data, 0, sizeof(kernel->prog_data));
   kernel->prog_data.base.nr_params = DIV_ROUND_UP(nir->num_uniforms, 4);

   struct brw_compile_cs_params params = {
      .base = {
         .nir = nir,
         .stats = kernel->stats,
         .log_data = log_data,
         .mem_ctx = mem_ctx,
      },
      .key = &key,
      .prog_data = &kernel->prog_data,
   };

   kernel->code = brw_compile_cs(compiler, &params);

   if (error_str)
      *error_str = params.base.error_str;

   return kernel->code != NULL;
}

static nir_def *
rebuild_value_from_store(struct util_dynarray *stores,
                         nir_def *value, unsigned read_offset)
{
   unsigned read_size = value->num_components * value->bit_size / 8;

   util_dynarray_foreach(stores, nir_intrinsic_instr *, _store) {
      nir_intrinsic_instr *store = *_store;

      unsigned write_offset = nir_src_as_uint(store->src[1]);
      unsigned write_size = nir_src_num_components(store->src[0]) *
                            nir_src_bit_size(store->src[0]) / 8;
      if (write_offset <= read_offset &&
          (write_offset + write_size) >= (read_offset + read_size)) {
         assert(nir_block_dominates(store->instr.block, value->parent_instr->block));
         assert(write_size == read_size);
         return store->src[0].ssa;
      }
   }
   unreachable("Matching scratch store not found");
}

/**
 * Remove temporary variables that are stored to scratch only to be reloaded
 * immediately, remapping each load to the stored SSA value.
 *
 * This workaround is only meant to be applied to shaders in src/intel/shaders
 * where we know there should be no issue. More complex cases might not work
 * with this approach.
 */
static bool
nir_remove_llvm17_scratch(nir_shader *nir)
{
   struct util_dynarray scratch_stores;
   void *mem_ctx = ralloc_context(NULL);

   util_dynarray_init(&scratch_stores, mem_ctx);

   nir_foreach_function_impl(func, nir) {
      nir_foreach_block(block, func) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

            if (intrin->intrinsic != nir_intrinsic_store_scratch)
               continue;

            nir_const_value *offset = nir_src_as_const_value(intrin->src[1]);
            if (offset != NULL) {
               util_dynarray_append(&scratch_stores, nir_intrinsic_instr *, intrin);
            }
         }
      }
   }

   bool progress = false;
   if (util_dynarray_num_elements(&scratch_stores, nir_intrinsic_instr *) > 0) {
      nir_foreach_function_impl(func, nir) {
         nir_foreach_block(block, func) {
            nir_foreach_instr_safe(instr, block) {
               if (instr->type != nir_instr_type_intrinsic)
                  continue;

               nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

               if (intrin->intrinsic != nir_intrinsic_load_scratch)
                  continue;

               nir_const_value *offset = nir_src_as_const_value(intrin->src[0]);
               if (offset == NULL)
                  continue;

               nir_def_replace(&intrin->def,
                               rebuild_value_from_store(&scratch_stores, &intrin->def,
                                                        nir_src_as_uint(intrin->src[0])));

               progress = true;
            }
         }
      }
   }

   util_dynarray_foreach(&scratch_stores, nir_intrinsic_instr *, _store) {
      nir_intrinsic_instr *store = *_store;
      nir_instr_remove(&store->instr);
   }

   /* Quick sanity check */
   assert(util_dynarray_num_elements(&scratch_stores, nir_intrinsic_instr *) == 0 ||
          progress);

   ralloc_free(mem_ctx);

   return progress;
}

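/* Run a small optimization loop so scratch offsets become constants, strip
 * the redundant scratch stores/loads introduced by LLVM 17, then clean up
 * again.
 */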
static void
cleanup_llvm17_scratch(nir_shader *nir)
{
   {
      bool progress;
      do {
         progress = false;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
         NIR_PASS(progress, nir, nir_opt_algebraic);
      } while (progress);
   }

   nir_remove_llvm17_scratch(nir);

   {
      bool progress;
      do {
         progress = false;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
         NIR_PASS(progress, nir, nir_opt_algebraic);
      } while (progress);
   }
}

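/* Translate a SPIR-V module into a NIR library (create_library keeps every
 * entry point), used for the internal kernels under src/intel/shaders.
 * gfx_version selects between the brw (Gfx9+) and elk scalar NIR options.
 */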
nir_shader *
brw_nir_from_spirv(void *mem_ctx, unsigned gfx_version, const uint32_t *spirv,
                   size_t spirv_size, bool llvm17_wa)
{
   struct spirv_to_nir_options spirv_options = {
      .environment = NIR_SPIRV_OPENCL,
      .capabilities = &spirv_caps,
      .printf = true,
      .shared_addr_format = nir_address_format_62bit_generic,
      .global_addr_format = nir_address_format_62bit_generic,
      .temp_addr_format = nir_address_format_62bit_generic,
      .constant_addr_format = nir_address_format_64bit_global,
      .create_library = true,
   };

   assert(spirv_size % 4 == 0);

   assert(gfx_version);
   const nir_shader_compiler_options *nir_options =
      gfx_version >= 9 ? &brw_scalar_nir_options
                       : &elk_scalar_nir_options;

   nir_shader *nir =
      spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL,
                   "library", &spirv_options, nir_options);
   nir_validate_shader(nir, "after spirv_to_nir");
   nir_validate_ssa_dominance(nir, "after spirv_to_nir");
   ralloc_steal(mem_ctx, nir);
   nir->info.name = ralloc_strdup(nir, "library");

   if (INTEL_DEBUG(DEBUG_CS)) {
      /* Re-index SSA defs so we print more sensible numbers. */
      nir_foreach_function_impl(impl, nir) {
         nir_index_ssa_defs(impl);
      }

      fprintf(stderr, "NIR (from SPIR-V) for kernel\n");
      nir_print_shader(nir, stderr);
   }

   nir_lower_printf_options printf_opts = {
      .ptr_bit_size = 64,
      .use_printf_base_identifier = true,
   };
   NIR_PASS_V(nir, nir_lower_printf, &printf_opts);

   NIR_PASS_V(nir, implement_intel_builtins);
   NIR_PASS_V(nir, nir_link_shader_functions, spirv_options.clc_shader);

   /* We have to lower away local constant initializers right before we
    * inline functions. That way they get properly initialized at the top
    * of the function and not at the top of its caller.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~(nir_var_shader_temp |
                                                      nir_var_function_temp));
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo |
              nir_var_mem_constant | nir_var_function_temp | nir_var_image, NULL);
   {
      bool progress;
      do {
         progress = false;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_opt_deref);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_undef);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_opt_algebraic);
      } while (progress);
   }

   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);

   assert(nir->scratch_size == 0);
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp,
              glsl_get_cl_type_size_align);

   {
      bool progress;
      do {
         progress = false;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_opt_deref);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_undef);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
         NIR_PASS(progress, nir, nir_split_var_copies);
         NIR_PASS(progress, nir, nir_lower_var_copies);
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_opt_algebraic);
         NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_optimize_phi_true_false);
         NIR_PASS(progress, nir, nir_opt_dead_cf);
         NIR_PASS(progress, nir, nir_opt_remove_phis);
         NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
         NIR_PASS(progress, nir, nir_lower_vec3_to_vec4,
                  nir_var_mem_generic | nir_var_uniform);
         NIR_PASS(progress, nir, nir_opt_memcpy);
      } while (progress);
   }

   NIR_PASS_V(nir, nir_scale_fdiv);

   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo |
              nir_var_mem_constant | nir_var_function_temp | nir_var_image, NULL);

   NIR_PASS_V(nir, nir_remove_dead_variables,
              nir_var_mem_shared | nir_var_function_temp, NULL);

   nir->scratch_size = 0;
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_mem_shared | nir_var_function_temp | nir_var_shader_temp |
              nir_var_mem_global | nir_var_mem_constant,
              glsl_get_cl_type_size_align);

   /* Lower memcpy - needs to wait until types are sized */
   {
      bool progress;
      do {
         progress = false;
         NIR_PASS(progress, nir, nir_opt_memcpy);
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_opt_deref);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_split_var_copies);
         NIR_PASS(progress, nir, nir_lower_var_copies);
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
      } while (progress);
   }
   NIR_PASS_V(nir, nir_lower_memcpy);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_shared | nir_var_function_temp | nir_var_shader_temp |
              nir_var_uniform,
              nir_address_format_32bit_offset_as_64bit);

   NIR_PASS_V(nir, nir_lower_system_values);

   /* Hopefully we can drop this once lower_vars_to_ssa has improved to not
    * lower everything to scratch.
    */
   if (llvm17_wa)
      cleanup_llvm17_scratch(nir);

   /* Lower again, this time after dead-variables to get more compact variable
    * layouts.
    */
   nir->global_mem_size = 0;
   nir->scratch_size = 0;
   nir->info.shared_size = 0;
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant,
              glsl_get_cl_type_size_align);
   if (nir->constant_data_size > 0) {
      assert(nir->constant_data == NULL);
      nir->constant_data = rzalloc_size(nir, nir->constant_data_size);
      nir_gather_explicit_io_initializers(nir, nir->constant_data,
                                          nir->constant_data_size,
                                          nir_var_mem_constant);
   }

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant,
              nir_address_format_64bit_global);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_uniform,
              nir_address_format_32bit_offset_as_64bit);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_shader_temp | nir_var_function_temp |
              nir_var_mem_shared | nir_var_mem_global,
              nir_address_format_62bit_generic);

   if (INTEL_DEBUG(DEBUG_CS)) {
      /* Re-index SSA defs so we print more sensible numbers. */
      nir_foreach_function_impl(impl, nir) {
         nir_index_ssa_defs(impl);
      }

      fprintf(stderr, "NIR (before I/O lowering) for kernel\n");
      nir_print_shader(nir, stderr);
   }

   return nir;
}