/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_serialize.h"
#include "glsl_types.h"
#include "nir_types.h"
#include "clc_compiler.h"
#include "clc_helpers.h"
#include "clc_nir.h"
#include "../compiler/dxil_nir.h"
#include "../compiler/dxil_nir_lower_int_samplers.h"
#include "../compiler/nir_to_dxil.h"

#include "util/u_debug.h"
#include <util/u_math.h>
#include "spirv/nir_spirv.h"
#include "nir_builder.h"
#include "nir_builtin_builder.h"

#include "git_sha1.h"

struct clc_image_lower_context
{
   struct clc_dxil_metadata *metadata;
   unsigned *num_srvs;
   unsigned *num_uavs;
   nir_deref_instr *deref;
   unsigned num_buf_ids;
   int metadata_index;
};

static int
lower_image_deref_impl(nir_builder *b, struct clc_image_lower_context *context,
                       const struct glsl_type *new_var_type,
                       nir_variable_mode var_mode,
                       unsigned *num_bindings)
{
   nir_variable *in_var = nir_deref_instr_get_variable(context->deref);
   nir_variable *image = nir_variable_create(b->shader, var_mode, new_var_type, NULL);
   image->data.access = in_var->data.access;
   image->data.binding = in_var->data.binding;
   if (context->num_buf_ids > 0) {
      // Need to assign a new binding
      context->metadata->args[context->metadata_index].
         image.buf_ids[context->num_buf_ids] = image->data.binding = (*num_bindings)++;
   }
   context->num_buf_ids++;
   return image->data.binding;
}

static int
lower_read_only_image_deref(nir_builder *b, struct clc_image_lower_context *context,
                            nir_alu_type image_type)
{
   nir_variable *in_var = nir_deref_instr_get_variable(context->deref);

   // Non-writeable images should be converted to samplers,
   // since they may have texture operations done on them
   const struct glsl_type *new_var_type =
      glsl_texture_type(glsl_get_sampler_dim(in_var->type),
                        glsl_sampler_type_is_array(in_var->type),
                        nir_get_glsl_base_type_for_nir_type(image_type | 32));
   return lower_image_deref_impl(b, context, new_var_type, nir_var_uniform, context->num_srvs);
}

static int
lower_read_write_image_deref(nir_builder *b, struct clc_image_lower_context *context,
                             nir_alu_type image_type)
{
   nir_variable *in_var = nir_deref_instr_get_variable(context->deref);
   const struct glsl_type *new_var_type =
      glsl_image_type(glsl_get_sampler_dim(in_var->type),
                      glsl_sampler_type_is_array(in_var->type),
                      nir_get_glsl_base_type_for_nir_type(image_type | 32));
   return lower_image_deref_impl(b, context, new_var_type, nir_var_image, context->num_uavs);
}

static void
clc_lower_input_image_deref(nir_builder *b, struct clc_image_lower_context *context)
{
   // The input variable here isn't actually an image, it's just the
   // image format data.
   //
   // For every use of an image in a different way, we'll add an
   // appropriate image to match it. That can result in up to
   // 3 images (float4, int4, uint4) for each image. Only one of these
   // formats will actually produce correct data, but a single kernel
   // could use runtime conditionals to potentially access any of them.
   //
   // If the image is used in a query that doesn't have a corresponding
   // DXIL intrinsic (CL image channel order or channel format), then
   // we'll add a kernel input for that data that'll be lowered by the
   // explicit IO pass later on.
   //
   // After all that, we can remove the image input variable and deref.
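   //
   // For example (illustrative): a kernel that calls both read_imagef() and
   // read_imageui() on the same image argument ends up with two typed copies
   // of that resource (a FLOAT4 one and a UINT4 one), each recorded in the
   // argument's buf_ids metadata with its own binding.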

   enum image_type {
      FLOAT4,
      INT4,
      UINT4,
      IMAGE_TYPE_COUNT
   };

   int image_bindings[IMAGE_TYPE_COUNT] = {-1, -1, -1};
   nir_ssa_def *format_deref_dest = NULL, *order_deref_dest = NULL;

   nir_variable *in_var = nir_deref_instr_get_variable(context->deref);

   context->metadata_index = 0;
   while (context->metadata->args[context->metadata_index].image.buf_ids[0] != in_var->data.binding)
      context->metadata_index++;

   context->num_buf_ids = 0;

   /* Do this in 2 passes:
    * 1. When encountering a strongly-typed access (load/store), replace the deref
    *    with one that references an appropriately typed variable. When encountering
    *    an untyped access (size query), if we have a strongly-typed variable already,
    *    replace the deref to point to it.
    * 2. If there are any references left, they should all be untyped. If we found
    *    a strongly-typed access later in the 1st pass, then just replace the reference.
    *    If we didn't, e.g. the resource is only used for a size query, then pick an
    *    arbitrary type for it.
    */
   for (int pass = 0; pass < 2; ++pass) {
      nir_foreach_use_safe(src, &context->deref->dest.ssa) {
         enum image_type type;

         if (src->parent_instr->type == nir_instr_type_intrinsic) {
            nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(src->parent_instr);
            nir_alu_type dest_type;

            b->cursor = nir_before_instr(&intrinsic->instr);

            switch (intrinsic->intrinsic) {
            case nir_intrinsic_image_deref_load:
            case nir_intrinsic_image_deref_store: {
               dest_type = intrinsic->intrinsic == nir_intrinsic_image_deref_load ?
                  nir_intrinsic_dest_type(intrinsic) : nir_intrinsic_src_type(intrinsic);

               switch (nir_alu_type_get_base_type(dest_type)) {
               case nir_type_float: type = FLOAT4; break;
               case nir_type_int: type = INT4; break;
               case nir_type_uint: type = UINT4; break;
               default: unreachable("Unsupported image type for load.");
               }

               int image_binding = image_bindings[type];
               if (image_binding < 0) {
                  image_binding = image_bindings[type] =
                     lower_read_write_image_deref(b, context, dest_type);
               }

               assert((in_var->data.access & ACCESS_NON_WRITEABLE) == 0);
               nir_rewrite_image_intrinsic(intrinsic, nir_imm_int(b, image_binding), false);
               break;
            }

            case nir_intrinsic_image_deref_size: {
               int image_binding = -1;
               for (unsigned i = 0; i < IMAGE_TYPE_COUNT; ++i) {
                  if (image_bindings[i] >= 0) {
                     image_binding = image_bindings[i];
                     break;
                  }
               }
               if (image_binding < 0) {
                  // Skip for now and come back to it
                  if (pass == 0)
                     break;

                  type = FLOAT4;
                  image_binding = image_bindings[type] =
                     lower_read_write_image_deref(b, context, nir_type_float32);
               }

               assert((in_var->data.access & ACCESS_NON_WRITEABLE) == 0);
               nir_rewrite_image_intrinsic(intrinsic, nir_imm_int(b, image_binding), false);
               break;
            }

            case nir_intrinsic_image_deref_format:
            case nir_intrinsic_image_deref_order: {
               nir_ssa_def **cached_deref = intrinsic->intrinsic == nir_intrinsic_image_deref_format ?
                  &format_deref_dest : &order_deref_dest;
               if (!*cached_deref) {
                  nir_variable *new_input = nir_variable_create(b->shader, nir_var_uniform, glsl_uint_type(), NULL);
                  new_input->data.driver_location = in_var->data.driver_location;
                  if (intrinsic->intrinsic == nir_intrinsic_image_deref_format) {
                     /* Match cl_image_format { image_channel_order, image_channel_data_type }; */
                     new_input->data.driver_location += glsl_get_cl_size(new_input->type);
                  }

                  b->cursor = nir_after_instr(&context->deref->instr);
                  *cached_deref = nir_load_var(b, new_input);
               }

               /* No actual intrinsic needed here, just reference the loaded variable */
               nir_ssa_def_rewrite_uses(&intrinsic->dest.ssa, *cached_deref);
               nir_instr_remove(&intrinsic->instr);
               break;
            }

            default:
               unreachable("Unsupported image intrinsic");
            }
         } else if (src->parent_instr->type == nir_instr_type_tex) {
            assert(in_var->data.access & ACCESS_NON_WRITEABLE);
            nir_tex_instr *tex = nir_instr_as_tex(src->parent_instr);

            switch (nir_alu_type_get_base_type(tex->dest_type)) {
            case nir_type_float: type = FLOAT4; break;
            case nir_type_int: type = INT4; break;
            case nir_type_uint: type = UINT4; break;
            default: unreachable("Unsupported image format for sample.");
            }

            int image_binding = image_bindings[type];
            if (image_binding < 0) {
               image_binding = image_bindings[type] =
                  lower_read_only_image_deref(b, context, tex->dest_type);
            }

            nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_texture_deref));
            tex->texture_index = image_binding;
         }
      }
   }

   context->metadata->args[context->metadata_index].image.num_buf_ids = context->num_buf_ids;

   nir_instr_remove(&context->deref->instr);
   exec_node_remove(&in_var->node);
}

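/* Walk the entry point and rewrite every deref of an image variable with
 * clc_lower_input_image_deref(), which replaces the untyped kernel-input
 * image with one or more typed SRV/UAV variables.
 */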
static void
clc_lower_images(nir_shader *nir, struct clc_image_lower_context *context)
{
   nir_foreach_function(func, nir) {
      if (!func->is_entrypoint)
         continue;
      assert(func->impl);

      nir_builder b;
      nir_builder_init(&b, func->impl);

      nir_foreach_block(block, func->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type == nir_instr_type_deref) {
               context->deref = nir_instr_as_deref(instr);

               if (glsl_type_is_image(context->deref->type)) {
                  assert(context->deref->deref_type == nir_deref_type_var);
                  clc_lower_input_image_deref(&b, context);
               }
            }
         }
      }
   }
}

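/* DXIL only exposes 32-bit compute system values. For 64-bit loads of the
 * dispatch-related system values below, shrink the intrinsic's destination
 * to 32 bits and re-widen the result with a u2u64 for the existing users.
 */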
static void
clc_lower_64bit_semantics(nir_shader *nir)
{
   nir_foreach_function(func, nir) {
      nir_builder b;
      nir_builder_init(&b, func->impl);

      nir_foreach_block(block, func->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type == nir_instr_type_intrinsic) {
               nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
               switch (intrinsic->intrinsic) {
               case nir_intrinsic_load_global_invocation_id:
               case nir_intrinsic_load_global_invocation_id_zero_base:
               case nir_intrinsic_load_base_global_invocation_id:
               case nir_intrinsic_load_local_invocation_id:
               case nir_intrinsic_load_workgroup_id:
               case nir_intrinsic_load_workgroup_id_zero_base:
               case nir_intrinsic_load_base_workgroup_id:
               case nir_intrinsic_load_num_workgroups:
                  break;
               default:
                  continue;
               }

               if (nir_instr_ssa_def(instr)->bit_size != 64)
                  continue;

               intrinsic->dest.ssa.bit_size = 32;
               b.cursor = nir_after_instr(instr);

               nir_ssa_def *i64 = nir_u2u64(&b, &intrinsic->dest.ssa);
               nir_ssa_def_rewrite_uses_after(
                  &intrinsic->dest.ssa,
                  i64,
                  i64->parent_instr);
            }
         }
      }
   }
}

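/* For float-returning texture ops that use a sampler declared with
 * unnormalized coordinates, scale the texel-space coordinates by the
 * reciprocal of the texture size so the rest of the stack can assume
 * normalized coordinates. Array indices are rounded and clamped instead
 * of scaled.
 */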
static void
clc_lower_nonnormalized_samplers(nir_shader *nir,
                                 const dxil_wrap_sampler_state *states)
{
   nir_foreach_function(func, nir) {
      if (!func->is_entrypoint)
         continue;
      assert(func->impl);

      nir_builder b;
      nir_builder_init(&b, func->impl);

      nir_foreach_block(block, func->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_tex)
               continue;
            nir_tex_instr *tex = nir_instr_as_tex(instr);

            int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
            if (sampler_src_idx == -1)
               continue;

            nir_src *sampler_src = &tex->src[sampler_src_idx].src;
            assert(sampler_src->is_ssa && sampler_src->ssa->parent_instr->type == nir_instr_type_deref);
            nir_variable *sampler = nir_deref_instr_get_variable(
               nir_instr_as_deref(sampler_src->ssa->parent_instr));

            // If the sampler returns ints, we'll handle this in the int lowering pass
            if (nir_alu_type_get_base_type(tex->dest_type) != nir_type_float)
               continue;

            // If sampler uses normalized coords, nothing to do
            if (!states[sampler->data.binding].is_nonnormalized_coords)
               continue;

            b.cursor = nir_before_instr(&tex->instr);

            int coords_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
            assert(coords_idx != -1);
            nir_ssa_def *coords =
               nir_ssa_for_src(&b, tex->src[coords_idx].src, tex->coord_components);

            nir_ssa_def *txs = nir_i2f32(&b, nir_get_texture_size(&b, tex));

            // Normalize coords for tex
            nir_ssa_def *scale = nir_frcp(&b, txs);
            nir_ssa_def *comps[4];
            for (unsigned i = 0; i < coords->num_components; ++i) {
               comps[i] = nir_channel(&b, coords, i);
               if (tex->is_array && i == coords->num_components - 1) {
                  // Don't scale the array index, but do clamp it
                  comps[i] = nir_fround_even(&b, comps[i]);
                  comps[i] = nir_fmax(&b, comps[i], nir_imm_float(&b, 0.0f));
                  comps[i] = nir_fmin(&b, comps[i], nir_fsub(&b, nir_channel(&b, txs, i), nir_imm_float(&b, 1.0f)));
                  break;
               }

               // The CTS is pretty clear that this value has to be floored for nearest sampling
               // but must not be for linear sampling.
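               // e.g. (illustrative) an unnormalized x coordinate of 2.7 with
               // nearest filtering becomes floor(2.7) + 0.5 = 2.5, which is
               // then multiplied by 1/width below.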
               if (!states[sampler->data.binding].is_linear_filtering)
                  comps[i] = nir_fadd_imm(&b, nir_ffloor(&b, comps[i]), 0.5f);
               comps[i] = nir_fmul(&b, comps[i], nir_channel(&b, scale, i));
            }
            nir_ssa_def *normalized_coords = nir_vec(&b, comps, coords->num_components);
            nir_instr_rewrite_src(&tex->instr,
                                  &tex->src[coords_idx].src,
                                  nir_src_for_ssa(normalized_coords));
         }
      }
   }
}

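/* Create a hidden UBO ("kernel_inputs") sized to cover every kernel argument
 * that is passed through the inputs buffer, laid out as an array of uints.
 */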
static nir_variable *
add_kernel_inputs_var(struct clc_dxil_object *dxil, nir_shader *nir,
                      unsigned *cbv_id)
{
   if (!dxil->kernel->num_args)
      return NULL;

   unsigned size = 0;

   nir_foreach_variable_with_modes(var, nir, nir_var_uniform)
      size = MAX2(size,
                  var->data.driver_location +
                  glsl_get_cl_size(var->type));

   size = align(size, 4);

   const struct glsl_type *array_type = glsl_array_type(glsl_uint_type(), size / 4, 4);
   const struct glsl_struct_field field = { array_type, "arr" };
   nir_variable *var =
      nir_variable_create(nir, nir_var_mem_ubo,
                          glsl_struct_type(&field, 1, "kernel_inputs", false),
                          "kernel_inputs");
   var->data.binding = (*cbv_id)++;
   var->data.how_declared = nir_var_hidden;
   return var;
}

static nir_variable *
add_work_properties_var(struct clc_dxil_object *dxil,
                        struct nir_shader *nir, unsigned *cbv_id)
{
   const struct glsl_type *array_type =
      glsl_array_type(glsl_uint_type(),
                      sizeof(struct clc_work_properties_data) / sizeof(unsigned),
                      sizeof(unsigned));
   const struct glsl_struct_field field = { array_type, "arr" };
   nir_variable *var =
      nir_variable_create(nir, nir_var_mem_ubo,
                          glsl_struct_type(&field, 1, "kernel_work_properties", false),
436 "kernel_work_properies");
   var->data.binding = (*cbv_id)++;
   var->data.how_declared = nir_var_hidden;
   return var;
}

static void
clc_lower_constant_to_ssbo(nir_shader *nir,
                           const struct clc_kernel_info *kerninfo, unsigned *uav_id)
{
   /* Update the constant variables and assign them a binding. */
   nir_foreach_variable_with_modes(var, nir, nir_var_mem_constant) {
      var->data.mode = nir_var_mem_ssbo;
      var->data.binding = (*uav_id)++;
   }

   /* And finally patch all the derefs referencing the constant
    * variables/pointers.
    */
   nir_foreach_function(func, nir) {
      if (!func->is_entrypoint)
         continue;

      assert(func->impl);

      nir_builder b;
      nir_builder_init(&b, func->impl);

      nir_foreach_block(block, func->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_deref)
               continue;

            nir_deref_instr *deref = nir_instr_as_deref(instr);

            if (deref->modes != nir_var_mem_constant)
               continue;

            deref->modes = nir_var_mem_ssbo;
         }
      }
   }
}

static void
clc_lower_global_to_ssbo(nir_shader *nir)
{
   nir_foreach_function(func, nir) {
      if (!func->is_entrypoint)
         continue;

      assert(func->impl);

      nir_foreach_block(block, func->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_deref)
               continue;

            nir_deref_instr *deref = nir_instr_as_deref(instr);

            if (deref->modes != nir_var_mem_global)
               continue;

            deref->modes = nir_var_mem_ssbo;
         }
      }
   }
}

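/* Recursively flatten a nir_constant initializer into a byte buffer, using
 * the OpenCL (CL) size/offset layout of the corresponding GLSL type.
 */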
static void
copy_const_initializer(const nir_constant *constant, const struct glsl_type *type,
                       uint8_t *data)
{
   if (glsl_type_is_array(type)) {
      const struct glsl_type *elm_type = glsl_get_array_element(type);
      unsigned step_size = glsl_get_explicit_stride(type);

      for (unsigned i = 0; i < constant->num_elements; i++) {
         copy_const_initializer(constant->elements[i], elm_type,
                                data + (i * step_size));
      }
   } else if (glsl_type_is_struct(type)) {
      for (unsigned i = 0; i < constant->num_elements; i++) {
         const struct glsl_type *elm_type = glsl_get_struct_field(type, i);
         int offset = glsl_get_struct_field_offset(type, i);
         copy_const_initializer(constant->elements[i], elm_type, data + offset);
      }
   } else {
      assert(glsl_type_is_vector_or_scalar(type));

      for (unsigned i = 0; i < glsl_get_components(type); i++) {
         switch (glsl_get_bit_size(type)) {
         case 64:
            *((uint64_t *)data) = constant->values[i].u64;
            break;
         case 32:
            *((uint32_t *)data) = constant->values[i].u32;
            break;
         case 16:
            *((uint16_t *)data) = constant->values[i].u16;
            break;
         case 8:
            *((uint8_t *)data) = constant->values[i].u8;
            break;
         default:
            unreachable("Invalid base type");
         }

         data += glsl_get_bit_size(type) / 8;
      }
   }
}

static const struct glsl_type *
get_cast_type(unsigned bit_size)
{
   switch (bit_size) {
   case 64:
      return glsl_int64_t_type();
   case 32:
      return glsl_int_type();
   case 16:
      return glsl_int16_t_type();
   case 8:
      return glsl_int8_t_type();
   }
   unreachable("Invalid bit_size");
}

static void
split_unaligned_load(nir_builder *b, nir_intrinsic_instr *intrin, unsigned alignment)
{
   enum gl_access_qualifier access = nir_intrinsic_access(intrin);
   nir_ssa_def *srcs[NIR_MAX_VEC_COMPONENTS * NIR_MAX_VEC_COMPONENTS * sizeof(int64_t) / 8];
   unsigned comp_size = intrin->dest.ssa.bit_size / 8;
   unsigned num_comps = intrin->dest.ssa.num_components;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_deref_instr *ptr = nir_src_as_deref(intrin->src[0]);

   const struct glsl_type *cast_type = get_cast_type(alignment * 8);
   nir_deref_instr *cast = nir_build_deref_cast(b, &ptr->dest.ssa, ptr->modes, cast_type, alignment);

   unsigned num_loads = DIV_ROUND_UP(comp_size * num_comps, alignment);
   for (unsigned i = 0; i < num_loads; ++i) {
      nir_deref_instr *elem = nir_build_deref_ptr_as_array(b, cast, nir_imm_intN_t(b, i, cast->dest.ssa.bit_size));
      srcs[i] = nir_load_deref_with_access(b, elem, access);
   }

   nir_ssa_def *new_dest = nir_extract_bits(b, srcs, num_loads, 0, num_comps, intrin->dest.ssa.bit_size);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_dest);
   nir_instr_remove(&intrin->instr);
}

static void
split_unaligned_store(nir_builder *b, nir_intrinsic_instr *intrin, unsigned alignment)
{
   enum gl_access_qualifier access = nir_intrinsic_access(intrin);

   assert(intrin->src[1].is_ssa);
   nir_ssa_def *value = intrin->src[1].ssa;
   unsigned comp_size = value->bit_size / 8;
   unsigned num_comps = value->num_components;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_deref_instr *ptr = nir_src_as_deref(intrin->src[0]);

   const struct glsl_type *cast_type = get_cast_type(alignment * 8);
   nir_deref_instr *cast = nir_build_deref_cast(b, &ptr->dest.ssa, ptr->modes, cast_type, alignment);

   unsigned num_stores = DIV_ROUND_UP(comp_size * num_comps, alignment);
   for (unsigned i = 0; i < num_stores; ++i) {
      nir_ssa_def *substore_val = nir_extract_bits(b, &value, 1, i * alignment * 8, 1, alignment * 8);
      nir_deref_instr *elem = nir_build_deref_ptr_as_array(b, cast, nir_imm_intN_t(b, i, cast->dest.ssa.bit_size));
      nir_store_deref_with_access(b, elem, substore_val, ~0, access);
   }

   nir_instr_remove(&intrin->instr);
}

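/* Find load/store derefs whose guaranteed alignment is smaller than what the
 * access requires and split them into a series of smaller, aligned accesses
 * via split_unaligned_load()/split_unaligned_store().
 */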
static bool
split_unaligned_loads_stores(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_builder b;
      nir_builder_init(&b, function->impl);

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_deref &&
                intrin->intrinsic != nir_intrinsic_store_deref)
               continue;
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);

            unsigned align_mul = 0, align_offset = 0;
            nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset);

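            // The guaranteed alignment is the lowest set bit of the offset when
            // the offset is non-zero, e.g. (illustrative) align_mul = 16 with
            // align_offset = 4 gives ffs(4) = 3, so alignment = 1 << 2 = 4.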
            unsigned alignment = align_offset ? 1 << (ffs(align_offset) - 1) : align_mul;

            /* We can load anything at 4-byte alignment, except for
             * UBOs (AKA CBs where the granularity is 16 bytes).
             */
            if (alignment >= (deref->modes == nir_var_mem_ubo ? 16 : 4))
               continue;

            nir_ssa_def *val;
            if (intrin->intrinsic == nir_intrinsic_load_deref) {
               assert(intrin->dest.is_ssa);
               val = &intrin->dest.ssa;
            } else {
               assert(intrin->src[1].is_ssa);
               val = intrin->src[1].ssa;
            }

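            // e.g. (illustrative) a 32-bit vec3 gets a natural alignment of
            // 4 * 4 = 16 bytes, since vec3 values are laid out with a vec4 stride.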
            unsigned natural_alignment =
               val->bit_size / 8 *
               (val->num_components == 3 ? 4 : val->num_components);

            if (alignment >= natural_alignment)
               continue;

            if (intrin->intrinsic == nir_intrinsic_load_deref)
               split_unaligned_load(&b, intrin, alignment);
            else
               split_unaligned_store(&b, intrin, alignment);
            progress = true;
         }
      }
   }

   return progress;
}

static enum pipe_tex_wrap
wrap_from_cl_addressing(unsigned addressing_mode)
{
   switch (addressing_mode)
   {
   default:
   case SAMPLER_ADDRESSING_MODE_NONE:
   case SAMPLER_ADDRESSING_MODE_CLAMP:
      // Since OpenCL's only border color is 0's and D3D specs out-of-bounds loads to return 0, don't apply any wrap mode
      return (enum pipe_tex_wrap)-1;
   case SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE: return PIPE_TEX_WRAP_CLAMP_TO_EDGE;
   case SAMPLER_ADDRESSING_MODE_REPEAT: return PIPE_TEX_WRAP_REPEAT;
   case SAMPLER_ADDRESSING_MODE_REPEAT_MIRRORED: return PIPE_TEX_WRAP_MIRROR_REPEAT;
   }
}

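/* Return true if any ALU instruction in the entry point produces a 64-bit
 * floating-point result.
 */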
static bool shader_has_double(nir_shader *nir)
{
   foreach_list_typed(nir_function, func, node, &nir->functions) {
      if (!func->is_entrypoint)
         continue;

      assert(func->impl);

      nir_foreach_block(block, func->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_alu)
               continue;

            nir_alu_instr *alu = nir_instr_as_alu(instr);
            const nir_op_info *info = &nir_op_infos[alu->op];

            if (info->output_type & nir_type_float &&
                nir_dest_bit_size(alu->dest.dest) == 64)
               return true;
         }
      }
   }

   return false;
}

struct clc_libclc *
clc_libclc_new_dxil(const struct clc_logger *logger,
                    const struct clc_libclc_dxil_options *options)
{
   struct clc_libclc_options clc_options = {
      .optimize = options->optimize,
      .nir_options = dxil_get_nir_compiler_options(),
   };

   return clc_libclc_new(logger, &clc_options);
}

bool
clc_spirv_to_dxil(struct clc_libclc *lib,
                  const struct clc_binary *linked_spirv,
                  const struct clc_parsed_spirv *parsed_data,
                  const char *entrypoint,
                  const struct clc_runtime_kernel_conf *conf,
                  const struct clc_spirv_specialization_consts *consts,
                  const struct clc_logger *logger,
                  struct clc_dxil_object *out_dxil)
{
   struct nir_shader *nir;

   for (unsigned i = 0; i < parsed_data->num_kernels; i++) {
      if (!strcmp(parsed_data->kernels[i].name, entrypoint)) {
         out_dxil->kernel = &parsed_data->kernels[i];
         break;
      }
   }

   if (!out_dxil->kernel) {
      clc_error(logger, "no '%s' kernel found", entrypoint);
      return false;
   }

   const struct spirv_to_nir_options spirv_options = {
      .environment = NIR_SPIRV_OPENCL,
      .clc_shader = clc_libclc_get_clc_shader(lib),
      .constant_addr_format = nir_address_format_32bit_index_offset_pack64,
      .global_addr_format = nir_address_format_32bit_index_offset_pack64,
      .shared_addr_format = nir_address_format_32bit_offset_as_64bit,
      .temp_addr_format = nir_address_format_32bit_offset_as_64bit,
      .float_controls_execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32,
      .caps = {
         .address = true,
         .float64 = true,
         .int8 = true,
         .int16 = true,
         .int64 = true,
         .kernel = true,
         .kernel_image = true,
         .kernel_image_read_write = true,
         .literal_sampler = true,
         .printf = true,
      },
   };
   nir_shader_compiler_options nir_options =
      *dxil_get_nir_compiler_options();

   if (conf && conf->lower_bit_size & 64) {
      nir_options.lower_pack_64_2x32_split = false;
      nir_options.lower_unpack_64_2x32_split = false;
      nir_options.lower_int64_options = ~0;
   }

   if (conf && conf->lower_bit_size & 16)
      nir_options.support_16bit_alu = true;

   glsl_type_singleton_init_or_ref();

   nir = spirv_to_nir(linked_spirv->data, linked_spirv->size / 4,
                      consts ? (struct nir_spirv_specialization *)consts->specializations : NULL,
                      consts ? consts->num_specializations : 0,
                      MESA_SHADER_KERNEL, entrypoint,
                      &spirv_options,
                      &nir_options);
   if (!nir) {
      clc_error(logger, "spirv_to_nir() failed");
      goto err_free_dxil;
   }
   nir->info.workgroup_size_variable = true;

   NIR_PASS_V(nir, nir_lower_goto_ifs);
   NIR_PASS_V(nir, nir_opt_dead_cf);

   struct clc_dxil_metadata *metadata = &out_dxil->metadata;

   metadata->args = calloc(out_dxil->kernel->num_args,
                           sizeof(*metadata->args));
   if (!metadata->args) {
      clc_error(logger, "failed to allocate arg positions");
      goto err_free_dxil;
   }

   {
      bool progress;
      do
      {
         progress = false;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_opt_deref);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_undef);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_opt_algebraic);
      } while (progress);
   }

   // Inline all functions first,
   // according to the comment on nir_inline_functions.
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_lower_libclc, clc_libclc_get_clc_shader(lib));
   NIR_PASS_V(nir, nir_inline_functions);

   // Pick off the single entrypoint that we want.
   nir_remove_non_entrypoints(nir);

   {
      bool progress;
      do
      {
         progress = false;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_opt_deref);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_undef);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
         NIR_PASS(progress, nir, nir_split_var_copies);
         NIR_PASS(progress, nir, nir_lower_var_copies);
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_opt_algebraic);
         NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_aggressive_last_continue | nir_opt_if_optimize_phi_true_false);
         NIR_PASS(progress, nir, nir_opt_dead_cf);
         NIR_PASS(progress, nir, nir_opt_remove_phis);
         NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
         NIR_PASS(progress, nir, nir_lower_vec3_to_vec4, nir_var_mem_generic | nir_var_uniform);
      } while (progress);
   }

   NIR_PASS_V(nir, nir_scale_fdiv);

   dxil_wrap_sampler_state int_sampler_states[PIPE_MAX_SHADER_SAMPLER_VIEWS] = { {{0}} };
   unsigned sampler_id = 0;

   struct exec_list inline_samplers_list;
   exec_list_make_empty(&inline_samplers_list);

   // Move inline samplers to the end of the uniforms list
   nir_foreach_variable_with_modes_safe(var, nir, nir_var_uniform) {
      if (glsl_type_is_sampler(var->type) && var->data.sampler.is_inline_sampler) {
         exec_node_remove(&var->node);
         exec_list_push_tail(&inline_samplers_list, &var->node);
      }
   }
   exec_node_insert_list_after(exec_list_get_tail(&nir->variables), &inline_samplers_list);

   NIR_PASS_V(nir, nir_lower_variable_initializers, ~(nir_var_function_temp | nir_var_shader_temp));

   // Lower memcpy
   NIR_PASS_V(nir, dxil_nir_lower_memcpy_deref);

   // Ensure the printf struct has explicit types, but we'll throw away the scratch size, because we haven't
   // necessarily removed all temp variables (e.g. the printf struct itself) at this point, so we'll rerun this later
   assert(nir->scratch_size == 0);
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, glsl_get_cl_type_size_align);

   nir_lower_printf_options printf_options = {
      .treat_doubles_as_floats = true,
      .max_buffer_size = 1024 * 1024
   };
   NIR_PASS_V(nir, nir_lower_printf, &printf_options);

   metadata->printf.info_count = nir->printf_info_count;
   metadata->printf.infos = calloc(nir->printf_info_count, sizeof(struct clc_printf_info));
   for (unsigned i = 0; i < nir->printf_info_count; i++) {
      metadata->printf.infos[i].str = malloc(nir->printf_info[i].string_size);
      memcpy(metadata->printf.infos[i].str, nir->printf_info[i].strings, nir->printf_info[i].string_size);
      metadata->printf.infos[i].num_args = nir->printf_info[i].num_args;
      metadata->printf.infos[i].arg_sizes = malloc(nir->printf_info[i].num_args * sizeof(unsigned));
      memcpy(metadata->printf.infos[i].arg_sizes, nir->printf_info[i].arg_sizes, nir->printf_info[i].num_args * sizeof(unsigned));
   }

   // copy propagate to prepare for lower_explicit_io
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_opt_copy_prop_vars);
   NIR_PASS_V(nir, nir_lower_var_copies);
   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(nir, nir_lower_alu);
   NIR_PASS_V(nir, nir_opt_dce);
   NIR_PASS_V(nir, nir_opt_deref);

   // For uniforms (kernel inputs, minus images), run this before adjusting variable list via image/sampler lowering
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_uniform, glsl_get_cl_type_size_align);

   // Calculate input offsets/metadata.
   unsigned uav_id = 0;
   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
      int i = var->data.location;
      if (i < 0)
         continue;

      unsigned size = glsl_get_cl_size(var->type);

      metadata->args[i].offset = var->data.driver_location;
      metadata->args[i].size = size;
      metadata->kernel_inputs_buf_size = MAX2(metadata->kernel_inputs_buf_size,
                                              var->data.driver_location + size);
      if (out_dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_GLOBAL ||
          out_dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_CONSTANT) {
         metadata->args[i].globconstptr.buf_id = uav_id++;
      } else if (glsl_type_is_sampler(var->type)) {
         unsigned address_mode = conf ? conf->args[i].sampler.addressing_mode : 0u;
         int_sampler_states[sampler_id].wrap[0] =
            int_sampler_states[sampler_id].wrap[1] =
            int_sampler_states[sampler_id].wrap[2] = wrap_from_cl_addressing(address_mode);
         int_sampler_states[sampler_id].is_nonnormalized_coords =
            conf ? !conf->args[i].sampler.normalized_coords : 0;
         int_sampler_states[sampler_id].is_linear_filtering =
            conf ? conf->args[i].sampler.linear_filtering : 0;
         metadata->args[i].sampler.sampler_id = var->data.binding = sampler_id++;
      }
   }

   unsigned num_global_inputs = uav_id;

   // Second pass over inputs to calculate image bindings
   unsigned srv_id = 0;
   nir_foreach_image_variable(var, nir) {
      int i = var->data.location;
      if (i < 0)
         continue;

      assert(glsl_type_is_image(var->type));

      if (var->data.access == ACCESS_NON_WRITEABLE) {
         metadata->args[i].image.buf_ids[0] = srv_id++;
      } else {
         // Write or read-write are UAVs
         metadata->args[i].image.buf_ids[0] = uav_id++;
      }

      metadata->args[i].image.num_buf_ids = 1;
      var->data.binding = metadata->args[i].image.buf_ids[0];

      // Assign location that'll be used for uniforms for format/order
      var->data.driver_location = metadata->kernel_inputs_buf_size;
      metadata->args[i].offset = metadata->kernel_inputs_buf_size;
      metadata->args[i].size = 8;
      metadata->kernel_inputs_buf_size += metadata->args[i].size;
   }

   // Before removing dead uniforms, dedupe constant samplers to make more dead uniforms
   NIR_PASS_V(nir, clc_nir_dedupe_const_samplers);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo |
              nir_var_mem_constant | nir_var_function_temp | nir_var_image, NULL);

   // Fill out inline sampler metadata, now that they've been deduped and dead ones removed
   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
      if (glsl_type_is_sampler(var->type) && var->data.sampler.is_inline_sampler) {
         int_sampler_states[sampler_id].wrap[0] =
            int_sampler_states[sampler_id].wrap[1] =
            int_sampler_states[sampler_id].wrap[2] =
            wrap_from_cl_addressing(var->data.sampler.addressing_mode);
         int_sampler_states[sampler_id].is_nonnormalized_coords =
            !var->data.sampler.normalized_coordinates;
         int_sampler_states[sampler_id].is_linear_filtering =
            var->data.sampler.filter_mode == SAMPLER_FILTER_MODE_LINEAR;
         var->data.binding = sampler_id++;

         assert(metadata->num_const_samplers < CLC_MAX_SAMPLERS);
         metadata->const_samplers[metadata->num_const_samplers].sampler_id = var->data.binding;
         metadata->const_samplers[metadata->num_const_samplers].addressing_mode = var->data.sampler.addressing_mode;
         metadata->const_samplers[metadata->num_const_samplers].normalized_coords = var->data.sampler.normalized_coordinates;
         metadata->const_samplers[metadata->num_const_samplers].filter_mode = var->data.sampler.filter_mode;
         metadata->num_const_samplers++;
      }
   }

   // Needs to come before lower_explicit_io
   NIR_PASS_V(nir, nir_lower_readonly_images_to_tex, false);
   struct clc_image_lower_context image_lower_context = { metadata, &srv_id, &uav_id };
   NIR_PASS_V(nir, clc_lower_images, &image_lower_context);
   NIR_PASS_V(nir, clc_lower_nonnormalized_samplers, int_sampler_states);
   NIR_PASS_V(nir, nir_lower_samplers);
   NIR_PASS_V(nir, dxil_lower_sample_to_txf_for_integer_tex,
              int_sampler_states, NULL, 14.0f);

   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_mem_shared | nir_var_function_temp, NULL);

   nir->scratch_size = 0;
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_mem_shared | nir_var_function_temp | nir_var_mem_global | nir_var_mem_constant,
              glsl_get_cl_type_size_align);

   NIR_PASS_V(nir, dxil_nir_lower_ubo_to_temp);
   NIR_PASS_V(nir, clc_lower_constant_to_ssbo, out_dxil->kernel, &uav_id);
   NIR_PASS_V(nir, clc_lower_global_to_ssbo);

   bool has_printf = false;
   NIR_PASS(has_printf, nir, clc_lower_printf_base, uav_id);
   metadata->printf.uav_id = has_printf ? uav_id++ : -1;

   NIR_PASS_V(nir, dxil_nir_lower_deref_ssbo);

   NIR_PASS_V(nir, split_unaligned_loads_stores);

   assert(nir->info.cs.ptr_size == 64);
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
              nir_address_format_32bit_index_offset_pack64);
   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_shared | nir_var_function_temp | nir_var_uniform,
              nir_address_format_32bit_offset_as_64bit);

   NIR_PASS_V(nir, nir_lower_system_values);

   nir_lower_compute_system_values_options compute_options = {
      .has_base_global_invocation_id = (conf && conf->support_global_work_id_offsets),
      .has_base_workgroup_id = (conf && conf->support_workgroup_id_offsets),
   };
   NIR_PASS_V(nir, nir_lower_compute_system_values, &compute_options);

   NIR_PASS_V(nir, clc_lower_64bit_semantics);

   NIR_PASS_V(nir, nir_opt_deref);
   NIR_PASS_V(nir, nir_lower_vars_to_ssa);

   unsigned cbv_id = 0;

   nir_variable *inputs_var =
      add_kernel_inputs_var(out_dxil, nir, &cbv_id);
   nir_variable *work_properties_var =
      add_work_properties_var(out_dxil, nir, &cbv_id);

   memcpy(metadata->local_size, nir->info.workgroup_size,
          sizeof(metadata->local_size));
   memcpy(metadata->local_size_hint, nir->info.cs.workgroup_size_hint,
          sizeof(metadata->local_size));

   // Patch the localsize before calling clc_nir_lower_system_values().
   if (conf) {
      for (unsigned i = 0; i < ARRAY_SIZE(nir->info.workgroup_size); i++) {
         if (!conf->local_size[i] ||
             conf->local_size[i] == nir->info.workgroup_size[i])
            continue;

         if (nir->info.workgroup_size[i] &&
             nir->info.workgroup_size[i] != conf->local_size[i]) {
            debug_printf("D3D12: runtime local size does not match reqd_work_group_size() values\n");
            goto err_free_dxil;
         }

         nir->info.workgroup_size[i] = conf->local_size[i];
      }
      memcpy(metadata->local_size, nir->info.workgroup_size,
             sizeof(metadata->local_size));
   } else {
      /* Make sure there's at least one thread that's set to run */
      for (unsigned i = 0; i < ARRAY_SIZE(nir->info.workgroup_size); i++) {
         if (nir->info.workgroup_size[i] == 0)
            nir->info.workgroup_size[i] = 1;
      }
   }

   NIR_PASS_V(nir, clc_nir_lower_kernel_input_loads, inputs_var);
   NIR_PASS_V(nir, split_unaligned_loads_stores);
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
              nir_address_format_32bit_index_offset);
   NIR_PASS_V(nir, clc_nir_lower_system_values, work_properties_var);
   NIR_PASS_V(nir, dxil_nir_lower_loads_stores_to_dxil);
   NIR_PASS_V(nir, dxil_nir_opt_alu_deref_srcs);
   NIR_PASS_V(nir, dxil_nir_lower_atomics_to_dxil);
   NIR_PASS_V(nir, nir_lower_fp16_casts);
   NIR_PASS_V(nir, nir_lower_convert_alu_types, NULL);

   // Convert pack to pack_split
   NIR_PASS_V(nir, nir_lower_pack);
   // Lower pack_split to bit math
   NIR_PASS_V(nir, nir_opt_algebraic);

   NIR_PASS_V(nir, nir_opt_dce);

   nir_validate_shader(nir, "Validate before feeding NIR to the DXIL compiler");
   struct nir_to_dxil_options opts = {
      .interpolate_at_vertex = false,
      .lower_int16 = (conf && (conf->lower_bit_size & 16) != 0),
      .disable_math_refactoring = true,
      .num_kernel_globals = num_global_inputs,
      .environment = DXIL_ENVIRONMENT_CL,
      .shader_model_max = SHADER_MODEL_6_2,
      .validator_version_max = DXIL_VALIDATOR_1_4,
   };

   for (unsigned i = 0; i < out_dxil->kernel->num_args; i++) {
      if (out_dxil->kernel->args[i].address_qualifier != CLC_KERNEL_ARG_ADDRESS_LOCAL)
         continue;

      /* If we don't have the runtime conf yet, we just create a dummy variable.
       * This will be adjusted when clc_spirv_to_dxil() is called with a conf
       * argument.
       */
      unsigned size = 4;
      if (conf && conf->args)
         size = conf->args[i].localptr.size;

      /* The alignment required for the pointee type is not easy to get from
       * here, so let's base our logic on the size itself. Anything bigger than
       * the maximum alignment constraint (which is 128 bytes, since ulong16 or
       * double16 sizes are the biggest base types) should be aligned on this
       * maximum alignment constraint. For smaller types, we use the size
       * itself to calculate the alignment.
       */
      unsigned alignment = size < 128 ? (1 << (ffs(size) - 1)) : 128;
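      /* e.g. (illustrative) a 24-byte local allocation: ffs(24) = 4, so the
       * alignment used is 1 << 3 = 8 bytes; anything >= 128 bytes gets 128.
       */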

      nir->info.shared_size = align(nir->info.shared_size, alignment);
      metadata->args[i].localptr.sharedmem_offset = nir->info.shared_size;
      nir->info.shared_size += size;
   }

   metadata->local_mem_size = nir->info.shared_size;
   metadata->priv_mem_size = nir->scratch_size;

   /* DXIL double math is too limited compared to what NIR expects. Let's refuse
    * to compile a shader when it contains double operations until we have
    * double lowering hooked up.
    */
   if (shader_has_double(nir)) {
      clc_error(logger, "NIR shader contains doubles, which we don't support yet");
      goto err_free_dxil;
   }

   struct blob tmp;
   if (!nir_to_dxil(nir, &opts, &tmp)) {
      debug_printf("D3D12: nir_to_dxil failed\n");
      goto err_free_dxil;
   }

   nir_foreach_variable_with_modes(var, nir, nir_var_mem_ssbo) {
      if (var->constant_initializer) {
         if (glsl_type_is_array(var->type)) {
            int size = align(glsl_get_cl_size(var->type), 4);
            uint8_t *data = malloc(size);
            if (!data)
               goto err_free_dxil;

            copy_const_initializer(var->constant_initializer, var->type, data);
            metadata->consts[metadata->num_consts].data = data;
            metadata->consts[metadata->num_consts].size = size;
            metadata->consts[metadata->num_consts].uav_id = var->data.binding;
            metadata->num_consts++;
         } else
            unreachable("unexpected constant initializer");
      }
   }

   metadata->kernel_inputs_cbv_id = inputs_var ? inputs_var->data.binding : 0;
   metadata->work_properties_cbv_id = work_properties_var->data.binding;
   metadata->num_uavs = uav_id;
   metadata->num_srvs = srv_id;
   metadata->num_samplers = sampler_id;

   ralloc_free(nir);
   glsl_type_singleton_decref();

   blob_finish_get_buffer(&tmp, &out_dxil->binary.data,
                          &out_dxil->binary.size);
   return true;

err_free_dxil:
   clc_free_dxil_object(out_dxil);
   return false;
}

void clc_free_dxil_object(struct clc_dxil_object *dxil)
{
   for (unsigned i = 0; i < dxil->metadata.num_consts; i++)
      free(dxil->metadata.consts[i].data);

   for (unsigned i = 0; i < dxil->metadata.printf.info_count; i++) {
      free(dxil->metadata.printf.infos[i].arg_sizes);
      free(dxil->metadata.printf.infos[i].str);
   }
   free(dxil->metadata.printf.infos);

   free(dxil->binary.data);
}

uint64_t clc_compiler_get_version(void)
{
   const char sha1[] = MESA_GIT_SHA1;
   const char* dash = strchr(sha1, '-');
   if (dash) {
      return strtoull(dash + 1, NULL, 16);
   }
   return 0;
}