/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_to_dxil.h"

#include "dxil_container.h"
#include "dxil_dump.h"
#include "dxil_enums.h"
#include "dxil_function.h"
#include "dxil_module.h"
#include "dxil_nir.h"
#include "dxil_signature.h"

#include "nir/nir_builder.h"
#include "nir_deref.h"
#include "util/ralloc.h"
#include "util/u_debug.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"

#include "git_sha1.h"

#include "vulkan/vulkan_core.h"

#include <stdint.h>

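/* Debug flags for the NIR->DXIL translation, selected at runtime through the
 * DXIL_DEBUG environment variable, e.g. DXIL_DEBUG=trace,dump_module.
 */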
int debug_dxil = 0;

static const struct debug_named_value
dxil_debug_options[] = {
   { "verbose", DXIL_DEBUG_VERBOSE, NULL },
   { "dump_blob", DXIL_DEBUG_DUMP_BLOB, "Write shader blobs" },
   { "trace", DXIL_DEBUG_TRACE, "Trace instruction conversion" },
   { "dump_module", DXIL_DEBUG_DUMP_MODULE, "Dump module tree to stderr" },
   DEBUG_NAMED_VALUE_END
};

DEBUG_GET_ONCE_FLAGS_OPTION(debug_dxil, "DXIL_DEBUG", dxil_debug_options, 0)

static void
log_nir_instr_unsupported(const struct dxil_logger *logger,
                          const char *message_prefix, const nir_instr *instr)
{
   char *msg = NULL;
   char *instr_str = nir_instr_as_str(instr, NULL);
   asprintf(&msg, "%s: %s\n", message_prefix, instr_str);
   ralloc_free(instr_str);
   assert(msg);
   logger->log(logger->priv, msg);
   free(msg);
}

static void
default_logger_func(void *priv, const char *msg)
{
   fprintf(stderr, "%s", msg);
   unreachable("Unhandled error");
}

static const struct dxil_logger default_logger = { .priv = NULL, .log = default_logger_func };
/* Wrapped in do/while so the macro behaves as a single statement and the
 * embedded `if` cannot capture a dangling `else` at the use site. */
#define TRACE_CONVERSION(instr) \
   do { \
      if (debug_dxil & DXIL_DEBUG_TRACE) { \
         fprintf(stderr, "Convert '"); \
         nir_print_instr(instr, stderr); \
         fprintf(stderr, "'\n"); \
      } \
   } while (0)

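/* Baseline compiler options handed to NIR: DXIL has no direct equivalent for
 * most of these ALU, packing, and interpolation opcodes, so they are lowered
 * to sequences the backend can emit. Drivers refine this set through
 * dxil_get_nir_compiler_options() below.
 */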
static const nir_shader_compiler_options
nir_options = {
   .compact_arrays = true,
   .lower_ineg = true,
   .lower_fneg = true,
   .lower_ffma16 = true,
   .lower_ffma32 = true,
   .lower_isign = true,
   .lower_fsign = true,
   .lower_iabs = true,
   .lower_fmod = true,
   .lower_fpow = true,
   .lower_scmp = true,
   .lower_ldexp = true,
   .lower_flrp16 = true,
   .lower_flrp32 = true,
   .lower_flrp64 = true,
   .lower_bitfield_extract = true,
   .lower_ifind_msb = true,
   .lower_ufind_msb = true,
   .lower_extract_word = true,
   .lower_extract_byte = true,
   .lower_insert_word = true,
   .lower_insert_byte = true,
   .lower_all_io_to_elements = true,
   .lower_hadd = true,
   .lower_uadd_sat = true,
   .lower_usub_sat = true,
   .lower_iadd_sat = true,
   .lower_uadd_carry = true,
   .lower_usub_borrow = true,
   .lower_mul_high = true,
   .lower_pack_half_2x16 = true,
   .lower_pack_unorm_4x8 = true,
   .lower_pack_snorm_4x8 = true,
   .lower_pack_snorm_2x16 = true,
   .lower_pack_unorm_2x16 = true,
   .lower_pack_64_2x32_split = true,
   .lower_pack_32_2x16_split = true,
   .lower_pack_64_4x16 = true,
   .lower_unpack_64_2x32_split = true,
   .lower_unpack_32_2x16_split = true,
   .lower_unpack_half_2x16 = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_interpolate_at = true,
   .has_fsub = true,
   .has_isub = true,
   .has_bfe = true,
   .has_find_msb_rev = true,
   .vertex_id_zero_based = true,
   .lower_base_vertex = true,
   .lower_helper_invocation = true,
   .has_cs_global_id = true,
   .lower_mul_2x32_64 = true,
   .lower_doubles_options =
      nir_lower_drcp |
      nir_lower_dsqrt |
      nir_lower_drsq |
      nir_lower_dfract |
      nir_lower_dtrunc |
      nir_lower_dfloor |
      nir_lower_dceil |
      nir_lower_dround_even,
   .lower_uniforms_to_ubo = true,
   .max_unroll_iterations = 32, /* arbitrary */
   .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out),
   .lower_device_index_to_zero = true,
   .support_16bit_alu = true,
   .preserve_mediump = true,
   .discard_is_demote = true,
   .scalarize_ddx = true,
   .io_options = nir_io_dont_use_pos_for_non_fs_varyings | nir_io_mediump_is_32bit,
};

const nir_shader_compiler_options*
dxil_get_base_nir_compiler_options(void)
{
   return &nir_options;
}

void
dxil_get_nir_compiler_options(nir_shader_compiler_options *options,
                              enum dxil_shader_model shader_model_max,
                              unsigned supported_int_sizes,
                              unsigned supported_float_sizes)
{
   *options = nir_options;
   if (!(supported_int_sizes & 64)) {
      options->lower_pack_64_2x32_split = false;
      options->lower_unpack_64_2x32_split = false;
      options->lower_int64_options = ~0;
   }
   if (!(supported_float_sizes & 64))
      options->lower_doubles_options = ~0;
   if (shader_model_max >= SHADER_MODEL_6_4) {
      options->has_sdot_4x8 = true;
      options->has_udot_4x8 = true;
   }
}

static bool
emit_llvm_ident(struct dxil_module *m)
{
   const struct dxil_mdnode *compiler = dxil_get_metadata_string(m, "Mesa version " PACKAGE_VERSION MESA_GIT_SHA1);
   if (!compiler)
      return false;

   const struct dxil_mdnode *llvm_ident = dxil_get_metadata_node(m, &compiler, 1);
   return llvm_ident &&
          dxil_add_metadata_named_node(m, "llvm.ident", &llvm_ident, 1);
}

static bool
emit_named_version(struct dxil_module *m, const char *name,
                   int major, int minor)
{
   const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, major);
   const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, minor);
   const struct dxil_mdnode *version_nodes[] = { major_node, minor_node };
   const struct dxil_mdnode *version = dxil_get_metadata_node(m, version_nodes,
                                                              ARRAY_SIZE(version_nodes));
   return dxil_add_metadata_named_node(m, name, &version, 1);
}

static const char *
get_shader_kind_str(enum dxil_shader_kind kind)
{
   switch (kind) {
   case DXIL_PIXEL_SHADER:
      return "ps";
   case DXIL_VERTEX_SHADER:
      return "vs";
   case DXIL_GEOMETRY_SHADER:
      return "gs";
   case DXIL_HULL_SHADER:
      return "hs";
   case DXIL_DOMAIN_SHADER:
      return "ds";
   case DXIL_COMPUTE_SHADER:
      return "cs";
   default:
      unreachable("invalid shader kind");
   }
}

static bool
emit_dx_shader_model(struct dxil_module *m)
{
   const struct dxil_mdnode *type_node = dxil_get_metadata_string(m, get_shader_kind_str(m->shader_kind));
   const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, m->major_version);
   const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, m->minor_version);
   const struct dxil_mdnode *shader_model[] = { type_node, major_node,
                                                minor_node };
   const struct dxil_mdnode *dx_shader_model = dxil_get_metadata_node(m, shader_model, ARRAY_SIZE(shader_model));

   return dxil_add_metadata_named_node(m, "dx.shaderModel",
                                       &dx_shader_model, 1);
}

enum {
   DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG = 0,
   DXIL_STRUCTURED_BUFFER_ELEMENT_STRIDE_TAG = 1
};

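/* Opcode numbers for the dx.op.* DXIL intrinsics emitted below. These values
 * are part of the DXIL ABI and must match the opcode table in the DXIL
 * specification; only the subset this backend uses is listed.
 */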
enum dxil_intr {
   DXIL_INTR_LOAD_INPUT = 4,
   DXIL_INTR_STORE_OUTPUT = 5,
   DXIL_INTR_FABS = 6,
   DXIL_INTR_SATURATE = 7,

   DXIL_INTR_ISFINITE = 10,
   DXIL_INTR_ISNORMAL = 11,

   DXIL_INTR_FCOS = 12,
   DXIL_INTR_FSIN = 13,

   DXIL_INTR_FEXP2 = 21,
   DXIL_INTR_FRC = 22,
   DXIL_INTR_FLOG2 = 23,

   DXIL_INTR_SQRT = 24,
   DXIL_INTR_RSQRT = 25,
   DXIL_INTR_ROUND_NE = 26,
   DXIL_INTR_ROUND_NI = 27,
   DXIL_INTR_ROUND_PI = 28,
   DXIL_INTR_ROUND_Z = 29,

   DXIL_INTR_BFREV = 30,
   DXIL_INTR_COUNTBITS = 31,
   DXIL_INTR_FIRSTBIT_LO = 32,
   DXIL_INTR_FIRSTBIT_HI = 33,
   DXIL_INTR_FIRSTBIT_SHI = 34,

   DXIL_INTR_FMAX = 35,
   DXIL_INTR_FMIN = 36,
   DXIL_INTR_IMAX = 37,
   DXIL_INTR_IMIN = 38,
   DXIL_INTR_UMAX = 39,
   DXIL_INTR_UMIN = 40,

   DXIL_INTR_FMA = 47,

   DXIL_INTR_IBFE = 51,
   DXIL_INTR_UBFE = 52,
   DXIL_INTR_BFI = 53,

   DXIL_INTR_CREATE_HANDLE = 57,
   DXIL_INTR_CBUFFER_LOAD_LEGACY = 59,

   DXIL_INTR_SAMPLE = 60,
   DXIL_INTR_SAMPLE_BIAS = 61,
   DXIL_INTR_SAMPLE_LEVEL = 62,
   DXIL_INTR_SAMPLE_GRAD = 63,
   DXIL_INTR_SAMPLE_CMP = 64,
   DXIL_INTR_SAMPLE_CMP_LVL_ZERO = 65,

   DXIL_INTR_TEXTURE_LOAD = 66,
   DXIL_INTR_TEXTURE_STORE = 67,

   DXIL_INTR_BUFFER_LOAD = 68,
   DXIL_INTR_BUFFER_STORE = 69,

   DXIL_INTR_TEXTURE_SIZE = 72,
   DXIL_INTR_TEXTURE_GATHER = 73,
   DXIL_INTR_TEXTURE_GATHER_CMP = 74,

   DXIL_INTR_TEXTURE2DMS_GET_SAMPLE_POSITION = 75,
   DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION = 76,
   DXIL_INTR_RENDER_TARGET_GET_SAMPLE_COUNT = 77,

   DXIL_INTR_ATOMIC_BINOP = 78,
   DXIL_INTR_ATOMIC_CMPXCHG = 79,
   DXIL_INTR_BARRIER = 80,
   DXIL_INTR_TEXTURE_LOD = 81,

   DXIL_INTR_DISCARD = 82,
   DXIL_INTR_DDX_COARSE = 83,
   DXIL_INTR_DDY_COARSE = 84,
   DXIL_INTR_DDX_FINE = 85,
   DXIL_INTR_DDY_FINE = 86,

   DXIL_INTR_EVAL_SNAPPED = 87,
   DXIL_INTR_EVAL_SAMPLE_INDEX = 88,
   DXIL_INTR_EVAL_CENTROID = 89,

   DXIL_INTR_SAMPLE_INDEX = 90,
   DXIL_INTR_COVERAGE = 91,

   DXIL_INTR_THREAD_ID = 93,
   DXIL_INTR_GROUP_ID = 94,
   DXIL_INTR_THREAD_ID_IN_GROUP = 95,
   DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP = 96,

   DXIL_INTR_EMIT_STREAM = 97,
   DXIL_INTR_CUT_STREAM = 98,

   DXIL_INTR_GS_INSTANCE_ID = 100,

   DXIL_INTR_MAKE_DOUBLE = 101,
   DXIL_INTR_SPLIT_DOUBLE = 102,

   DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT = 103,
   DXIL_INTR_LOAD_PATCH_CONSTANT = 104,
   DXIL_INTR_DOMAIN_LOCATION = 105,
   DXIL_INTR_STORE_PATCH_CONSTANT = 106,
   DXIL_INTR_OUTPUT_CONTROL_POINT_ID = 107,
   DXIL_INTR_PRIMITIVE_ID = 108,

   DXIL_INTR_WAVE_IS_FIRST_LANE = 110,
   DXIL_INTR_WAVE_GET_LANE_INDEX = 111,
   DXIL_INTR_WAVE_GET_LANE_COUNT = 112,
   DXIL_INTR_WAVE_ANY_TRUE = 113,
   DXIL_INTR_WAVE_ALL_TRUE = 114,
   DXIL_INTR_WAVE_ACTIVE_ALL_EQUAL = 115,
   DXIL_INTR_WAVE_ACTIVE_BALLOT = 116,
   DXIL_INTR_WAVE_READ_LANE_AT = 117,
   DXIL_INTR_WAVE_READ_LANE_FIRST = 118,
   DXIL_INTR_WAVE_ACTIVE_OP = 119,
   DXIL_INTR_WAVE_ACTIVE_BIT = 120,
   DXIL_INTR_WAVE_PREFIX_OP = 121,
   DXIL_INTR_QUAD_READ_LANE_AT = 122,
   DXIL_INTR_QUAD_OP = 123,

   DXIL_INTR_LEGACY_F32TOF16 = 130,
   DXIL_INTR_LEGACY_F16TOF32 = 131,

   DXIL_INTR_ATTRIBUTE_AT_VERTEX = 137,
   DXIL_INTR_VIEW_ID = 138,

   DXIL_INTR_RAW_BUFFER_LOAD = 139,
   DXIL_INTR_RAW_BUFFER_STORE = 140,

   DXIL_INTR_DOT4_ADD_I8_PACKED = 163,
   DXIL_INTR_DOT4_ADD_U8_PACKED = 164,

   DXIL_INTR_ANNOTATE_HANDLE = 216,
   DXIL_INTR_CREATE_HANDLE_FROM_BINDING = 217,
   DXIL_INTR_CREATE_HANDLE_FROM_HEAP = 218,

   DXIL_INTR_IS_HELPER_LANE = 221,
   DXIL_INTR_SAMPLE_CMP_LEVEL = 224,
   DXIL_INTR_SAMPLE_CMP_GRAD = 254,
   DXIL_INTR_SAMPLE_CMP_BIAS = 255,

   DXIL_INTR_START_VERTEX_LOCATION = 256,
   DXIL_INTR_START_INSTANCE_LOCATION = 257,
};

enum dxil_atomic_op {
   DXIL_ATOMIC_ADD = 0,
   DXIL_ATOMIC_AND = 1,
   DXIL_ATOMIC_OR = 2,
   DXIL_ATOMIC_XOR = 3,
   DXIL_ATOMIC_IMIN = 4,
   DXIL_ATOMIC_IMAX = 5,
   DXIL_ATOMIC_UMIN = 6,
   DXIL_ATOMIC_UMAX = 7,
   DXIL_ATOMIC_EXCHANGE = 8,
};

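/* Map a NIR atomic op onto the op code expected by dx.op.atomicBinOp, which
 * performs atomics on resource handles (typed and raw buffers).
 */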
static enum dxil_atomic_op
nir_atomic_to_dxil_atomic(nir_atomic_op op)
{
   switch (op) {
   case nir_atomic_op_iadd: return DXIL_ATOMIC_ADD;
   case nir_atomic_op_iand: return DXIL_ATOMIC_AND;
   case nir_atomic_op_ior: return DXIL_ATOMIC_OR;
   case nir_atomic_op_ixor: return DXIL_ATOMIC_XOR;
   case nir_atomic_op_imin: return DXIL_ATOMIC_IMIN;
   case nir_atomic_op_imax: return DXIL_ATOMIC_IMAX;
   case nir_atomic_op_umin: return DXIL_ATOMIC_UMIN;
   case nir_atomic_op_umax: return DXIL_ATOMIC_UMAX;
   case nir_atomic_op_xchg: return DXIL_ATOMIC_EXCHANGE;
   default: unreachable("Unsupported atomic op");
   }
}

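/* Same mapping, but onto the RMW op set used for LLVM-style atomicrmw
 * operations, i.e. atomics on memory that is not accessed through a resource
 * handle (such as shared variables).
 */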
static enum dxil_rmw_op
nir_atomic_to_dxil_rmw(nir_atomic_op op)
{
   switch (op) {
   case nir_atomic_op_iadd: return DXIL_RMWOP_ADD;
   case nir_atomic_op_iand: return DXIL_RMWOP_AND;
   case nir_atomic_op_ior: return DXIL_RMWOP_OR;
   case nir_atomic_op_ixor: return DXIL_RMWOP_XOR;
   case nir_atomic_op_imin: return DXIL_RMWOP_MIN;
   case nir_atomic_op_imax: return DXIL_RMWOP_MAX;
   case nir_atomic_op_umin: return DXIL_RMWOP_UMIN;
   case nir_atomic_op_umax: return DXIL_RMWOP_UMAX;
   case nir_atomic_op_xchg: return DXIL_RMWOP_XCHG;
   default: unreachable("Unsupported atomic op");
   }
}

typedef struct {
   unsigned id;
   unsigned binding;
   unsigned size;
   unsigned space;
} resource_array_layout;

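/* Fill the six metadata fields shared by every resource record (SRV, UAV,
 * CBV, sampler): resource ID, an undef pointer standing in for the global
 * symbol, name, register space, lower bound, and range size.
 */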
static void
fill_resource_metadata(struct dxil_module *m, const struct dxil_mdnode **fields,
                       const struct dxil_type *struct_type,
                       const char *name, const resource_array_layout *layout)
{
   const struct dxil_type *pointer_type = dxil_module_get_pointer_type(m, struct_type);
   const struct dxil_value *pointer_undef = dxil_module_get_undef(m, pointer_type);

   fields[0] = dxil_get_metadata_int32(m, layout->id); // resource ID
   fields[1] = dxil_get_metadata_value(m, pointer_type, pointer_undef); // global constant symbol
   fields[2] = dxil_get_metadata_string(m, name ? name : ""); // name
   fields[3] = dxil_get_metadata_int32(m, layout->space); // space ID
   fields[4] = dxil_get_metadata_int32(m, layout->binding); // lower bound
   fields[5] = dxil_get_metadata_int32(m, layout->size); // range size
}

static const struct dxil_mdnode *
emit_srv_metadata(struct dxil_module *m, const struct dxil_type *elem_type,
                  const char *name, const resource_array_layout *layout,
                  enum dxil_component_type comp_type,
                  enum dxil_resource_kind res_kind)
{
   const struct dxil_mdnode *fields[9];

   const struct dxil_mdnode *metadata_tag_nodes[2];

   fill_resource_metadata(m, fields, elem_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
   fields[7] = dxil_get_metadata_int1(m, 0); // sample count
   if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
       res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
      metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
      metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
      fields[8] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
   } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      fields[8] = NULL;
   else
      unreachable("Structured buffers not supported yet");

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_uav_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                  const char *name, const resource_array_layout *layout,
                  enum dxil_component_type comp_type,
                  enum dxil_resource_kind res_kind,
                  enum gl_access_qualifier access)
{
   const struct dxil_mdnode *fields[11];

   const struct dxil_mdnode *metadata_tag_nodes[2];

   fill_resource_metadata(m, fields, struct_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
   fields[7] = dxil_get_metadata_int1(m, (access & ACCESS_COHERENT) != 0); // globally-coherent
   fields[8] = dxil_get_metadata_int1(m, false); // has counter
   fields[9] = dxil_get_metadata_int1(m, false); // is ROV
   if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
       res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
      metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
      metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
      fields[10] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
   } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      fields[10] = NULL;
   else
      unreachable("Structured buffers not supported yet");

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_cbv_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                  const char *name, const resource_array_layout *layout,
                  unsigned size)
{
   const struct dxil_mdnode *fields[8];

   fill_resource_metadata(m, fields, struct_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, size); // constant buffer size
   fields[7] = NULL; // metadata

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_sampler_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                      nir_variable *var, const resource_array_layout *layout)
{
   const struct dxil_mdnode *fields[8];
   const struct glsl_type *type = glsl_without_array(var->type);

   fill_resource_metadata(m, fields, struct_type, var->name, layout);
   enum dxil_sampler_kind sampler_kind = glsl_sampler_type_is_shadow(type) ?
      DXIL_SAMPLER_KIND_COMPARISON : DXIL_SAMPLER_KIND_DEFAULT;
   fields[6] = dxil_get_metadata_int32(m, sampler_kind); // sampler kind
   fields[7] = NULL; // metadata

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

#define MAX_SRVS 128
#define MAX_UAVS 64
#define MAX_CBVS 64 // ??
#define MAX_SAMPLERS 64 // ??

struct dxil_def {
   const struct dxil_value *chans[NIR_MAX_VEC_COMPONENTS];
};

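/* Per-shader translation context: the DXIL module being built, plus the
 * metadata nodes, resource handles, and NIR->DXIL value mappings accumulated
 * while walking the shader.
 */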
struct ntd_context {
   void *ralloc_ctx;
   const struct nir_to_dxil_options *opts;
   struct nir_shader *shader;

   struct dxil_module mod;

   struct util_dynarray srv_metadata_nodes;
   const struct dxil_value *srv_handles[MAX_SRVS];

   struct util_dynarray uav_metadata_nodes;
   const struct dxil_value *ssbo_handles[MAX_UAVS];
   const struct dxil_value *image_handles[MAX_UAVS];
   uint32_t num_uavs;

   struct util_dynarray cbv_metadata_nodes;
   const struct dxil_value *cbv_handles[MAX_CBVS];

   struct util_dynarray sampler_metadata_nodes;
   const struct dxil_value *sampler_handles[MAX_SAMPLERS];

   struct util_dynarray resources;

   const struct dxil_mdnode *shader_property_nodes[6];
   size_t num_shader_property_nodes;

   struct dxil_def *defs;
   unsigned num_defs;
   struct hash_table *phis;

   const struct dxil_value **sharedvars;
   const struct dxil_value **scratchvars;
   const struct dxil_value **consts;

   nir_variable *system_value[SYSTEM_VALUE_MAX];

   nir_function *tess_ctrl_patch_constant_func;
   unsigned tess_input_control_point_count;

   struct dxil_func_def *main_func_def;
   struct dxil_func_def *tess_ctrl_patch_constant_func_def;
   unsigned unnamed_ubo_count;

   BITSET_WORD *float_types;
   BITSET_WORD *int_types;

   const struct dxil_logger *logger;
};

static const char*
unary_func_name(enum dxil_intr intr)
{
   switch (intr) {
   case DXIL_INTR_COUNTBITS:
   case DXIL_INTR_FIRSTBIT_HI:
   case DXIL_INTR_FIRSTBIT_SHI:
   case DXIL_INTR_FIRSTBIT_LO:
      return "dx.op.unaryBits";
   case DXIL_INTR_ISFINITE:
   case DXIL_INTR_ISNORMAL:
      return "dx.op.isSpecialFloat";
   default:
      return "dx.op.unary";
   }
}

static const struct dxil_value *
emit_unary_call(struct ntd_context *ctx, enum overload_type overload,
                enum dxil_intr intr,
                const struct dxil_value *op0)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod,
                                                    unary_func_name(intr),
                                                    overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_binary_call(struct ntd_context *ctx, enum overload_type overload,
                 enum dxil_intr intr,
                 const struct dxil_value *op0, const struct dxil_value *op1)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.binary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0,
      op1
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_tertiary_call(struct ntd_context *ctx, enum overload_type overload,
                   enum dxil_intr intr,
                   const struct dxil_value *op0,
                   const struct dxil_value *op1,
                   const struct dxil_value *op2)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.tertiary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0,
      op1,
      op2
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_quaternary_call(struct ntd_context *ctx, enum overload_type overload,
                     enum dxil_intr intr,
                     const struct dxil_value *op0,
                     const struct dxil_value *op1,
                     const struct dxil_value *op2,
                     const struct dxil_value *op3)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.quaternary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0,
      op1,
      op2,
      op3
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_threadid_call(struct ntd_context *ctx, const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadId", DXIL_I32);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_THREAD_ID);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_threadidingroup_call(struct ntd_context *ctx,
                          const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadIdInGroup", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_THREAD_ID_IN_GROUP);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_flattenedthreadidingroup_call(struct ntd_context *ctx)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.flattenedThreadIdInGroup", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_groupid_call(struct ntd_context *ctx, const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.groupId", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_GROUP_ID);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_raw_bufferload_call(struct ntd_context *ctx,
                         const struct dxil_value *handle,
                         const struct dxil_value *coord[2],
                         enum overload_type overload,
                         unsigned component_count,
                         unsigned alignment)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.rawBufferLoad", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_RAW_BUFFER_LOAD);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1],
      dxil_module_get_int8_const(&ctx->mod, (1 << component_count) - 1),
      dxil_module_get_int32_const(&ctx->mod, alignment),
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_bufferload_call(struct ntd_context *ctx,
                     const struct dxil_value *handle,
                     const struct dxil_value *coord[2],
                     enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferLoad", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_BUFFER_LOAD);
   const struct dxil_value *args[] = { opcode, handle, coord[0], coord[1] };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_raw_bufferstore_call(struct ntd_context *ctx,
                          const struct dxil_value *handle,
                          const struct dxil_value *coord[2],
                          const struct dxil_value *value[4],
                          const struct dxil_value *write_mask,
                          enum overload_type overload,
                          unsigned alignment)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.rawBufferStore", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_RAW_BUFFER_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1],
      value[0], value[1], value[2], value[3],
      write_mask,
      dxil_module_get_int32_const(&ctx->mod, alignment),
   };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static bool
emit_bufferstore_call(struct ntd_context *ctx,
                      const struct dxil_value *handle,
                      const struct dxil_value *coord[2],
                      const struct dxil_value *value[4],
                      const struct dxil_value *write_mask,
                      enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferStore", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_BUFFER_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1],
      value[0], value[1], value[2], value[3],
      write_mask
   };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_textureload_call(struct ntd_context *ctx,
                      const struct dxil_value *handle,
                      const struct dxil_value *coord[3],
                      enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", overload);
   if (!func)
      return NULL;
   const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
   const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_TEXTURE_LOAD);
   const struct dxil_value *args[] = { opcode, handle,
      /*lod_or_sample*/ int_undef,
      coord[0], coord[1], coord[2],
      /* offsets */ int_undef, int_undef, int_undef };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_texturestore_call(struct ntd_context *ctx,
                       const struct dxil_value *handle,
                       const struct dxil_value *coord[3],
                       const struct dxil_value *value[4],
                       const struct dxil_value *write_mask,
                       enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureStore", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_TEXTURE_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1], coord[2],
      value[0], value[1], value[2], value[3],
      write_mask
   };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_atomic_binop(struct ntd_context *ctx,
                  const struct dxil_value *handle,
                  enum dxil_atomic_op atomic_op,
                  const struct dxil_value *coord[3],
                  const struct dxil_value *value)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.atomicBinOp", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_BINOP);
   const struct dxil_value *atomic_op_value =
      dxil_module_get_int32_const(&ctx->mod, atomic_op);
   const struct dxil_value *args[] = {
      opcode, handle, atomic_op_value,
      coord[0], coord[1], coord[2], value
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_atomic_cmpxchg(struct ntd_context *ctx,
                    const struct dxil_value *handle,
                    const struct dxil_value *coord[3],
                    const struct dxil_value *cmpval,
                    const struct dxil_value *newval)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.atomicCompareExchange", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_CMPXCHG);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1], coord[2], cmpval, newval
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

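/* Resource handle creation. Before shader model 6.6, resources are bound
 * with dx.op.createHandle; 6.6+ replaces it with
 * dx.op.createHandleFromBinding followed by dx.op.annotateHandle, which
 * attaches the resource properties directly to the handle.
 * emit_createhandle_call() below picks the right path based on the module's
 * shader model version.
 */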
static const struct dxil_value *
emit_createhandle_call_pre_6_6(struct ntd_context *ctx,
                               enum dxil_resource_class resource_class,
                               unsigned lower_bound,
                               unsigned upper_bound,
                               unsigned space,
                               unsigned resource_range_id,
                               const struct dxil_value *resource_range_index,
                               bool non_uniform_resource_index)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE);
   const struct dxil_value *resource_class_value = dxil_module_get_int8_const(&ctx->mod, resource_class);
   const struct dxil_value *resource_range_id_value = dxil_module_get_int32_const(&ctx->mod, resource_range_id);
   const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
   if (!opcode || !resource_class_value || !resource_range_id_value ||
       !non_uniform_resource_index_value)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      resource_class_value,
      resource_range_id_value,
      resource_range_index,
      non_uniform_resource_index_value
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.createHandle", DXIL_NONE);

   if (!func)
      return NULL;

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_annotate_handle(struct ntd_context *ctx,
                     const struct dxil_value *unannotated_handle,
                     const struct dxil_value *res_props)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ANNOTATE_HANDLE);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      unannotated_handle,
      res_props
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.annotateHandle", DXIL_NONE);

   if (!func)
      return NULL;

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_annotate_handle_from_metadata(struct ntd_context *ctx,
                                   enum dxil_resource_class resource_class,
                                   unsigned resource_range_id,
                                   const struct dxil_value *unannotated_handle)
{
   const struct util_dynarray *mdnodes;
   switch (resource_class) {
   case DXIL_RESOURCE_CLASS_SRV:
      mdnodes = &ctx->srv_metadata_nodes;
      break;
   case DXIL_RESOURCE_CLASS_UAV:
      mdnodes = &ctx->uav_metadata_nodes;
      break;
   case DXIL_RESOURCE_CLASS_CBV:
      mdnodes = &ctx->cbv_metadata_nodes;
      break;
   case DXIL_RESOURCE_CLASS_SAMPLER:
      mdnodes = &ctx->sampler_metadata_nodes;
      break;
   default:
      unreachable("Invalid resource class");
   }

   const struct dxil_mdnode *mdnode = *util_dynarray_element(mdnodes, const struct dxil_mdnode *, resource_range_id);
   const struct dxil_value *res_props = dxil_module_get_res_props_const(&ctx->mod, resource_class, mdnode);
   if (!res_props)
      return NULL;

   return emit_annotate_handle(ctx, unannotated_handle, res_props);
}

static const struct dxil_value *
emit_createhandle_and_annotate(struct ntd_context *ctx,
                               enum dxil_resource_class resource_class,
                               unsigned lower_bound,
                               unsigned upper_bound,
                               unsigned space,
                               unsigned resource_range_id,
                               const struct dxil_value *resource_range_index,
                               bool non_uniform_resource_index)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE_FROM_BINDING);
   const struct dxil_value *res_bind = dxil_module_get_res_bind_const(&ctx->mod, lower_bound, upper_bound, space, resource_class);
   const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
   if (!opcode || !res_bind || !non_uniform_resource_index_value)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      res_bind,
      resource_range_index,
      non_uniform_resource_index_value
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.createHandleFromBinding", DXIL_NONE);

   if (!func)
      return NULL;

   const struct dxil_value *unannotated_handle = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!unannotated_handle)
      return NULL;

   return emit_annotate_handle_from_metadata(ctx, resource_class, resource_range_id, unannotated_handle);
}

static const struct dxil_value *
emit_createhandle_call(struct ntd_context *ctx,
                       enum dxil_resource_class resource_class,
                       unsigned lower_bound,
                       unsigned upper_bound,
                       unsigned space,
                       unsigned resource_range_id,
                       const struct dxil_value *resource_range_index,
                       bool non_uniform_resource_index)
{
   if (ctx->mod.minor_version < 6)
      return emit_createhandle_call_pre_6_6(ctx, resource_class, lower_bound, upper_bound, space, resource_range_id, resource_range_index, non_uniform_resource_index);
   else
      return emit_createhandle_and_annotate(ctx, resource_class, lower_bound, upper_bound, space, resource_range_id, resource_range_index, non_uniform_resource_index);
}

static const struct dxil_value *
emit_createhandle_call_const_index(struct ntd_context *ctx,
                                   enum dxil_resource_class resource_class,
                                   unsigned lower_bound,
                                   unsigned upper_bound,
                                   unsigned space,
                                   unsigned resource_range_id,
                                   unsigned resource_range_index,
                                   bool non_uniform_resource_index)
{
   const struct dxil_value *resource_range_index_value = dxil_module_get_int32_const(&ctx->mod, resource_range_index);
   if (!resource_range_index_value)
      return NULL;

   return emit_createhandle_call(ctx, resource_class, lower_bound, upper_bound, space,
                                 resource_range_id, resource_range_index_value,
                                 non_uniform_resource_index);
}

static const struct dxil_value *
emit_createhandle_heap(struct ntd_context *ctx,
                       const struct dxil_value *resource_range_index,
                       bool is_sampler,
                       bool non_uniform_resource_index)
{
   if (is_sampler)
      ctx->mod.feats.sampler_descriptor_heap_indexing = true;
   else
      ctx->mod.feats.resource_descriptor_heap_indexing = true;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE_FROM_HEAP);
   const struct dxil_value *sampler = dxil_module_get_int1_const(&ctx->mod, is_sampler);
   const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
   if (!opcode || !sampler || !non_uniform_resource_index_value)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      resource_range_index,
      sampler,
      non_uniform_resource_index_value
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.createHandleFromHeap", DXIL_NONE);

   if (!func)
      return NULL;

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

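/* Append a record describing this resource range to the module's resource
 * list. Validator 1.6+ consumes the extended v1 layout, which adds the
 * resource kind and flags on top of the v0 fields; older validators use v0.
 */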
static void
add_resource(struct ntd_context *ctx, enum dxil_resource_type type,
             enum dxil_resource_kind kind,
             const resource_array_layout *layout)
{
   struct dxil_resource_v0 *resource_v0 = NULL;
   struct dxil_resource_v1 *resource_v1 = NULL;
   if (ctx->mod.minor_validator >= 6) {
      resource_v1 = util_dynarray_grow(&ctx->resources, struct dxil_resource_v1, 1);
      resource_v0 = &resource_v1->v0;
   } else {
      resource_v0 = util_dynarray_grow(&ctx->resources, struct dxil_resource_v0, 1);
   }
   resource_v0->resource_type = type;
   resource_v0->space = layout->space;
   resource_v0->lower_bound = layout->binding;
   if (layout->size == 0 || (uint64_t)layout->size + layout->binding >= UINT_MAX)
      resource_v0->upper_bound = UINT_MAX;
   else
      resource_v0->upper_bound = layout->binding + layout->size - 1;
   if (type == DXIL_RES_UAV_TYPED ||
       type == DXIL_RES_UAV_RAW ||
       type == DXIL_RES_UAV_STRUCTURED) {
      uint32_t new_uav_count = ctx->num_uavs + layout->size;
      if (layout->size == 0 || new_uav_count < ctx->num_uavs)
         ctx->num_uavs = UINT_MAX;
      else
         ctx->num_uavs = new_uav_count;
      if (ctx->mod.minor_validator >= 6 && ctx->num_uavs > 8)
         ctx->mod.feats.use_64uavs = 1;
   }

   if (resource_v1) {
      resource_v1->resource_kind = kind;
      /* No flags supported yet */
      resource_v1->resource_flags = 0;
   }
}

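/* Resolve a (space, binding) pair against the declared resource ranges and
 * create a handle for it. The resources array is ordered CBVs, samplers,
 * SRVs, UAVs, matching the offsets computed below.
 */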
static const struct dxil_value *
emit_createhandle_call_dynamic(struct ntd_context *ctx,
                               enum dxil_resource_class resource_class,
                               unsigned space,
                               unsigned binding,
                               const struct dxil_value *resource_range_index,
                               bool non_uniform_resource_index)
{
   unsigned offset = 0;
   unsigned count = 0;

   unsigned num_srvs = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_uavs = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_cbvs = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_samplers = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);

   switch (resource_class) {
   case DXIL_RESOURCE_CLASS_UAV:
      offset = num_srvs + num_samplers + num_cbvs;
      count = num_uavs;
      break;
   case DXIL_RESOURCE_CLASS_SRV:
      offset = num_samplers + num_cbvs;
      count = num_srvs;
      break;
   case DXIL_RESOURCE_CLASS_SAMPLER:
      offset = num_cbvs;
      count = num_samplers;
      break;
   case DXIL_RESOURCE_CLASS_CBV:
      offset = 0;
      count = num_cbvs;
      break;
   }

   unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
      sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
   assert(offset + count <= ctx->resources.size / resource_element_size);
   for (unsigned i = offset; i < offset + count; ++i) {
      const struct dxil_resource_v0 *resource = (const struct dxil_resource_v0 *)((const char *)ctx->resources.data + resource_element_size * i);
      if (resource->space == space &&
          resource->lower_bound <= binding &&
          resource->upper_bound >= binding) {
         return emit_createhandle_call(ctx, resource_class, resource->lower_bound,
                                       resource->upper_bound, space,
                                       i - offset,
                                       resource_range_index,
                                       non_uniform_resource_index);
      }
   }

   unreachable("Resource access for undeclared range");
}

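/* Declare an SRV (read-only resource): emit its metadata node and record the
 * resource range. SSBOs read through SRVs use raw buffers; everything else
 * is a typed resource derived from the GLSL type.
 */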
static bool
emit_srv(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   unsigned id = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
   unsigned binding = var->data.binding;
   resource_array_layout layout = {id, binding, count, var->data.descriptor_set};

   enum dxil_component_type comp_type;
   enum dxil_resource_kind res_kind;
   enum dxil_resource_type res_type;
   if (var->data.mode == nir_var_mem_ssbo) {
      comp_type = DXIL_COMP_TYPE_INVALID;
      res_kind = DXIL_RESOURCE_KIND_RAW_BUFFER;
      res_type = DXIL_RES_SRV_RAW;
   } else {
      comp_type = dxil_get_comp_type(var->type);
      res_kind = dxil_get_resource_kind(var->type);
      res_type = DXIL_RES_SRV_TYPED;
   }
   const struct dxil_type *res_type_as_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, 4, false /* readwrite */);

   if (glsl_type_is_array(var->type))
      res_type_as_type = dxil_module_get_array_type(&ctx->mod, res_type_as_type, count);

   const struct dxil_mdnode *srv_meta = emit_srv_metadata(&ctx->mod, res_type_as_type, var->name,
                                                          &layout, comp_type, res_kind);

   if (!srv_meta)
      return false;

   util_dynarray_append(&ctx->srv_metadata_nodes, const struct dxil_mdnode *, srv_meta);
   add_resource(ctx, res_type, res_kind, &layout);
   if (res_type == DXIL_RES_SRV_RAW)
      ctx->mod.raw_and_structured_buffers = true;

   return true;
}

static bool
emit_uav(struct ntd_context *ctx, unsigned binding, unsigned space, unsigned count,
         enum dxil_component_type comp_type, unsigned num_comps, enum dxil_resource_kind res_kind,
         enum gl_access_qualifier access, const char *name)
{
   unsigned id = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
   resource_array_layout layout = { id, binding, count, space };

   const struct dxil_type *res_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, num_comps, true /* readwrite */);
   res_type = dxil_module_get_array_type(&ctx->mod, res_type, count);
   const struct dxil_mdnode *uav_meta = emit_uav_metadata(&ctx->mod, res_type, name,
                                                          &layout, comp_type, res_kind, access);

   if (!uav_meta)
      return false;

   util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta);
   if (ctx->mod.minor_validator < 6 &&
       util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8)
      ctx->mod.feats.use_64uavs = 1;

   add_resource(ctx, res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER ? DXIL_RES_UAV_RAW : DXIL_RES_UAV_TYPED, res_kind, &layout);
   if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      ctx->mod.raw_and_structured_buffers = true;
   if (ctx->mod.shader_kind != DXIL_PIXEL_SHADER &&
       ctx->mod.shader_kind != DXIL_COMPUTE_SHADER)
      ctx->mod.feats.uavs_at_every_stage = true;

   return true;
}

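/* Declare one raw-buffer UAV range named "globals" at binding 0, with one
 * element per SSBO variable on top of the caller-provided base size.
 */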
static bool
emit_globals(struct ntd_context *ctx, unsigned size)
{
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo)
      size++;

   if (!size)
      return true;

   if (!emit_uav(ctx, 0, 0, size, DXIL_COMP_TYPE_INVALID, 1, DXIL_RESOURCE_KIND_RAW_BUFFER, 0, "globals"))
      return false;

   return true;
}

static bool
emit_uav_var(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   unsigned binding, space;
   if (ctx->opts->environment == DXIL_ENVIRONMENT_GL) {
      /* For GL, the image intrinsics are already lowered, using driver_location
       * as the 0-based image index. Use space 1 so that we can keep using these
       * NIR constants without having to remap them, and so they don't overlap
       * SSBOs, which are also 0-based UAV bindings.
       */
      binding = var->data.driver_location;
      space = 1;
   } else {
      binding = var->data.binding;
      space = var->data.descriptor_set;
   }
   enum dxil_component_type comp_type = dxil_get_comp_type(var->type);
   enum dxil_resource_kind res_kind = dxil_get_resource_kind(var->type);
   const char *name = var->name;

   return emit_uav(ctx, binding, space, count, comp_type,
                   util_format_get_nr_components(var->data.image.format),
                   res_kind, var->data.access, name);
}

static const struct dxil_value *
get_value_for_const(struct dxil_module *mod, nir_const_value *c, const struct dxil_type *type)
{
   if (type == mod->int1_type) return dxil_module_get_int1_const(mod, c->b);
   if (type == mod->float32_type) return dxil_module_get_float_const(mod, c->f32);
   if (type == mod->int32_type) return dxil_module_get_int32_const(mod, c->i32);
   if (type == mod->int16_type) {
      mod->feats.min_precision = true;
      return dxil_module_get_int16_const(mod, c->i16);
   }
   if (type == mod->int64_type) {
      mod->feats.int64_ops = true;
      return dxil_module_get_int64_const(mod, c->i64);
   }
   if (type == mod->float16_type) {
      mod->feats.min_precision = true;
      return dxil_module_get_float16_const(mod, c->u16);
   }
   if (type == mod->float64_type) {
      mod->feats.doubles = true;
      return dxil_module_get_double_const(mod, c->f64);
   }
   unreachable("Invalid type");
}

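/* Translate GLSL types to DXIL types: scalars map to an int or float of the
 * same bit size, while vectors, arrays, and structs recurse through their
 * element types.
 */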
1420 static const struct dxil_type *
get_type_for_glsl_base_type(struct dxil_module * mod,enum glsl_base_type type)1421 get_type_for_glsl_base_type(struct dxil_module *mod, enum glsl_base_type type)
1422 {
1423 uint32_t bit_size = glsl_base_type_bit_size(type);
1424 if (nir_alu_type_get_base_type(nir_get_nir_type_for_glsl_base_type(type)) == nir_type_float)
1425 return dxil_module_get_float_type(mod, bit_size);
1426 return dxil_module_get_int_type(mod, bit_size);
1427 }
1428
1429 static const struct dxil_type *
get_type_for_glsl_type(struct dxil_module * mod,const struct glsl_type * type)1430 get_type_for_glsl_type(struct dxil_module *mod, const struct glsl_type *type)
1431 {
1432 if (glsl_type_is_scalar(type))
1433 return get_type_for_glsl_base_type(mod, glsl_get_base_type(type));
1434
1435 if (glsl_type_is_vector(type))
1436 return dxil_module_get_vector_type(mod, get_type_for_glsl_base_type(mod, glsl_get_base_type(type)),
1437 glsl_get_vector_elements(type));
1438
1439 if (glsl_type_is_array(type))
1440 return dxil_module_get_array_type(mod, get_type_for_glsl_type(mod, glsl_get_array_element(type)),
1441 glsl_array_size(type));
1442
1443 assert(glsl_type_is_struct(type));
1444 uint32_t size = glsl_get_length(type);
1445 const struct dxil_type **fields = calloc(sizeof(const struct dxil_type *), size);
1446 for (uint32_t i = 0; i < size; ++i)
1447 fields[i] = get_type_for_glsl_type(mod, glsl_get_struct_field(type, i));
1448 const struct dxil_type *ret = dxil_module_get_struct_type(mod, glsl_get_type_name(type), fields, size);
1449 free((void *)fields);
1450 return ret;
1451 }
1452
1453 static const struct dxil_value *
get_value_for_const_aggregate(struct dxil_module * mod,nir_constant * c,const struct glsl_type * type)1454 get_value_for_const_aggregate(struct dxil_module *mod, nir_constant *c, const struct glsl_type *type)
1455 {
1456 const struct dxil_type *dxil_type = get_type_for_glsl_type(mod, type);
1457 if (glsl_type_is_vector_or_scalar(type)) {
1458 const struct dxil_type *element_type = get_type_for_glsl_base_type(mod, glsl_get_base_type(type));
1459 const struct dxil_value *elements[NIR_MAX_VEC_COMPONENTS];
1460 for (uint32_t i = 0; i < glsl_get_vector_elements(type); ++i)
1461 elements[i] = get_value_for_const(mod, &c->values[i], element_type);
1462 if (glsl_type_is_scalar(type))
1463 return elements[0];
1464 return dxil_module_get_vector_const(mod, dxil_type, elements);
1465 }
1466
1467 uint32_t num_values = glsl_get_length(type);
1468 assert(num_values == c->num_elements);
1469 const struct dxil_value **values = calloc(sizeof(const struct dxil_value *), num_values);
1470 const struct dxil_value *ret;
1471 if (glsl_type_is_array(type)) {
1472 const struct glsl_type *element_type = glsl_get_array_element(type);
1473 for (uint32_t i = 0; i < num_values; ++i)
1474 values[i] = get_value_for_const_aggregate(mod, c->elements[i], element_type);
1475 ret = dxil_module_get_array_const(mod, dxil_type, values);
1476 } else {
1477 for (uint32_t i = 0; i < num_values; ++i)
1478 values[i] = get_value_for_const_aggregate(mod, c->elements[i], glsl_get_struct_field(type, i));
1479 ret = dxil_module_get_struct_const(mod, dxil_type, values);
1480 }
1481 free((void *)values);
1482 return ret;
1483 }
1484
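/* Two passes over nir_var_mem_constant variables: first assign each one a
 * driver_location index, then emit one global pointer variable per
 * constant, initialized from its constant_initializer, so later loads can
 * look the value up by driver_location.
 */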
1485 static bool
1486 emit_global_consts(struct ntd_context *ctx)
1487 {
1488 uint32_t index = 0;
1489 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_constant) {
1490 assert(var->constant_initializer);
1491 var->data.driver_location = index++;
1492 }
1493
1494 ctx->consts = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index);
1495
1496 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_constant) {
1497 if (!var->name)
1498 var->name = ralloc_asprintf(var, "const_%d", var->data.driver_location);
1499
1500 const struct dxil_value *agg_vals =
1501 get_value_for_const_aggregate(&ctx->mod, var->constant_initializer, var->type);
1502 if (!agg_vals)
1503 return false;
1504
1505 const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name,
1506 dxil_value_get_type(agg_vals),
1507 DXIL_AS_DEFAULT, 16,
1508 agg_vals);
1509 if (!gvar)
1510 return false;
1511
1512 ctx->consts[var->data.driver_location] = gvar;
1513 }
1514
1515 return true;
1516 }
1517
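/* Same indexing scheme as emit_global_consts, but for workgroup-shared
 * variables: these live in the groupshared address space and carry no
 * initializer.
 */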
1518 static bool
1519 emit_shared_vars(struct ntd_context *ctx)
1520 {
1521 uint32_t index = 0;
1522 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_shared)
1523 var->data.driver_location = index++;
1524
1525 ctx->sharedvars = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index);
1526
1527 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_shared) {
1528 if (!var->name)
1529 var->name = ralloc_asprintf(var, "shared_%d", var->data.driver_location);
1530 const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name,
1531 get_type_for_glsl_type(&ctx->mod, var->type),
1532 DXIL_AS_GROUPSHARED, 16,
1533 NULL);
1534 if (!gvar)
1535 return false;
1536
1537 ctx->sharedvars[var->data.driver_location] = gvar;
1538 }
1539
1540 return true;
1541 }
1542
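/* Declare a constant buffer in the resource metadata. The buffer is typed
 * as a struct wrapping a float array of `size` dwords; e.g. a 52-byte UBO
 * is padded to 64 bytes by emit_ubo_var and arrives here with size == 16,
 * reported to the metadata as 4 * size = 64 bytes.
 */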
1543 static bool
1544 emit_cbv(struct ntd_context *ctx, unsigned binding, unsigned space,
1545 unsigned size, unsigned count, char *name)
1546 {
1547 assert(count != 0);
1548
1549 unsigned idx = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);
1550
1551 const struct dxil_type *float32 = dxil_module_get_float_type(&ctx->mod, 32);
1552 const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, float32, size);
1553 const struct dxil_type *buffer_type = dxil_module_get_struct_type(&ctx->mod, name,
1554 &array_type, 1);
1555 // All ubo[1]s should have been lowered to ubo with static indexing
1556 const struct dxil_type *final_type = count != 1 ? dxil_module_get_array_type(&ctx->mod, buffer_type, count) : buffer_type;
1557 resource_array_layout layout = {idx, binding, count, space};
1558 const struct dxil_mdnode *cbv_meta = emit_cbv_metadata(&ctx->mod, final_type,
1559 name, &layout, 4 * size);
1560
1561 if (!cbv_meta)
1562 return false;
1563
1564 util_dynarray_append(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *, cbv_meta);
1565 add_resource(ctx, DXIL_RES_CBV, DXIL_RESOURCE_KIND_CBUFFER, &layout);
1566
1567 return true;
1568 }
1569
1570 static bool
1571 emit_ubo_var(struct ntd_context *ctx, nir_variable *var)
1572 {
1573 unsigned count = 1;
1574 if (glsl_type_is_array(var->type))
1575 count = glsl_get_length(var->type);
1576
1577 char *name = var->name;
1578 char temp_name[30];
1579 if (name && strlen(name) == 0) {
1580 snprintf(temp_name, sizeof(temp_name), "__unnamed_ubo_%d",
1581 ctx->unnamed_ubo_count++);
1582 name = temp_name;
1583 }
1584
1585 const struct glsl_type *type = glsl_without_array(var->type);
1586 assert(glsl_type_is_struct(type) || glsl_type_is_interface(type));
1587 unsigned dwords = ALIGN_POT(glsl_get_explicit_size(type, false), 16) / 4;
1588
1589 return emit_cbv(ctx, var->data.binding, var->data.descriptor_set,
1590 dwords, count, name);
1591 }
1592
1593 static bool
1594 emit_sampler(struct ntd_context *ctx, nir_variable *var, unsigned count)
1595 {
1596 unsigned id = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);
1597 unsigned binding = var->data.binding;
1598 resource_array_layout layout = {id, binding, count, var->data.descriptor_set};
1599 const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
1600 const struct dxil_type *sampler_type = dxil_module_get_struct_type(&ctx->mod, "struct.SamplerState", &int32_type, 1);
1601
1602 if (glsl_type_is_array(var->type))
1603 sampler_type = dxil_module_get_array_type(&ctx->mod, sampler_type, count);
1604
1605 const struct dxil_mdnode *sampler_meta = emit_sampler_metadata(&ctx->mod, sampler_type, var, &layout);
1606
1607 if (!sampler_meta)
1608 return false;
1609
1610 util_dynarray_append(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *, sampler_meta);
1611 add_resource(ctx, DXIL_RES_SAMPLER, DXIL_RESOURCE_KIND_SAMPLER, &layout);
1612
1613 return true;
1614 }
1615
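/* GL and CL can create most resource handles up front with constant
 * indices (Vulkan always creates them dynamically from NIR instructions).
 * Walk the serialized resource list, whose element stride depends on the
 * validator version, and emit one createHandle call per binding slot,
 * skipping register spaces above 1 and CL's dynamically-indexed "globals"
 * UAV array.
 */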
1616 static bool
1617 emit_static_indexing_handles(struct ntd_context *ctx)
1618 {
1619 /* Vulkan always uses dynamic handles, from instructions in the NIR */
1620 if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN)
1621 return true;
1622
1623 unsigned last_res_class = -1;
1624 unsigned id = 0;
1625
1626 unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
1627 sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
1628 for (struct dxil_resource_v0 *res = (struct dxil_resource_v0 *)ctx->resources.data;
1629 res < (struct dxil_resource_v0 *)((char *)ctx->resources.data + ctx->resources.size);
1630 res = (struct dxil_resource_v0 *)((char *)res + resource_element_size)) {
1631 enum dxil_resource_class res_class;
1632 const struct dxil_value **handle_array;
1633 switch (res->resource_type) {
1634 case DXIL_RES_SRV_TYPED:
1635 case DXIL_RES_SRV_RAW:
1636 case DXIL_RES_SRV_STRUCTURED:
1637 res_class = DXIL_RESOURCE_CLASS_SRV;
1638 handle_array = ctx->srv_handles;
1639 break;
1640 case DXIL_RES_CBV:
1641 res_class = DXIL_RESOURCE_CLASS_CBV;
1642 handle_array = ctx->cbv_handles;
1643 break;
1644 case DXIL_RES_SAMPLER:
1645 res_class = DXIL_RESOURCE_CLASS_SAMPLER;
1646 handle_array = ctx->sampler_handles;
1647 break;
1648 case DXIL_RES_UAV_RAW:
1649 res_class = DXIL_RESOURCE_CLASS_UAV;
1650 handle_array = ctx->ssbo_handles;
1651 break;
1652 case DXIL_RES_UAV_TYPED:
1653 case DXIL_RES_UAV_STRUCTURED:
1654 case DXIL_RES_UAV_STRUCTURED_WITH_COUNTER:
1655 res_class = DXIL_RESOURCE_CLASS_UAV;
1656 handle_array = ctx->image_handles;
1657 break;
1658 default:
1659 unreachable("Unexpected resource type");
1660 }
1661
1662 if (last_res_class != res_class)
1663 id = 0;
1664 else
1665 id++;
1666 last_res_class = res_class;
1667
1668 if (res->space > 1)
1669 continue;
1670 assert(res->space == 0 ||
1671 (res->space == 1 &&
1672 res->resource_type != DXIL_RES_UAV_RAW &&
1673 ctx->opts->environment == DXIL_ENVIRONMENT_GL));
1674
1675 /* CL uses dynamic handles for the "globals" UAV array, but uses static
1676 * handles for UBOs, textures, and samplers.
1677 */
1678 if (ctx->opts->environment == DXIL_ENVIRONMENT_CL &&
1679 res->resource_type == DXIL_RES_UAV_RAW)
1680 continue;
1681
1682 for (unsigned i = res->lower_bound; i <= res->upper_bound; ++i) {
1683 handle_array[i] = emit_createhandle_call_const_index(ctx,
1684 res_class,
1685 res->lower_bound,
1686 res->upper_bound,
1687 res->space,
1688 id,
1689 i,
1690 false);
1691 if (!handle_array[i])
1692 return false;
1693 }
1694 }
1695 return true;
1696 }
1697
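/* GS shader-property metadata is a 5-tuple: input primitive, max output
 * vertex count, active stream mask (at least stream 0), output topology,
 * and invocation count.
 */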
1698 static const struct dxil_mdnode *
1699 emit_gs_state(struct ntd_context *ctx)
1700 {
1701 const struct dxil_mdnode *gs_state_nodes[5];
1702 const nir_shader *s = ctx->shader;
1703
1704 gs_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, dxil_get_input_primitive(s->info.gs.input_primitive));
1705 gs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.vertices_out);
1706 gs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.gs.active_stream_mask, 1));
1707 gs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, dxil_get_primitive_topology(s->info.gs.output_primitive));
1708 gs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.invocations);
1709
1710 for (unsigned i = 0; i < ARRAY_SIZE(gs_state_nodes); ++i) {
1711 if (!gs_state_nodes[i])
1712 return NULL;
1713 }
1714
1715 return dxil_get_metadata_node(&ctx->mod, gs_state_nodes, ARRAY_SIZE(gs_state_nodes));
1716 }
1717
1718 static enum dxil_tessellator_domain
1719 get_tessellator_domain(enum tess_primitive_mode primitive_mode)
1720 {
1721 switch (primitive_mode) {
1722 case TESS_PRIMITIVE_QUADS: return DXIL_TESSELLATOR_DOMAIN_QUAD;
1723 case TESS_PRIMITIVE_TRIANGLES: return DXIL_TESSELLATOR_DOMAIN_TRI;
1724 case TESS_PRIMITIVE_ISOLINES: return DXIL_TESSELLATOR_DOMAIN_ISOLINE;
1725 default:
1726 unreachable("Invalid tessellator primitive mode");
1727 }
1728 }
1729
1730 static enum dxil_tessellator_partitioning
1731 get_tessellator_partitioning(enum gl_tess_spacing spacing)
1732 {
1733 switch (spacing) {
1734 default:
1735 case TESS_SPACING_EQUAL:
1736 return DXIL_TESSELLATOR_PARTITIONING_INTEGER;
1737 case TESS_SPACING_FRACTIONAL_EVEN:
1738 return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN;
1739 case TESS_SPACING_FRACTIONAL_ODD:
1740 return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD;
1741 }
1742 }
1743
1744 static enum dxil_tessellator_output_primitive
1745 get_tessellator_output_primitive(const struct shader_info *info)
1746 {
1747 if (info->tess.point_mode)
1748 return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_POINT;
1749 if (info->tess._primitive_mode == TESS_PRIMITIVE_ISOLINES)
1750 return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_LINE;
1751 /* Note: GL tessellation domain is inverted from D3D, which means triangle
1752 * winding needs to be inverted.
1753 */
1754 if (info->tess.ccw)
1755 return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CW;
1756 return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CCW;
1757 }
1758
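/* HS shader-property metadata: patch-constant function, input and output
 * control-point counts, tessellator domain, partitioning, output
 * primitive, and the maximum tessellation factor (64.0).
 */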
1759 static const struct dxil_mdnode *
1760 emit_hs_state(struct ntd_context *ctx)
1761 {
1762 const struct dxil_mdnode *hs_state_nodes[7];
1763
1764 hs_state_nodes[0] = dxil_get_metadata_func(&ctx->mod, ctx->tess_ctrl_patch_constant_func_def->func);
1765 hs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->tess_input_control_point_count);
1766 hs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out);
1767 hs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode));
1768 hs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_partitioning(ctx->shader->info.tess.spacing));
1769 hs_state_nodes[5] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_output_primitive(&ctx->shader->info));
1770 hs_state_nodes[6] = dxil_get_metadata_float32(&ctx->mod, 64.0f);
1771
1772 return dxil_get_metadata_node(&ctx->mod, hs_state_nodes, ARRAY_SIZE(hs_state_nodes));
1773 }
1774
1775 static const struct dxil_mdnode *
1776 emit_ds_state(struct ntd_context *ctx)
1777 {
1778 const struct dxil_mdnode *ds_state_nodes[2];
1779
1780 ds_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode));
1781 ds_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out);
1782
1783 return dxil_get_metadata_node(&ctx->mod, ds_state_nodes, ARRAY_SIZE(ds_state_nodes));
1784 }
1785
1786 static const struct dxil_mdnode *
1787 emit_threads(struct ntd_context *ctx)
1788 {
1789 const nir_shader *s = ctx->shader;
1790 const struct dxil_mdnode *threads_x = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[0], 1));
1791 const struct dxil_mdnode *threads_y = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[1], 1));
1792 const struct dxil_mdnode *threads_z = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[2], 1));
1793 if (!threads_x || !threads_y || !threads_z)
1794 return NULL;
1795
1796 const struct dxil_mdnode *threads_nodes[] = { threads_x, threads_y, threads_z };
1797 return dxil_get_metadata_node(&ctx->mod, threads_nodes, ARRAY_SIZE(threads_nodes));
1798 }
1799
1800 static const struct dxil_mdnode *
1801 emit_wave_size(struct ntd_context *ctx)
1802 {
1803 const nir_shader *s = ctx->shader;
1804 const struct dxil_mdnode *wave_size_node = dxil_get_metadata_int32(&ctx->mod, s->info.subgroup_size);
1805 return dxil_get_metadata_node(&ctx->mod, &wave_size_node, 1);
1806 }
1807
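/* SM6.8 replaces the single wave-size tag with a three-entry range
 * (presumably min/max/preferred); NIR gives us one required subgroup
 * size, so all three entries are that value.
 */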
1808 static const struct dxil_mdnode *
1809 emit_wave_size_range(struct ntd_context *ctx)
1810 {
1811 const nir_shader *s = ctx->shader;
1812 const struct dxil_mdnode *wave_size_nodes[3];
1813 wave_size_nodes[0] = dxil_get_metadata_int32(&ctx->mod, s->info.subgroup_size);
1814 wave_size_nodes[1] = wave_size_nodes[0];
1815 wave_size_nodes[2] = wave_size_nodes[0];
1816 return dxil_get_metadata_node(&ctx->mod, wave_size_nodes, ARRAY_SIZE(wave_size_nodes));
1817 }
1818
1819 static int64_t
1820 get_module_flags(struct ntd_context *ctx)
1821 {
1822 /* See the DXIL documentation for the definition of these flags:
1823 *
1824 * https://github.com/Microsoft/DirectXShaderCompiler/blob/master/docs/DXIL.rst#shader-flags
1825 */
1826
1827 uint64_t flags = 0;
1828 if (ctx->mod.feats.doubles)
1829 flags |= (1 << 2);
1830 if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT &&
1831 ctx->shader->info.fs.early_fragment_tests)
1832 flags |= (1 << 3);
1833 if (ctx->mod.raw_and_structured_buffers)
1834 flags |= (1 << 4);
1835 if (ctx->mod.feats.min_precision)
1836 flags |= (1 << 5);
1837 if (ctx->mod.feats.dx11_1_double_extensions)
1838 flags |= (1 << 6);
1839 if (ctx->mod.feats.array_layer_from_vs_or_ds)
1840 flags |= (1 << 9);
1841 if (ctx->mod.feats.inner_coverage)
1842 flags |= (1 << 10);
1843 if (ctx->mod.feats.stencil_ref)
1844 flags |= (1 << 11);
1845 if (ctx->mod.feats.tiled_resources)
1846 flags |= (1 << 12);
1847 if (ctx->mod.feats.typed_uav_load_additional_formats)
1848 flags |= (1 << 13);
1849 if (ctx->mod.feats.use_64uavs)
1850 flags |= (1 << 15);
1851 if (ctx->mod.feats.uavs_at_every_stage)
1852 flags |= (1 << 16);
1853 if (ctx->mod.feats.cs_4x_raw_sb)
1854 flags |= (1 << 17);
1855 if (ctx->mod.feats.rovs)
1856 flags |= (1 << 18);
1857 if (ctx->mod.feats.wave_ops)
1858 flags |= (1 << 19);
1859 if (ctx->mod.feats.int64_ops)
1860 flags |= (1 << 20);
1861 if (ctx->mod.feats.view_id)
1862 flags |= (1 << 21);
1863 if (ctx->mod.feats.barycentrics)
1864 flags |= (1 << 22);
1865 if (ctx->mod.feats.native_low_precision)
1866 flags |= (1 << 23) | (1 << 5);
1867 if (ctx->mod.feats.shading_rate)
1868 flags |= (1 << 24);
1869 if (ctx->mod.feats.raytracing_tier_1_1)
1870 flags |= (1 << 25);
1871 if (ctx->mod.feats.sampler_feedback)
1872 flags |= (1 << 26);
1873 if (ctx->mod.feats.atomic_int64_typed)
1874 flags |= (1 << 27);
1875 if (ctx->mod.feats.atomic_int64_tgsm)
1876 flags |= (1 << 28);
1877 if (ctx->mod.feats.derivatives_in_mesh_or_amp)
1878 flags |= (1 << 29);
1879 if (ctx->mod.feats.resource_descriptor_heap_indexing)
1880 flags |= (1 << 30);
1881 if (ctx->mod.feats.sampler_descriptor_heap_indexing)
1882 flags |= (1ull << 31);
1883 if (ctx->mod.feats.atomic_int64_heap_resource)
1884 flags |= (1ull << 32);
1885 if (ctx->mod.feats.advanced_texture_ops)
1886 flags |= (1ull << 34);
1887 if (ctx->mod.feats.writable_msaa)
1888 flags |= (1ull << 35);
1889 // Bit 36 is wave MMA
1890 if (ctx->mod.feats.sample_cmp_bias_gradient)
1891 flags |= (1ull << 37);
1892 if (ctx->mod.feats.extended_command_info)
1893 flags |= (1ull << 38);
1894
1895 if (ctx->opts->disable_math_refactoring)
1896 flags |= (1 << 1);
1897
1898 /* Work around https://github.com/microsoft/DirectXShaderCompiler/issues/4616:
1899 * when targeting SM6.7+ with at least one UAV and no other flags set, set the
1900 * resources-may-not-alias flag (bit 33), otherwise the DXIL validator can read
1901 * uninitialized memory for the missing flag and fail validation.
1902 */
1903 if (flags == 0 && ctx->mod.minor_version >= 7 && ctx->num_uavs > 0)
1904 flags |= (1ull << 33);
1905
1906 return flags;
1907 }
1908
1909 static const struct dxil_mdnode *
1910 emit_entrypoint(struct ntd_context *ctx,
1911 const struct dxil_func *func, const char *name,
1912 const struct dxil_mdnode *signatures,
1913 const struct dxil_mdnode *resources,
1914 const struct dxil_mdnode *shader_props)
1915 {
1916 char truncated_name[254] = { 0 };
1917 strncpy(truncated_name, name, ARRAY_SIZE(truncated_name) - 1);
1918
1919 const struct dxil_mdnode *func_md = dxil_get_metadata_func(&ctx->mod, func);
1920 const struct dxil_mdnode *name_md = dxil_get_metadata_string(&ctx->mod, truncated_name);
1921 const struct dxil_mdnode *nodes[] = {
1922 func_md,
1923 name_md,
1924 signatures,
1925 resources,
1926 shader_props
1927 };
1928 return dxil_get_metadata_node(&ctx->mod, nodes,
1929 ARRAY_SIZE(nodes));
1930 }
1931
1932 static const struct dxil_mdnode *
1933 emit_resources(struct ntd_context *ctx)
1934 {
1935 bool emit_resources = false;
1936 const struct dxil_mdnode *resources_nodes[] = {
1937 NULL, NULL, NULL, NULL
1938 };
1939
1940 #define ARRAY_AND_SIZE(arr) arr.data, util_dynarray_num_elements(&arr, const struct dxil_mdnode *)
1941
1942 if (ctx->srv_metadata_nodes.size) {
1943 resources_nodes[0] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->srv_metadata_nodes));
1944 emit_resources = true;
1945 }
1946
1947 if (ctx->uav_metadata_nodes.size) {
1948 resources_nodes[1] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->uav_metadata_nodes));
1949 emit_resources = true;
1950 }
1951
1952 if (ctx->cbv_metadata_nodes.size) {
1953 resources_nodes[2] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->cbv_metadata_nodes));
1954 emit_resources = true;
1955 }
1956
1957 if (ctx->sampler_metadata_nodes.size) {
1958 resources_nodes[3] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->sampler_metadata_nodes));
1959 emit_resources = true;
1960 }
1961
1962 #undef ARRAY_AND_SIZE
1963
1964 return emit_resources ?
1965 dxil_get_metadata_node(&ctx->mod, resources_nodes, ARRAY_SIZE(resources_nodes)): NULL;
1966 }
1967
1968 static bool
1969 emit_tag(struct ntd_context *ctx, enum dxil_shader_tag tag,
1970 const struct dxil_mdnode *value_node)
1971 {
1972 const struct dxil_mdnode *tag_node = dxil_get_metadata_int32(&ctx->mod, tag);
1973 if (!tag_node || !value_node)
1974 return false;
1975 assert(ctx->num_shader_property_nodes <= ARRAY_SIZE(ctx->shader_property_nodes) - 2);
1976 ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = tag_node;
1977 ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = value_node;
1978
1979 return true;
1980 }
1981
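/* Emit the module-level named metadata: dx.version, dx.valver, the shader
 * model, per-stage shader properties and flags, type annotations for the
 * entry point, dx.resources (if any resources exist), dx.viewIdState on
 * SM6.2+ when I/O dependency analysis succeeds, and dx.entryPoints.
 */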
1982 static bool
1983 emit_metadata(struct ntd_context *ctx)
1984 {
1985 /* DXIL versions are 1.x for shader model 6.x */
1986 assert(ctx->mod.major_version == 6);
1987 unsigned dxilMajor = 1;
1988 unsigned dxilMinor = ctx->mod.minor_version;
1989 unsigned valMajor = ctx->mod.major_validator;
1990 unsigned valMinor = ctx->mod.minor_validator;
1991 if (!emit_llvm_ident(&ctx->mod) ||
1992 !emit_named_version(&ctx->mod, "dx.version", dxilMajor, dxilMinor) ||
1993 !emit_named_version(&ctx->mod, "dx.valver", valMajor, valMinor) ||
1994 !emit_dx_shader_model(&ctx->mod))
1995 return false;
1996
1997 const struct dxil_func_def *main_func_def = ctx->main_func_def;
1998 if (!main_func_def)
1999 return false;
2000 const struct dxil_func *main_func = main_func_def->func;
2001
2002 const struct dxil_mdnode *resources_node = emit_resources(ctx);
2003
2004 const struct dxil_mdnode *main_entrypoint = dxil_get_metadata_func(&ctx->mod, main_func);
2005 const struct dxil_mdnode *node27 = dxil_get_metadata_node(&ctx->mod, NULL, 0);
2006
2007 const struct dxil_mdnode *node4 = dxil_get_metadata_int32(&ctx->mod, 0);
2008 const struct dxil_mdnode *nodes_4_27_27[] = {
2009 node4, node27, node27
2010 };
2011 const struct dxil_mdnode *node28 = dxil_get_metadata_node(&ctx->mod, nodes_4_27_27,
2012 ARRAY_SIZE(nodes_4_27_27));
2013
2014 const struct dxil_mdnode *node29 = dxil_get_metadata_node(&ctx->mod, &node28, 1);
2015
2016 const struct dxil_mdnode *node3 = dxil_get_metadata_int32(&ctx->mod, 1);
2017 const struct dxil_mdnode *main_type_annotation_nodes[] = {
2018 node3, main_entrypoint, node29
2019 };
2020 const struct dxil_mdnode *main_type_annotation = dxil_get_metadata_node(&ctx->mod, main_type_annotation_nodes,
2021 ARRAY_SIZE(main_type_annotation_nodes));
2022
2023 if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) {
2024 if (!emit_tag(ctx, DXIL_SHADER_TAG_GS_STATE, emit_gs_state(ctx)))
2025 return false;
2026 } else if (ctx->mod.shader_kind == DXIL_HULL_SHADER) {
2027 ctx->tess_input_control_point_count = 32;
2028 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
2029 if (nir_is_arrayed_io(var, MESA_SHADER_TESS_CTRL)) {
2030 ctx->tess_input_control_point_count = glsl_array_size(var->type);
2031 break;
2032 }
2033 }
2034
2035 if (!emit_tag(ctx, DXIL_SHADER_TAG_HS_STATE, emit_hs_state(ctx)))
2036 return false;
2037 } else if (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) {
2038 if (!emit_tag(ctx, DXIL_SHADER_TAG_DS_STATE, emit_ds_state(ctx)))
2039 return false;
2040 } else if (ctx->mod.shader_kind == DXIL_COMPUTE_SHADER) {
2041 if (!emit_tag(ctx, DXIL_SHADER_TAG_NUM_THREADS, emit_threads(ctx)))
2042 return false;
2043 if (ctx->mod.minor_version >= 6 &&
2044 ctx->shader->info.subgroup_size >= SUBGROUP_SIZE_REQUIRE_4) {
2045 if (ctx->mod.minor_version < 8) {
2046 if (!emit_tag(ctx, DXIL_SHADER_TAG_WAVE_SIZE, emit_wave_size(ctx)))
2047 return false;
2048 } else {
2049 if (!emit_tag(ctx, DXIL_SHADER_TAG_WAVE_SIZE_RANGE, emit_wave_size_range(ctx)))
2050 return false;
2051 }
2052 }
2053 }
2054
2055 uint64_t flags = get_module_flags(ctx);
2056 if (flags != 0) {
2057 if (!emit_tag(ctx, DXIL_SHADER_TAG_FLAGS, dxil_get_metadata_int64(&ctx->mod, flags)))
2058 return false;
2059 }
2060 const struct dxil_mdnode *shader_properties = NULL;
2061 if (ctx->num_shader_property_nodes > 0) {
2062 shader_properties = dxil_get_metadata_node(&ctx->mod, ctx->shader_property_nodes,
2063 ctx->num_shader_property_nodes);
2064 if (!shader_properties)
2065 return false;
2066 }
2067
2068 nir_function_impl *entry_func_impl = nir_shader_get_entrypoint(ctx->shader);
2069 const struct dxil_mdnode *dx_entry_point = emit_entrypoint(ctx, main_func,
2070 entry_func_impl->function->name, get_signatures(&ctx->mod), resources_node, shader_properties);
2071 if (!dx_entry_point)
2072 return false;
2073
2074 if (resources_node) {
2075 const struct dxil_mdnode *dx_resources = resources_node;
2076 dxil_add_metadata_named_node(&ctx->mod, "dx.resources",
2077 &dx_resources, 1);
2078 }
2079
2080 if (ctx->mod.minor_version >= 2 &&
2081 dxil_nir_analyze_io_dependencies(&ctx->mod, ctx->shader)) {
2082 const struct dxil_type *i32_type = dxil_module_get_int_type(&ctx->mod, 32);
2083 if (!i32_type)
2084 return false;
2085
2086 const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, i32_type, ctx->mod.serialized_dependency_table_size);
2087 if (!array_type)
2088 return false;
2089
2090 const struct dxil_value **array_entries = malloc(sizeof(const struct dxil_value *) * ctx->mod.serialized_dependency_table_size);
2091 if (!array_entries)
2092 return false;
2093
2094 for (uint32_t i = 0; i < ctx->mod.serialized_dependency_table_size; ++i)
2095 array_entries[i] = dxil_module_get_int32_const(&ctx->mod, ctx->mod.serialized_dependency_table[i]);
2096 const struct dxil_value *array_val = dxil_module_get_array_const(&ctx->mod, array_type, array_entries);
2097 free((void *)array_entries);
2098
2099 const struct dxil_mdnode *view_id_state_val = dxil_get_metadata_value(&ctx->mod, array_type, array_val);
2100 if (!view_id_state_val)
2101 return false;
2102
2103 const struct dxil_mdnode *view_id_state_node = dxil_get_metadata_node(&ctx->mod, &view_id_state_val, 1);
2104
2105 dxil_add_metadata_named_node(&ctx->mod, "dx.viewIdState", &view_id_state_node, 1);
2106 }
2107
2108 const struct dxil_mdnode *dx_type_annotations[] = { main_type_annotation };
2109 return dxil_add_metadata_named_node(&ctx->mod, "dx.typeAnnotations",
2110 dx_type_annotations,
2111 ARRAY_SIZE(dx_type_annotations)) &&
2112 dxil_add_metadata_named_node(&ctx->mod, "dx.entryPoints",
2113 &dx_entry_point, 1);
2114 }
2115
2116 static const struct dxil_value *
2117 bitcast_to_int(struct ntd_context *ctx, unsigned bit_size,
2118 const struct dxil_value *value)
2119 {
2120 const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod, bit_size);
2121 if (!type)
2122 return NULL;
2123
2124 return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
2125 }
2126
2127 static const struct dxil_value *
2128 bitcast_to_float(struct ntd_context *ctx, unsigned bit_size,
2129 const struct dxil_value *value)
2130 {
2131 const struct dxil_type *type = dxil_module_get_float_type(&ctx->mod, bit_size);
2132 if (!type)
2133 return NULL;
2134
2135 return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
2136 }
2137
2138 static bool
2139 is_phi_src(nir_def *ssa)
2140 {
2141 nir_foreach_use(src, ssa)
2142 if (nir_src_parent_instr(src)->type == nir_instr_type_phi)
2143 return true;
2144 return false;
2145 }
2146
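/* Store one scalar channel of an SSA def. Values feeding phis must have a
 * single consistent type across all predecessor blocks, so they are
 * bitcast here in the parent block; when a def is used as both int and
 * float (or has no type info), int wins.
 */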
2147 static void
2148 store_ssa_def(struct ntd_context *ctx, nir_def *ssa, unsigned chan,
2149 const struct dxil_value *value)
2150 {
2151 assert(ssa->index < ctx->num_defs);
2152 assert(chan < ssa->num_components);
2153 /* Insert bitcasts for phi srcs in the parent block */
2154 if (is_phi_src(ssa)) {
2155 /* Prefer ints over floats if it could be both or if we have no type info */
2156 nir_alu_type expect_type =
2157 BITSET_TEST(ctx->int_types, ssa->index) ? nir_type_int :
2158 (BITSET_TEST(ctx->float_types, ssa->index) ? nir_type_float :
2159 nir_type_int);
2160 assert(ssa->bit_size != 1 || expect_type == nir_type_int);
2161 if (ssa->bit_size != 1 && expect_type != dxil_type_to_nir_type(dxil_value_get_type(value)))
2162 value = dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST,
2163 expect_type == nir_type_int ?
2164 dxil_module_get_int_type(&ctx->mod, ssa->bit_size) :
2165 dxil_module_get_float_type(&ctx->mod, ssa->bit_size), value);
2166 if (ssa->bit_size == 64) {
2167 if (expect_type == nir_type_int)
2168 ctx->mod.feats.int64_ops = true;
2169 if (expect_type == nir_type_float)
2170 ctx->mod.feats.doubles = true;
2171 }
2172 }
2173 ctx->defs[ssa->index].chans[chan] = value;
2174 }
2175
2176 static void
2177 store_def(struct ntd_context *ctx, nir_def *def, unsigned chan,
2178 const struct dxil_value *value)
2179 {
2180 const struct dxil_type *type = dxil_value_get_type(value);
2181 if (type == ctx->mod.float64_type)
2182 ctx->mod.feats.doubles = true;
2183 if (type == ctx->mod.float16_type ||
2184 type == ctx->mod.int16_type)
2185 ctx->mod.feats.min_precision = true;
2186 if (type == ctx->mod.int64_type)
2187 ctx->mod.feats.int64_ops = true;
2188 store_ssa_def(ctx, def, chan, value);
2189 }
2190
2191 static void
2192 store_alu_dest(struct ntd_context *ctx, nir_alu_instr *alu, unsigned chan,
2193 const struct dxil_value *value)
2194 {
2195 store_def(ctx, &alu->def, chan, value);
2196 }
2197
2198 static const struct dxil_value *
2199 get_src_ssa(struct ntd_context *ctx, const nir_def *ssa, unsigned chan)
2200 {
2201 assert(ssa->index < ctx->num_defs);
2202 assert(chan < ssa->num_components);
2203 assert(ctx->defs[ssa->index].chans[chan]);
2204 return ctx->defs[ssa->index].chans[chan];
2205 }
2206
2207 static const struct dxil_value *
2208 get_src(struct ntd_context *ctx, nir_src *src, unsigned chan,
2209 nir_alu_type type)
2210 {
2211 const struct dxil_value *value = get_src_ssa(ctx, src->ssa, chan);
2212
2213 const int bit_size = nir_src_bit_size(*src);
2214
2215 switch (nir_alu_type_get_base_type(type)) {
2216 case nir_type_int:
2217 case nir_type_uint: {
2218 const struct dxil_type *expect_type = dxil_module_get_int_type(&ctx->mod, bit_size);
2219 /* nothing to do */
2220 if (dxil_value_type_equal_to(value, expect_type)) {
2221 assert(bit_size != 64 || ctx->mod.feats.int64_ops);
2222 return value;
2223 }
2224 if (bit_size == 64) {
2225 assert(ctx->mod.feats.doubles);
2226 ctx->mod.feats.int64_ops = true;
2227 }
2228 if (bit_size == 16)
2229 ctx->mod.feats.native_low_precision = true;
2230 assert(dxil_value_type_bitsize_equal_to(value, bit_size));
2231 return bitcast_to_int(ctx, bit_size, value);
2232 }
2233
2234 case nir_type_float:
2235 assert(nir_src_bit_size(*src) >= 16);
2236 if (dxil_value_type_equal_to(value, dxil_module_get_float_type(&ctx->mod, bit_size))) {
2237 assert(nir_src_bit_size(*src) != 64 || ctx->mod.feats.doubles);
2238 return value;
2239 }
2240 if (bit_size == 64) {
2241 assert(ctx->mod.feats.int64_ops);
2242 ctx->mod.feats.doubles = true;
2243 }
2244 if (bit_size == 16)
2245 ctx->mod.feats.native_low_precision = true;
2246 assert(dxil_value_type_bitsize_equal_to(value, bit_size));
2247 return bitcast_to_float(ctx, bit_size, value);
2248
2249 case nir_type_bool:
2250 if (!dxil_value_type_bitsize_equal_to(value, 1)) {
2251 return dxil_emit_cast(&ctx->mod, DXIL_CAST_TRUNC,
2252 dxil_module_get_int_type(&ctx->mod, 1), value);
2253 }
2254 return value;
2255
2256 default:
2257 unreachable("unexpected nir_alu_type");
2258 }
2259 }
2260
2261 static const struct dxil_value *
2262 get_alu_src(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src)
2263 {
2264 unsigned chan = alu->src[src].swizzle[0];
2265 return get_src(ctx, &alu->src[src].src, chan,
2266 nir_op_infos[alu->op].input_types[src]);
2267 }
2268
2269 static bool
2270 emit_binop(struct ntd_context *ctx, nir_alu_instr *alu,
2271 enum dxil_bin_opcode opcode,
2272 const struct dxil_value *op0, const struct dxil_value *op1)
2273 {
2274 bool is_float_op = nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) == nir_type_float;
2275
2276 enum dxil_opt_flags flags = 0;
2277 if (is_float_op && !alu->exact)
2278 flags |= DXIL_UNSAFE_ALGEBRA;
2279
2280 const struct dxil_value *v = dxil_emit_binop(&ctx->mod, opcode, op0, op1, flags);
2281 if (!v)
2282 return false;
2283 store_alu_dest(ctx, alu, 0, v);
2284 return true;
2285 }
2286
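/* NIR defines shift counts to wrap to the bit width, while LLVM/DXIL
 * leaves shifts by >= bit_size undefined, so mask the count first. A
 * sketch of the transform for a 32-bit value (illustrative only):
 *
 *    ishl %x, %n  ->  shl %x, (and %n, 31)
 *
 * Constant counts are masked at compile time, and counts of a different
 * width than the shifted value are zero-extended or truncated to match.
 */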
2287 static bool
2288 emit_shift(struct ntd_context *ctx, nir_alu_instr *alu,
2289 enum dxil_bin_opcode opcode,
2290 const struct dxil_value *op0, const struct dxil_value *op1)
2291 {
2292 unsigned op0_bit_size = nir_src_bit_size(alu->src[0].src);
2293 unsigned op1_bit_size = nir_src_bit_size(alu->src[1].src);
2294
2295 uint64_t shift_mask = op0_bit_size - 1;
2296 if (!nir_src_is_const(alu->src[1].src)) {
2297 if (op0_bit_size != op1_bit_size) {
2298 const struct dxil_type *type =
2299 dxil_module_get_int_type(&ctx->mod, op0_bit_size);
2300 enum dxil_cast_opcode cast_op =
2301 op1_bit_size < op0_bit_size ? DXIL_CAST_ZEXT : DXIL_CAST_TRUNC;
2302 op1 = dxil_emit_cast(&ctx->mod, cast_op, type, op1);
2303 }
2304 op1 = dxil_emit_binop(&ctx->mod, DXIL_BINOP_AND,
2305 op1,
2306 dxil_module_get_int_const(&ctx->mod, shift_mask, op0_bit_size),
2307 0);
2308 } else {
2309 uint64_t val = nir_scalar_as_uint(
2310 nir_scalar_chase_alu_src(nir_get_scalar(&alu->def, 0), 1));
2311 op1 = dxil_module_get_int_const(&ctx->mod, val & shift_mask, op0_bit_size);
2312 }
2313
2314 const struct dxil_value *v =
2315 dxil_emit_binop(&ctx->mod, opcode, op0, op1, 0);
2316 if (!v)
2317 return false;
2318 store_alu_dest(ctx, alu, 0, v);
2319 return true;
2320 }
2321
2322 static bool
2323 emit_cmp(struct ntd_context *ctx, nir_alu_instr *alu,
2324 enum dxil_cmp_pred pred,
2325 const struct dxil_value *op0, const struct dxil_value *op1)
2326 {
2327 const struct dxil_value *v = dxil_emit_cmp(&ctx->mod, pred, op0, op1);
2328 if (!v)
2329 return false;
2330 store_alu_dest(ctx, alu, 0, v);
2331 return true;
2332 }
2333
2334 static enum dxil_cast_opcode
2335 get_cast_op(nir_alu_instr *alu)
2336 {
2337 unsigned dst_bits = alu->def.bit_size;
2338 unsigned src_bits = nir_src_bit_size(alu->src[0].src);
2339
2340 switch (alu->op) {
2341 /* bool -> int */
2342 case nir_op_b2i16:
2343 case nir_op_b2i32:
2344 case nir_op_b2i64:
2345 return DXIL_CAST_ZEXT;
2346
2347 /* float -> float */
2348 case nir_op_f2f16_rtz:
2349 case nir_op_f2f16:
2350 case nir_op_f2fmp:
2351 case nir_op_f2f32:
2352 case nir_op_f2f64:
2353 assert(dst_bits != src_bits);
2354 if (dst_bits < src_bits)
2355 return DXIL_CAST_FPTRUNC;
2356 else
2357 return DXIL_CAST_FPEXT;
2358
2359 /* int -> int */
2360 case nir_op_i2i1:
2361 case nir_op_i2i16:
2362 case nir_op_i2imp:
2363 case nir_op_i2i32:
2364 case nir_op_i2i64:
2365 assert(dst_bits != src_bits);
2366 if (dst_bits < src_bits)
2367 return DXIL_CAST_TRUNC;
2368 else
2369 return DXIL_CAST_SEXT;
2370
2371 /* uint -> uint */
2372 case nir_op_u2u1:
2373 case nir_op_u2u16:
2374 case nir_op_u2u32:
2375 case nir_op_u2u64:
2376 assert(dst_bits != src_bits);
2377 if (dst_bits < src_bits)
2378 return DXIL_CAST_TRUNC;
2379 else
2380 return DXIL_CAST_ZEXT;
2381
2382 /* float -> int */
2383 case nir_op_f2i16:
2384 case nir_op_f2imp:
2385 case nir_op_f2i32:
2386 case nir_op_f2i64:
2387 return DXIL_CAST_FPTOSI;
2388
2389 /* float -> uint */
2390 case nir_op_f2u16:
2391 case nir_op_f2ump:
2392 case nir_op_f2u32:
2393 case nir_op_f2u64:
2394 return DXIL_CAST_FPTOUI;
2395
2396 /* int -> float */
2397 case nir_op_i2f16:
2398 case nir_op_i2fmp:
2399 case nir_op_i2f32:
2400 case nir_op_i2f64:
2401 return DXIL_CAST_SITOFP;
2402
2403 /* uint -> float */
2404 case nir_op_u2f16:
2405 case nir_op_u2fmp:
2406 case nir_op_u2f32:
2407 case nir_op_u2f64:
2408 return DXIL_CAST_UITOFP;
2409
2410 default:
2411 unreachable("unexpected cast op");
2412 }
2413 }
2414
2415 static const struct dxil_type *
2416 get_cast_dest_type(struct ntd_context *ctx, nir_alu_instr *alu)
2417 {
2418 unsigned dst_bits = alu->def.bit_size;
2419 switch (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type)) {
2420 case nir_type_bool:
2421 assert(dst_bits == 1);
2422 FALLTHROUGH;
2423 case nir_type_int:
2424 case nir_type_uint:
2425 return dxil_module_get_int_type(&ctx->mod, dst_bits);
2426
2427 case nir_type_float:
2428 return dxil_module_get_float_type(&ctx->mod, dst_bits);
2429
2430 default:
2431 unreachable("unknown nir_alu_type");
2432 }
2433 }
2434
2435 static bool
2436 is_double(nir_alu_type alu_type, unsigned bit_size)
2437 {
2438 return nir_alu_type_get_base_type(alu_type) == nir_type_float &&
2439 bit_size == 64;
2440 }
2441
2442 static bool
2443 emit_cast(struct ntd_context *ctx, nir_alu_instr *alu,
2444 const struct dxil_value *value)
2445 {
2446 enum dxil_cast_opcode opcode = get_cast_op(alu);
2447 const struct dxil_type *type = get_cast_dest_type(ctx, alu);
2448 if (!type)
2449 return false;
2450
2451 const nir_op_info *info = &nir_op_infos[alu->op];
2452 switch (opcode) {
2453 case DXIL_CAST_UITOFP:
2454 case DXIL_CAST_SITOFP:
2455 if (is_double(info->output_type, alu->def.bit_size))
2456 ctx->mod.feats.dx11_1_double_extensions = true;
2457 break;
2458 case DXIL_CAST_FPTOUI:
2459 case DXIL_CAST_FPTOSI:
2460 if (is_double(info->input_types[0], nir_src_bit_size(alu->src[0].src)))
2461 ctx->mod.feats.dx11_1_double_extensions = true;
2462 break;
2463 default:
2464 break;
2465 }
2466
2467 if (alu->def.bit_size == 16) {
2468 switch (alu->op) {
2469 case nir_op_f2fmp:
2470 case nir_op_i2imp:
2471 case nir_op_f2imp:
2472 case nir_op_f2ump:
2473 case nir_op_i2fmp:
2474 case nir_op_u2fmp:
2475 break;
2476 default:
2477 ctx->mod.feats.native_low_precision = true;
2478 }
2479 }
2480
2481 const struct dxil_value *v = dxil_emit_cast(&ctx->mod, opcode, type,
2482 value);
2483 if (!v)
2484 return false;
2485 store_alu_dest(ctx, alu, 0, v);
2486 return true;
2487 }
2488
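/* Pick the DXIL overload for a NIR type and bit size, i.e. the .f32/.i32
 * style suffix appended to overloaded op names such as dx.op.unary.f32.
 * Booleans share the integer overloads.
 */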
2489 static enum overload_type
2490 get_overload(nir_alu_type alu_type, unsigned bit_size)
2491 {
2492 switch (nir_alu_type_get_base_type(alu_type)) {
2493 case nir_type_int:
2494 case nir_type_uint:
2495 case nir_type_bool:
2496 switch (bit_size) {
2497 case 1: return DXIL_I1;
2498 case 16: return DXIL_I16;
2499 case 32: return DXIL_I32;
2500 case 64: return DXIL_I64;
2501 default:
2502 unreachable("unexpected bit_size");
2503 }
2504 case nir_type_float:
2505 switch (bit_size) {
2506 case 16: return DXIL_F16;
2507 case 32: return DXIL_F32;
2508 case 64: return DXIL_F64;
2509 default:
2510 unreachable("unexpected bit_size");
2511 }
2512 case nir_type_invalid:
2513 return DXIL_NONE;
2514 default:
2515 unreachable("unexpected output type");
2516 }
2517 }
2518
2519 static enum overload_type
2520 get_ambiguous_overload(struct ntd_context *ctx, nir_intrinsic_instr *intr,
2521 enum overload_type default_type)
2522 {
2523 if (BITSET_TEST(ctx->int_types, intr->def.index))
2524 return get_overload(nir_type_int, intr->def.bit_size);
2525 if (BITSET_TEST(ctx->float_types, intr->def.index))
2526 return get_overload(nir_type_float, intr->def.bit_size);
2527 return default_type;
2528 }
2529
2530 static enum overload_type
2531 get_ambiguous_overload_alu_type(struct ntd_context *ctx, nir_intrinsic_instr *intr,
2532 nir_alu_type alu_type)
2533 {
2534 return get_ambiguous_overload(ctx, intr, get_overload(alu_type, intr->def.bit_size));
2535 }
2536
2537 static bool
2538 emit_unary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
2539 enum dxil_intr intr, const struct dxil_value *op)
2540 {
2541 const nir_op_info *info = &nir_op_infos[alu->op];
2542 unsigned src_bits = nir_src_bit_size(alu->src[0].src);
2543 enum overload_type overload = get_overload(info->input_types[0], src_bits);
2544
2545 const struct dxil_value *v = emit_unary_call(ctx, overload, intr, op);
2546 if (!v)
2547 return false;
2548 store_alu_dest(ctx, alu, 0, v);
2549 return true;
2550 }
2551
2552 static bool
2553 emit_binary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
2554 enum dxil_intr intr,
2555 const struct dxil_value *op0, const struct dxil_value *op1)
2556 {
2557 const nir_op_info *info = &nir_op_infos[alu->op];
2558 assert(info->output_type == info->input_types[0]);
2559 assert(info->output_type == info->input_types[1]);
2560 unsigned dst_bits = alu->def.bit_size;
2561 assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
2562 assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
2563 enum overload_type overload = get_overload(info->output_type, dst_bits);
2564
2565 const struct dxil_value *v = emit_binary_call(ctx, overload, intr,
2566 op0, op1);
2567 if (!v)
2568 return false;
2569 store_alu_dest(ctx, alu, 0, v);
2570 return true;
2571 }
2572
2573 static bool
2574 emit_tertiary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
2575 enum dxil_intr intr,
2576 const struct dxil_value *op0,
2577 const struct dxil_value *op1,
2578 const struct dxil_value *op2)
2579 {
2580 const nir_op_info *info = &nir_op_infos[alu->op];
2581 unsigned dst_bits = alu->def.bit_size;
2582 assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
2583 assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
2584 assert(nir_src_bit_size(alu->src[2].src) == dst_bits);
2585
2586 assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[0], dst_bits));
2587 assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[1], dst_bits));
2588 assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[2], dst_bits));
2589
2590 enum overload_type overload = get_overload(info->output_type, dst_bits);
2591
2592 const struct dxil_value *v = emit_tertiary_call(ctx, overload, intr,
2593 op0, op1, op2);
2594 if (!v)
2595 return false;
2596 store_alu_dest(ctx, alu, 0, v);
2597 return true;
2598 }
2599
2600 static bool
2601 emit_derivative(struct ntd_context *ctx, nir_intrinsic_instr *intr,
2602 enum dxil_intr dxil_intr)
2603 {
2604 const struct dxil_value *src = get_src(ctx, &intr->src[0], 0, nir_type_float);
2605 enum overload_type overload = get_overload(nir_type_float, intr->src[0].ssa->bit_size);
2606 const struct dxil_value *v = emit_unary_call(ctx, overload, dxil_intr, src);
2607 if (!v)
2608 return false;
2609 store_def(ctx, &intr->def, 0, v);
2610 return true;
2611 }
2612
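/* NIR's bitfield_insert takes (base, insert, offset, width); DXIL's Bfi
 * takes the same operands in reverse. As an illustration,
 * bitfield_insert(base, insert, 4, 8) replaces bits [11:4] of base with
 * the low 8 bits of insert.
 */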
2613 static bool
2614 emit_bitfield_insert(struct ntd_context *ctx, nir_alu_instr *alu,
2615 const struct dxil_value *base,
2616 const struct dxil_value *insert,
2617 const struct dxil_value *offset,
2618 const struct dxil_value *width)
2619 {
2620 /* DXIL operand order is (width, offset, insert, base); NIR's is (base, insert, offset, width) */
2621 const struct dxil_value *v = emit_quaternary_call(ctx, DXIL_I32, DXIL_INTR_BFI,
2622 width, offset, insert, base);
2623 if (!v)
2624 return false;
2625
2626 /* DXIL only uses the 5 LSBs of width/offset. Special-case width >= 32 to copy insert through unchanged. */
2627 const struct dxil_value *compare_width = dxil_emit_cmp(&ctx->mod, DXIL_ICMP_SGE,
2628 width, dxil_module_get_int32_const(&ctx->mod, 32));
2629 v = dxil_emit_select(&ctx->mod, compare_width, insert, v);
2630 store_alu_dest(ctx, alu, 0, v);
2631 return true;
2632 }
2633
2634 static bool
2635 emit_dot4add_packed(struct ntd_context *ctx, nir_alu_instr *alu,
2636 enum dxil_intr intr,
2637 const struct dxil_value *src0,
2638 const struct dxil_value *src1,
2639 const struct dxil_value *accum)
2640 {
2641 const struct dxil_func *f = dxil_get_function(&ctx->mod, "dx.op.dot4AddPacked", DXIL_I32);
2642 if (!f)
2643 return false;
2644 const struct dxil_value *srcs[] = { dxil_module_get_int32_const(&ctx->mod, intr), accum, src0, src1 };
2645 const struct dxil_value *v = dxil_emit_call(&ctx->mod, f, srcs, ARRAY_SIZE(srcs));
2646 if (!v)
2647 return false;
2648
2649 store_alu_dest(ctx, alu, 0, v);
2650 return true;
2651 }
2652
2653 static bool emit_select(struct ntd_context *ctx, nir_alu_instr *alu,
2654 const struct dxil_value *sel,
2655 const struct dxil_value *val_true,
2656 const struct dxil_value *val_false)
2657 {
2658 assert(sel);
2659 assert(val_true);
2660 assert(val_false);
2661
2662 const struct dxil_value *v = dxil_emit_select(&ctx->mod, sel, val_true, val_false);
2663 if (!v)
2664 return false;
2665
2666 store_alu_dest(ctx, alu, 0, v);
2667 return true;
2668 }
2669
2670 static bool
2671 emit_b2f16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
2672 {
2673 assert(val);
2674
2675 struct dxil_module *m = &ctx->mod;
2676
2677 const struct dxil_value *c1 = dxil_module_get_float16_const(m, 0x3C00);
2678 const struct dxil_value *c0 = dxil_module_get_float16_const(m, 0);
2679
2680 if (!c0 || !c1)
2681 return false;
2682
2683 return emit_select(ctx, alu, val, c1, c0);
2684 }
2685
2686 static bool
2687 emit_b2f32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
2688 {
2689 assert(val);
2690
2691 struct dxil_module *m = &ctx->mod;
2692
2693 const struct dxil_value *c1 = dxil_module_get_float_const(m, 1.0f);
2694 const struct dxil_value *c0 = dxil_module_get_float_const(m, 0.0f);
2695
2696 if (!c0 || !c1)
2697 return false;
2698
2699 return emit_select(ctx, alu, val, c1, c0);
2700 }
2701
2702 static bool
2703 emit_b2f64(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
2704 {
2705 assert(val);
2706
2707 struct dxil_module *m = &ctx->mod;
2708
2709 const struct dxil_value *c1 = dxil_module_get_double_const(m, 1.0);
2710 const struct dxil_value *c0 = dxil_module_get_double_const(m, 0.0);
2711
2712 if (!c0 || !c1)
2713 return false;
2714
2715 ctx->mod.feats.doubles = 1;
2716 return emit_select(ctx, alu, val, c1, c0);
2717 }
2718
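/* legacyF16ToF32 converts the half stored in the low 16 bits of its i32
 * operand, so unpack_half_2x16_split_y passes shift=true to move the high
 * half down first.
 */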
2719 static bool
2720 emit_f16tof32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val, bool shift)
2721 {
2722 if (shift) {
2723 val = dxil_emit_binop(&ctx->mod, DXIL_BINOP_LSHR, val,
2724 dxil_module_get_int32_const(&ctx->mod, 16), 0);
2725 if (!val)
2726 return false;
2727 }
2728
2729 const struct dxil_func *func = dxil_get_function(&ctx->mod,
2730 "dx.op.legacyF16ToF32",
2731 DXIL_NONE);
2732 if (!func)
2733 return false;
2734
2735 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F16TOF32);
2736 if (!opcode)
2737 return false;
2738
2739 const struct dxil_value *args[] = {
2740 opcode,
2741 val
2742 };
2743
2744 const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2745 if (!v)
2746 return false;
2747 store_alu_dest(ctx, alu, 0, v);
2748 return true;
2749 }
2750
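/* Pack two floats into the halves of an i32 via legacyF32ToF16: the
 * second value is converted, shifted left by 16 and OR'd in, unless it is
 * known to be constant zero, in which case only the low half is emitted.
 */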
2751 static bool
2752 emit_f32tof16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val0, const struct dxil_value *val1)
2753 {
2754 const struct dxil_func *func = dxil_get_function(&ctx->mod,
2755 "dx.op.legacyF32ToF16",
2756 DXIL_NONE);
2757 if (!func)
2758 return false;
2759
2760 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F32TOF16);
2761 if (!opcode)
2762 return false;
2763
2764 const struct dxil_value *args[] = {
2765 opcode,
2766 val0
2767 };
2768
2769 const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2770 if (!v)
2771 return false;
2772
2773 if (!nir_src_is_const(alu->src[1].src) || nir_src_as_int(alu->src[1].src) != 0) {
2774 args[1] = val1;
2775 const struct dxil_value *v_high = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2776 if (!v_high)
2777 return false;
2778
2779 v_high = dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL, v_high,
2780 dxil_module_get_int32_const(&ctx->mod, 16), 0);
2781 if (!v_high)
2782 return false;
2783
2784 v = dxil_emit_binop(&ctx->mod, DXIL_BINOP_OR, v, v_high, 0);
2785 if (!v)
2786 return false;
2787 }
2788
2789 store_alu_dest(ctx, alu, 0, v);
2790 return true;
2791 }
2792
2793 static bool
2794 emit_vec(struct ntd_context *ctx, nir_alu_instr *alu, unsigned num_inputs)
2795 {
2796 for (unsigned i = 0; i < num_inputs; i++) {
2797 const struct dxil_value *src =
2798 get_src_ssa(ctx, alu->src[i].src.ssa, alu->src[i].swizzle[0]);
2799 if (!src)
2800 return false;
2801
2802 store_alu_dest(ctx, alu, i, src);
2803 }
2804 return true;
2805 }
2806
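/* Doubles cross the NIR boundary as pairs of 32-bit halves:
 * dx.op.makeDouble fuses two u32 components into an f64, and
 * dx.op.splitDouble (below) returns both halves in a struct that is
 * unpacked with extractval.
 */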
2807 static bool
2808 emit_make_double(struct ntd_context *ctx, nir_alu_instr *alu)
2809 {
2810 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.makeDouble", DXIL_F64);
2811 if (!func)
2812 return false;
2813
2814 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_MAKE_DOUBLE);
2815 if (!opcode)
2816 return false;
2817
2818 const struct dxil_value *args[3] = {
2819 opcode,
2820 get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_uint32),
2821 get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[1], nir_type_uint32),
2822 };
2823 if (!args[1] || !args[2])
2824 return false;
2825
2826 const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2827 if (!v)
2828 return false;
2829 store_def(ctx, &alu->def, 0, v);
2830 return true;
2831 }
2832
2833 static bool
2834 emit_split_double(struct ntd_context *ctx, nir_alu_instr *alu)
2835 {
2836 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.splitDouble", DXIL_F64);
2837 if (!func)
2838 return false;
2839
2840 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SPLIT_DOUBLE);
2841 if (!opcode)
2842 return false;
2843
2844 const struct dxil_value *args[] = {
2845 opcode,
2846 get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_float64)
2847 };
2848 if (!args[1])
2849 return false;
2850
2851 const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2852 if (!v)
2853 return false;
2854
2855 const struct dxil_value *hi = dxil_emit_extractval(&ctx->mod, v, 0);
2856 const struct dxil_value *lo = dxil_emit_extractval(&ctx->mod, v, 1);
2857 if (!hi || !lo)
2858 return false;
2859
2860 store_def(ctx, &alu->def, 0, hi);
2861 store_def(ctx, &alu->def, 1, lo);
2862 return true;
2863 }
2864
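/* The vecN and mov ops only shuffle per-channel SSA values and are
 * handled first; everything else has been scalarized, so the generic path
 * gathers up to four scalar sources and dispatches on the opcode.
 */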
2865 static bool
2866 emit_alu(struct ntd_context *ctx, nir_alu_instr *alu)
2867 {
2868 /* handle vec-instructions first; they are the only ones that produce
2869 * vector results.
2870 */
2871 switch (alu->op) {
2872 case nir_op_vec2:
2873 case nir_op_vec3:
2874 case nir_op_vec4:
2875 case nir_op_vec8:
2876 case nir_op_vec16:
2877 return emit_vec(ctx, alu, nir_op_infos[alu->op].num_inputs);
2878 case nir_op_mov: {
2879 assert(alu->def.num_components == 1);
2880 store_ssa_def(ctx, &alu->def, 0, get_src_ssa(ctx,
2881 alu->src->src.ssa, alu->src->swizzle[0]));
2882 return true;
2883 }
2884 case nir_op_pack_double_2x32_dxil:
2885 return emit_make_double(ctx, alu);
2886 case nir_op_unpack_double_2x32_dxil:
2887 return emit_split_double(ctx, alu);
2888 case nir_op_bcsel: {
2889 /* Handled here to avoid a forced bitcast to int, since bcsel is used for both ints and floats.
2890 * Ideally the type back-propagation made both sources match; if it didn't, explicitly fetch src2 with src1's type. */
2891 const struct dxil_value *src1 = get_src_ssa(ctx, alu->src[1].src.ssa, alu->src[1].swizzle[0]);
2892 nir_alu_type src1_type = dxil_type_to_nir_type(dxil_value_get_type(src1));
2893 return emit_select(ctx, alu,
2894 get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_bool),
2895 src1,
2896 get_src(ctx, &alu->src[2].src, alu->src[2].swizzle[0], src1_type));
2897 }
2898 default:
2899 /* silence warnings */
2900 ;
2901 }
2902
2903 /* other ops should be scalar */
2904 const struct dxil_value *src[4];
2905 assert(nir_op_infos[alu->op].num_inputs <= 4);
2906 for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
2907 src[i] = get_alu_src(ctx, alu, i);
2908 if (!src[i])
2909 return false;
2910 }
2911
2912 switch (alu->op) {
2913 case nir_op_iadd:
2914 case nir_op_fadd: return emit_binop(ctx, alu, DXIL_BINOP_ADD, src[0], src[1]);
2915
2916 case nir_op_isub:
2917 case nir_op_fsub: return emit_binop(ctx, alu, DXIL_BINOP_SUB, src[0], src[1]);
2918
2919 case nir_op_imul:
2920 case nir_op_fmul: return emit_binop(ctx, alu, DXIL_BINOP_MUL, src[0], src[1]);
2921
2922 case nir_op_fdiv:
2923 if (alu->def.bit_size == 64)
2924 ctx->mod.feats.dx11_1_double_extensions = 1;
2925 return emit_binop(ctx, alu, DXIL_BINOP_SDIV, src[0], src[1]);
2926
2927 case nir_op_idiv:
2928 case nir_op_udiv:
2929 if (nir_src_is_const(alu->src[1].src)) {
2930 /* It's illegal to emit a literal divide by 0 in DXIL */
2931 nir_scalar divisor = nir_scalar_chase_alu_src(nir_get_scalar(&alu->def, 0), 1);
2932 if (nir_scalar_as_int(divisor) == 0) {
2933 store_alu_dest(ctx, alu, 0,
2934 dxil_module_get_int_const(&ctx->mod, 0, alu->def.bit_size));
2935 return true;
2936 }
2937 }
2938 return emit_binop(ctx, alu, alu->op == nir_op_idiv ? DXIL_BINOP_SDIV : DXIL_BINOP_UDIV, src[0], src[1]);
2939
2940 case nir_op_irem: return emit_binop(ctx, alu, DXIL_BINOP_SREM, src[0], src[1]);
2941 case nir_op_imod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
2942 case nir_op_umod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
2943 case nir_op_ishl: return emit_shift(ctx, alu, DXIL_BINOP_SHL, src[0], src[1]);
2944 case nir_op_ishr: return emit_shift(ctx, alu, DXIL_BINOP_ASHR, src[0], src[1]);
2945 case nir_op_ushr: return emit_shift(ctx, alu, DXIL_BINOP_LSHR, src[0], src[1]);
2946 case nir_op_iand: return emit_binop(ctx, alu, DXIL_BINOP_AND, src[0], src[1]);
2947 case nir_op_ior: return emit_binop(ctx, alu, DXIL_BINOP_OR, src[0], src[1]);
2948 case nir_op_ixor: return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], src[1]);
2949 case nir_op_inot: {
2950 unsigned bit_size = alu->def.bit_size;
2951 intmax_t val = bit_size == 1 ? 1 : -1;
2952 const struct dxil_value *negative_one = dxil_module_get_int_const(&ctx->mod, val, bit_size);
2953 return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], negative_one);
2954 }
2955 case nir_op_ieq: return emit_cmp(ctx, alu, DXIL_ICMP_EQ, src[0], src[1]);
2956 case nir_op_ine: return emit_cmp(ctx, alu, DXIL_ICMP_NE, src[0], src[1]);
2957 case nir_op_ige: return emit_cmp(ctx, alu, DXIL_ICMP_SGE, src[0], src[1]);
2958 case nir_op_uge: return emit_cmp(ctx, alu, DXIL_ICMP_UGE, src[0], src[1]);
2959 case nir_op_ilt: return emit_cmp(ctx, alu, DXIL_ICMP_SLT, src[0], src[1]);
2960 case nir_op_ult: return emit_cmp(ctx, alu, DXIL_ICMP_ULT, src[0], src[1]);
2961 case nir_op_feq: return emit_cmp(ctx, alu, DXIL_FCMP_OEQ, src[0], src[1]);
2962 case nir_op_fneu: return emit_cmp(ctx, alu, DXIL_FCMP_UNE, src[0], src[1]);
2963 case nir_op_flt: return emit_cmp(ctx, alu, DXIL_FCMP_OLT, src[0], src[1]);
2964 case nir_op_fge: return emit_cmp(ctx, alu, DXIL_FCMP_OGE, src[0], src[1]);
2965 case nir_op_ftrunc: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_Z, src[0]);
2966 case nir_op_fabs: return emit_unary_intin(ctx, alu, DXIL_INTR_FABS, src[0]);
2967 case nir_op_fcos: return emit_unary_intin(ctx, alu, DXIL_INTR_FCOS, src[0]);
2968 case nir_op_fsin: return emit_unary_intin(ctx, alu, DXIL_INTR_FSIN, src[0]);
2969 case nir_op_fceil: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_PI, src[0]);
2970 case nir_op_fexp2: return emit_unary_intin(ctx, alu, DXIL_INTR_FEXP2, src[0]);
2971 case nir_op_flog2: return emit_unary_intin(ctx, alu, DXIL_INTR_FLOG2, src[0]);
2972 case nir_op_ffloor: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NI, src[0]);
2973 case nir_op_ffract: return emit_unary_intin(ctx, alu, DXIL_INTR_FRC, src[0]);
2974 case nir_op_fisnormal: return emit_unary_intin(ctx, alu, DXIL_INTR_ISNORMAL, src[0]);
2975 case nir_op_fisfinite: return emit_unary_intin(ctx, alu, DXIL_INTR_ISFINITE, src[0]);
2976
2977 case nir_op_fround_even: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NE, src[0]);
2978 case nir_op_frcp: {
2979 const struct dxil_value *one;
2980 switch (alu->def.bit_size) {
2981 case 16:
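/* 0x3C00 is 1.0 in IEEE 754 half precision. */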
2982 one = dxil_module_get_float16_const(&ctx->mod, 0x3C00);
2983 break;
2984 case 32:
2985 one = dxil_module_get_float_const(&ctx->mod, 1.0f);
2986 break;
2987 case 64:
2988 one = dxil_module_get_double_const(&ctx->mod, 1.0);
2989 break;
2990 default: unreachable("Invalid float size");
2991 }
2992 return emit_binop(ctx, alu, DXIL_BINOP_SDIV, one, src[0]);
2993 }
2994 case nir_op_fsat: return emit_unary_intin(ctx, alu, DXIL_INTR_SATURATE, src[0]);
2995 case nir_op_bit_count: return emit_unary_intin(ctx, alu, DXIL_INTR_COUNTBITS, src[0]);
2996 case nir_op_bitfield_reverse: return emit_unary_intin(ctx, alu, DXIL_INTR_BFREV, src[0]);
2997 case nir_op_ufind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_HI, src[0]);
2998 case nir_op_ifind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_SHI, src[0]);
2999 case nir_op_find_lsb: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_LO, src[0]);
3000 case nir_op_imax: return emit_binary_intin(ctx, alu, DXIL_INTR_IMAX, src[0], src[1]);
3001 case nir_op_imin: return emit_binary_intin(ctx, alu, DXIL_INTR_IMIN, src[0], src[1]);
3002 case nir_op_umax: return emit_binary_intin(ctx, alu, DXIL_INTR_UMAX, src[0], src[1]);
3003 case nir_op_umin: return emit_binary_intin(ctx, alu, DXIL_INTR_UMIN, src[0], src[1]);
3004 case nir_op_frsq: return emit_unary_intin(ctx, alu, DXIL_INTR_RSQRT, src[0]);
3005 case nir_op_fsqrt: return emit_unary_intin(ctx, alu, DXIL_INTR_SQRT, src[0]);
3006 case nir_op_fmax: return emit_binary_intin(ctx, alu, DXIL_INTR_FMAX, src[0], src[1]);
3007 case nir_op_fmin: return emit_binary_intin(ctx, alu, DXIL_INTR_FMIN, src[0], src[1]);
3008 case nir_op_ffma:
3009 if (alu->def.bit_size == 64)
3010 ctx->mod.feats.dx11_1_double_extensions = 1;
3011 return emit_tertiary_intin(ctx, alu, DXIL_INTR_FMA, src[0], src[1], src[2]);
3012
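/* NIR's (i/u)bfe sources are (value, offset, bits); the DXIL intrinsics take
 * the operands in the opposite order, hence the reversed src[] indices below.
 */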
3013 case nir_op_ibfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_IBFE, src[2], src[1], src[0]);
3014 case nir_op_ubfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_UBFE, src[2], src[1], src[0]);
3015 case nir_op_bitfield_insert: return emit_bitfield_insert(ctx, alu, src[0], src[1], src[2], src[3]);
3016
3017 case nir_op_unpack_half_2x16_split_x: return emit_f16tof32(ctx, alu, src[0], false);
3018 case nir_op_unpack_half_2x16_split_y: return emit_f16tof32(ctx, alu, src[0], true);
3019 case nir_op_pack_half_2x16_split: return emit_f32tof16(ctx, alu, src[0], src[1]);
3020
3021 case nir_op_sdot_4x8_iadd: return emit_dot4add_packed(ctx, alu, DXIL_INTR_DOT4_ADD_I8_PACKED, src[0], src[1], src[2]);
3022 case nir_op_udot_4x8_uadd: return emit_dot4add_packed(ctx, alu, DXIL_INTR_DOT4_ADD_U8_PACKED, src[0], src[1], src[2]);
3023
3024 case nir_op_i2i1:
3025 case nir_op_u2u1:
3026 case nir_op_b2i16:
3027 case nir_op_i2i16:
3028 case nir_op_i2imp:
3029 case nir_op_f2i16:
3030 case nir_op_f2imp:
3031 case nir_op_f2u16:
3032 case nir_op_f2ump:
3033 case nir_op_u2u16:
3034 case nir_op_u2f16:
3035 case nir_op_u2fmp:
3036 case nir_op_i2f16:
3037 case nir_op_i2fmp:
3038 case nir_op_f2f16_rtz:
3039 case nir_op_f2f16:
3040 case nir_op_f2fmp:
3041 case nir_op_b2i32:
3042 case nir_op_f2f32:
3043 case nir_op_f2i32:
3044 case nir_op_f2u32:
3045 case nir_op_i2f32:
3046 case nir_op_i2i32:
3047 case nir_op_u2f32:
3048 case nir_op_u2u32:
3049 case nir_op_b2i64:
3050 case nir_op_f2f64:
3051 case nir_op_f2i64:
3052 case nir_op_f2u64:
3053 case nir_op_i2f64:
3054 case nir_op_i2i64:
3055 case nir_op_u2f64:
3056 case nir_op_u2u64:
3057 return emit_cast(ctx, alu, src[0]);
3058
3059 case nir_op_b2f16: return emit_b2f16(ctx, alu, src[0]);
3060 case nir_op_b2f32: return emit_b2f32(ctx, alu, src[0]);
3061 case nir_op_b2f64: return emit_b2f64(ctx, alu, src[0]);
3062 default:
3063 log_nir_instr_unsupported(ctx->logger, "Unimplemented ALU instruction",
3064 &alu->instr);
3065 return false;
3066 }
3067 }
3068
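/* cbufferLoadLegacy returns an entire 16-byte constant buffer row; callers
 * extract the individual components with extractval (see emit_load_ubo_vec4).
 */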
3069 static const struct dxil_value *
3070 load_ubo(struct ntd_context *ctx, const struct dxil_value *handle,
3071 const struct dxil_value *offset, enum overload_type overload)
3072 {
3073 assert(handle && offset);
3074
3075 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CBUFFER_LOAD_LEGACY);
3076 if (!opcode)
3077 return NULL;
3078
3079 const struct dxil_value *args[] = {
3080 opcode, handle, offset
3081 };
3082
3083 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cbufferLoadLegacy", overload);
3084 if (!func)
3085 return NULL;
3086 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3087 }
3088
3089 static bool
3090 emit_barrier_impl(struct ntd_context *ctx, nir_variable_mode modes, mesa_scope execution_scope, mesa_scope mem_scope)
3091 {
3092 const struct dxil_value *opcode, *mode;
3093 const struct dxil_func *func;
3094 uint32_t flags = 0;
3095
3096 if (execution_scope == SCOPE_WORKGROUP)
3097 flags |= DXIL_BARRIER_MODE_SYNC_THREAD_GROUP;
3098
3099 bool is_compute = ctx->mod.shader_kind == DXIL_COMPUTE_SHADER;
3100
3101 if ((modes & (nir_var_mem_ssbo | nir_var_mem_global | nir_var_image)) &&
3102 (mem_scope > SCOPE_WORKGROUP || !is_compute)) {
3103 flags |= DXIL_BARRIER_MODE_UAV_FENCE_GLOBAL;
3104 } else {
3105 flags |= DXIL_BARRIER_MODE_UAV_FENCE_THREAD_GROUP;
3106 }
3107
3108 if ((modes & nir_var_mem_shared) && is_compute)
3109 flags |= DXIL_BARRIER_MODE_GROUPSHARED_MEM_FENCE;
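/* e.g. a compute-shader barrier() over shared memory ends up as
 * SYNC_THREAD_GROUP | UAV_FENCE_THREAD_GROUP | GROUPSHARED_MEM_FENCE.
 */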
3110
3111 func = dxil_get_function(&ctx->mod, "dx.op.barrier", DXIL_NONE);
3112 if (!func)
3113 return false;
3114
3115 opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_BARRIER);
3116 if (!opcode)
3117 return false;
3118
3119 mode = dxil_module_get_int32_const(&ctx->mod, flags);
3120 if (!mode)
3121 return false;
3122
3123 const struct dxil_value *args[] = { opcode, mode };
3124
3125 return dxil_emit_call_void(&ctx->mod, func,
3126 args, ARRAY_SIZE(args));
3127 }
3128
3129 static bool
3130 emit_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3131 {
3132 return emit_barrier_impl(ctx,
3133 nir_intrinsic_memory_modes(intr),
3134 nir_intrinsic_execution_scope(intr),
3135 nir_intrinsic_memory_scope(intr));
3136 }
3137
3138 static bool
3139 emit_load_global_invocation_id(struct ntd_context *ctx,
3140 nir_intrinsic_instr *intr)
3141 {
3142 nir_component_mask_t comps = nir_def_components_read(&intr->def);
3143
3144 for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
3145 if (comps & (1 << i)) {
3146 const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
3147 if (!idx)
3148 return false;
3149 const struct dxil_value *globalid = emit_threadid_call(ctx, idx);
3150
3151 if (!globalid)
3152 return false;
3153
3154 store_def(ctx, &intr->def, i, globalid);
3155 }
3156 }
3157 return true;
3158 }
3159
3160 static bool
3161 emit_load_local_invocation_id(struct ntd_context *ctx,
3162 nir_intrinsic_instr *intr)
3163 {
3164 nir_component_mask_t comps = nir_def_components_read(&intr->def);
3165
3166 for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
3167 if (comps & (1 << i)) {
3168 const struct dxil_value
3169 *idx = dxil_module_get_int32_const(&ctx->mod, i);
3170 if (!idx)
3171 return false;
3172 const struct dxil_value
3173 *threadidingroup = emit_threadidingroup_call(ctx, idx);
3174 if (!threadidingroup)
3175 return false;
3176 store_def(ctx, &intr->def, i, threadidingroup);
3177 }
3178 }
3179 return true;
3180 }
3181
3182 static bool
3183 emit_load_local_invocation_index(struct ntd_context *ctx,
3184 nir_intrinsic_instr *intr)
3185 {
3186 const struct dxil_value
3187 *flattenedthreadidingroup = emit_flattenedthreadidingroup_call(ctx);
3188 if (!flattenedthreadidingroup)
3189 return false;
3190 store_def(ctx, &intr->def, 0, flattenedthreadidingroup);
3191
3192 return true;
3193 }
3194
3195 static bool
3196 emit_load_local_workgroup_id(struct ntd_context *ctx,
3197 nir_intrinsic_instr *intr)
3198 {
3199 nir_component_mask_t comps = nir_def_components_read(&intr->def);
3200
3201 for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
3202 if (comps & (1 << i)) {
3203 const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
3204 if (!idx)
3205 return false;
3206 const struct dxil_value *groupid = emit_groupid_call(ctx, idx);
3207 if (!groupid)
3208 return false;
3209 store_def(ctx, &intr->def, i, groupid);
3210 }
3211 }
3212 return true;
3213 }
3214
3215 static const struct dxil_value *
3216 call_unary_external_function(struct ntd_context *ctx,
3217 const char *name,
3218 int32_t dxil_intr,
3219 enum overload_type overload)
3220 {
3221 const struct dxil_func *func =
3222 dxil_get_function(&ctx->mod, name, overload);
3223 if (!func)
3224 return NULL;
3225
3226 const struct dxil_value *opcode =
3227 dxil_module_get_int32_const(&ctx->mod, dxil_intr);
3228 if (!opcode)
3229 return NULL;
3230
3231 const struct dxil_value *args[] = {opcode};
3232
3233 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3234 }
3235
3236 static bool
3237 emit_load_unary_external_function(struct ntd_context *ctx,
3238 nir_intrinsic_instr *intr, const char *name,
3239 int32_t dxil_intr,
3240 nir_alu_type type)
3241 {
3242 const struct dxil_value *value = call_unary_external_function(ctx, name, dxil_intr,
3243 get_overload(type, intr->def.bit_size));
3244 store_def(ctx, &intr->def, 0, value);
3245
3246 return true;
3247 }
3248
3249 static bool
3250 emit_load_sample_mask_in(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3251 {
3252 const struct dxil_value *value = call_unary_external_function(ctx,
3253 "dx.op.coverage", DXIL_INTR_COVERAGE, DXIL_I32);
3254
3255 /* Mask coverage with (1 << sample index). Note, done as an AND to handle extrapolation cases. */
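/* e.g. with sample index 2 the result is coverage & 0b100. */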
3256 if (ctx->mod.info.has_per_sample_input) {
3257 value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_AND, value,
3258 dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL,
3259 dxil_module_get_int32_const(&ctx->mod, 1),
3260 call_unary_external_function(ctx, "dx.op.sampleIndex", DXIL_INTR_SAMPLE_INDEX, DXIL_I32), 0), 0);
3261 }
3262
3263 store_def(ctx, &intr->def, 0, value);
3264 return true;
3265 }
3266
3267 static bool
3268 emit_load_tess_coord(struct ntd_context *ctx,
3269 nir_intrinsic_instr *intr)
3270 {
3271 const struct dxil_func *func =
3272 dxil_get_function(&ctx->mod, "dx.op.domainLocation", DXIL_F32);
3273 if (!func)
3274 return false;
3275
3276 const struct dxil_value *opcode =
3277 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DOMAIN_LOCATION);
3278 if (!opcode)
3279 return false;
3280
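/* Triangle domains have 3 domain-location coordinates, quad and isoline
 * domains have 2; any extra requested components are zero-filled below.
 */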
3281 unsigned num_coords = ctx->shader->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES ? 3 : 2;
3282 for (unsigned i = 0; i < num_coords; ++i) {
3283 unsigned component_idx = i;
3284
3285 const struct dxil_value *component = dxil_module_get_int8_const(&ctx->mod, component_idx);
3286 if (!component)
3287 return false;
3288
3289 const struct dxil_value *args[] = { opcode, component };
3290
3291 const struct dxil_value *value =
3292 dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3293 store_def(ctx, &intr->def, i, value);
3294 }
3295
3296 for (unsigned i = num_coords; i < intr->def.num_components; ++i) {
3297 const struct dxil_value *value = dxil_module_get_float_const(&ctx->mod, 0.0f);
3298 store_def(ctx, &intr->def, i, value);
3299 }
3300
3301 return true;
3302 }
3303
3304 static const struct dxil_value *
3305 get_int32_undef(struct dxil_module *m)
3306 {
3307 const struct dxil_type *int32_type =
3308 dxil_module_get_int_type(m, 32);
3309 if (!int32_type)
3310 return NULL;
3311
3312 return dxil_module_get_undef(m, int32_type);
3313 }
3314
3315 static const struct dxil_value *
3316 get_resource_handle(struct ntd_context *ctx, nir_src *src, enum dxil_resource_class class,
3317 enum dxil_resource_kind kind)
3318 {
3319 /* This source might be one of:
3320 * 1. Constant resource index - just look it up in precomputed handle arrays
3321 * If it's null in that array, create a handle
3322 * 2. A handle from load_vulkan_descriptor - just get the stored SSA value
3323 * 3. Dynamic resource index - create a handle for it here
3324 */
3325 assert(src->ssa->num_components == 1 && src->ssa->bit_size == 32);
3326 nir_const_value *const_block_index = nir_src_as_const_value(*src);
3327 const struct dxil_value *handle_entry = NULL;
3328 if (const_block_index) {
3329 assert(ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN);
3330 switch (kind) {
3331 case DXIL_RESOURCE_KIND_CBUFFER:
3332 handle_entry = ctx->cbv_handles[const_block_index->u32];
3333 break;
3334 case DXIL_RESOURCE_KIND_RAW_BUFFER:
3335 if (class == DXIL_RESOURCE_CLASS_UAV)
3336 handle_entry = ctx->ssbo_handles[const_block_index->u32];
3337 else
3338 handle_entry = ctx->srv_handles[const_block_index->u32];
3339 break;
3340 case DXIL_RESOURCE_KIND_SAMPLER:
3341 handle_entry = ctx->sampler_handles[const_block_index->u32];
3342 break;
3343 default:
3344 if (class == DXIL_RESOURCE_CLASS_UAV)
3345 handle_entry = ctx->image_handles[const_block_index->u32];
3346 else
3347 handle_entry = ctx->srv_handles[const_block_index->u32];
3348 break;
3349 }
3350 }
3351
3352 if (handle_entry)
3353 return handle_entry;
3354
3355 if (nir_src_as_deref(*src) ||
3356 ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
3357 return get_src_ssa(ctx, src->ssa, 0);
3358 }
3359
3360 unsigned space = 0;
3361 if (ctx->opts->environment == DXIL_ENVIRONMENT_GL &&
3362 class == DXIL_RESOURCE_CLASS_UAV) {
3363 if (kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
3364 space = 2;
3365 else
3366 space = 1;
3367 }
3368
3369 /* The base binding here will almost always be zero. The only cases where we end
3370 * up in this type of dynamic indexing are:
3371 * 1. GL UBOs
3372 * 2. GL SSBOs
3373 * 3. CL SSBOs
3374 * In all cases except GL UBOs, the resources are a single zero-based array.
3375 * In that case, the base is 1, because uniforms use 0 and cannot be dynamically
3376 * indexed. All other cases should either fall into static indexing (first early return),
3377 * deref-based dynamic handle creation (images, or Vulkan textures/samplers), or
3378 * load_vulkan_descriptor handle creation.
3379 */
3380 unsigned base_binding = 0;
3381 if (ctx->shader->info.first_ubo_is_default_ubo &&
3382 class == DXIL_RESOURCE_CLASS_CBV)
3383 base_binding = 1;
3384
3385 const struct dxil_value *value = get_src(ctx, src, 0, nir_type_uint);
3386 const struct dxil_value *handle = emit_createhandle_call_dynamic(ctx, class,
3387 space, base_binding, value, !const_block_index);
3388
3389 return handle;
3390 }
3391
3392 static const struct dxil_value *
3393 create_image_handle(struct ntd_context *ctx, nir_intrinsic_instr *image_intr)
3394 {
3395 const struct dxil_value *unannotated_handle =
3396 emit_createhandle_heap(ctx, get_src(ctx, &image_intr->src[0], 0, nir_type_uint32), false, true /*TODO: divergence*/);
3397 const struct dxil_value *res_props =
3398 dxil_module_get_uav_res_props_const(&ctx->mod, image_intr);
3399
3400 if (!unannotated_handle || !res_props)
3401 return NULL;
3402
3403 return emit_annotate_handle(ctx, unannotated_handle, res_props);
3404 }
3405
3406 static const struct dxil_value *
3407 create_srv_handle(struct ntd_context *ctx, nir_tex_instr *tex, nir_src *src)
3408 {
3409 const struct dxil_value *unannotated_handle =
3410 emit_createhandle_heap(ctx, get_src(ctx, src, 0, nir_type_uint32), false, true /*TODO: divergence*/);
3411 const struct dxil_value *res_props =
3412 dxil_module_get_srv_res_props_const(&ctx->mod, tex);
3413
3414 if (!unannotated_handle || !res_props)
3415 return NULL;
3416
3417 return emit_annotate_handle(ctx, unannotated_handle, res_props);
3418 }
3419
3420 static const struct dxil_value *
3421 create_sampler_handle(struct ntd_context *ctx, bool is_shadow, nir_src *src)
3422 {
3423 const struct dxil_value *unannotated_handle =
3424 emit_createhandle_heap(ctx, get_src(ctx, src, 0, nir_type_uint32), true, true /*TODO: divergence*/);
3425 const struct dxil_value *res_props =
3426 dxil_module_get_sampler_res_props_const(&ctx->mod, is_shadow);
3427
3428 if (!unannotated_handle || !res_props)
3429 return NULL;
3430
3431 return emit_annotate_handle(ctx, unannotated_handle, res_props);
3432 }
3433
3434 static bool
3435 emit_load_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3436 {
3437 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
3438
3439 enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV;
3440 if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
3441 nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
3442 if (var && var->data.access & ACCESS_NON_WRITEABLE)
3443 class = DXIL_RESOURCE_CLASS_SRV;
3444 }
3445
3446 const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER);
3447 const struct dxil_value *offset =
3448 get_src(ctx, &intr->src[1], 0, nir_type_uint);
3449 if (!int32_undef || !handle || !offset)
3450 return false;
3451
3452 assert(nir_src_bit_size(intr->src[0]) == 32);
3453 assert(nir_intrinsic_dest_components(intr) <= 4);
3454
3455 const struct dxil_value *coord[2] = {
3456 offset,
3457 int32_undef
3458 };
3459
3460 enum overload_type overload = get_ambiguous_overload_alu_type(ctx, intr, nir_type_uint);
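/* rawBufferLoad was introduced in SM 6.2 (minor_version >= 2); older targets
 * fall back to the legacy typed bufferLoad.
 */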
3461 const struct dxil_value *load = ctx->mod.minor_version >= 2 ?
3462 emit_raw_bufferload_call(ctx, handle, coord,
3463 overload,
3464 nir_intrinsic_dest_components(intr),
3465 intr->def.bit_size / 8) :
3466 emit_bufferload_call(ctx, handle, coord, overload);
3467 if (!load)
3468 return false;
3469
3470 for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
3471 const struct dxil_value *val =
3472 dxil_emit_extractval(&ctx->mod, load, i);
3473 if (!val)
3474 return false;
3475 store_def(ctx, &intr->def, i, val);
3476 }
3477 if (intr->def.bit_size == 16)
3478 ctx->mod.feats.native_low_precision = true;
3479 return true;
3480 }
3481
3482 static bool
3483 emit_store_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3484 {
3485 const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[1], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
3486 const struct dxil_value *offset =
3487 get_src(ctx, &intr->src[2], 0, nir_type_uint);
3488 if (!handle || !offset)
3489 return false;
3490
3491 unsigned num_components = nir_src_num_components(intr->src[0]);
3492 assert(num_components <= 4);
3493 if (nir_src_bit_size(intr->src[0]) == 16)
3494 ctx->mod.feats.native_low_precision = true;
3495
3496 nir_alu_type type =
3497 dxil_type_to_nir_type(dxil_value_get_type(get_src_ssa(ctx, intr->src[0].ssa, 0)));
3498 const struct dxil_value *value[4] = { 0 };
3499 for (unsigned i = 0; i < num_components; ++i) {
3500 value[i] = get_src(ctx, &intr->src[0], i, type);
3501 if (!value[i])
3502 return false;
3503 }
3504
3505 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
3506 if (!int32_undef)
3507 return false;
3508
3509 const struct dxil_value *coord[2] = {
3510 offset,
3511 int32_undef
3512 };
3513
3514 enum overload_type overload = get_overload(type, intr->src[0].ssa->bit_size);
3515 if (num_components < 4) {
3516 const struct dxil_value *value_undef = dxil_module_get_undef(&ctx->mod, dxil_value_get_type(value[0]));
3517 if (!value_undef)
3518 return false;
3519
3520 for (int i = num_components; i < 4; ++i)
3521 value[i] = value_undef;
3522 }
3523
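/* e.g. a 3-component store produces write mask 0b0111. */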
3524 const struct dxil_value *write_mask =
3525 dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
3526 if (!write_mask)
3527 return false;
3528
3529 return ctx->mod.minor_version >= 2 ?
3530 emit_raw_bufferstore_call(ctx, handle, coord, value, write_mask, overload, intr->src[0].ssa->bit_size / 8) :
3531 emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload);
3532 }
3533
3534 static bool
3535 emit_load_ubo_vec4(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3536 {
3537 const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, DXIL_RESOURCE_KIND_CBUFFER);
3538 const struct dxil_value *offset =
3539 get_src(ctx, &intr->src[1], 0, nir_type_uint);
3540
3541 if (!handle || !offset)
3542 return false;
3543
3544 enum overload_type overload = get_ambiguous_overload_alu_type(ctx, intr, nir_type_uint);
3545 const struct dxil_value *agg = load_ubo(ctx, handle, offset, overload);
3546 if (!agg)
3547 return false;
3548
3549 unsigned first_component = nir_intrinsic_has_component(intr) ?
3550 nir_intrinsic_component(intr) : 0;
3551 for (unsigned i = 0; i < intr->def.num_components; i++)
3552 store_def(ctx, &intr->def, i,
3553 dxil_emit_extractval(&ctx->mod, agg, i + first_component));
3554
3555 if (intr->def.bit_size == 16)
3556 ctx->mod.feats.native_low_precision = true;
3557 return true;
3558 }
3559
3560 /* Need to add patch-ness as a matching parameter, since driver_location is *not* unique
3561 * between control points and patch variables in HS/DS
3562 */
3563 static nir_variable *
3564 find_patch_matching_variable_by_driver_location(nir_shader *s, nir_variable_mode mode, unsigned driver_location, bool patch)
3565 {
3566 nir_foreach_variable_with_modes(var, s, mode) {
3567 if (var->data.driver_location == driver_location &&
3568 var->data.patch == patch)
3569 return var;
3570 }
3571 return NULL;
3572 }
3573
3574 static bool
3575 emit_store_output_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3576 {
3577 assert(intr->intrinsic == nir_intrinsic_store_output ||
3578 ctx->mod.shader_kind == DXIL_HULL_SHADER);
3579 bool is_patch_constant = intr->intrinsic == nir_intrinsic_store_output &&
3580 ctx->mod.shader_kind == DXIL_HULL_SHADER;
3581 nir_alu_type out_type = nir_intrinsic_src_type(intr);
3582 enum overload_type overload = get_overload(out_type, intr->src[0].ssa->bit_size);
3583 const struct dxil_func *func = dxil_get_function(&ctx->mod, is_patch_constant ?
3584 "dx.op.storePatchConstant" : "dx.op.storeOutput",
3585 overload);
3586
3587 if (!func)
3588 return false;
3589
3590 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, is_patch_constant ?
3591 DXIL_INTR_STORE_PATCH_CONSTANT : DXIL_INTR_STORE_OUTPUT);
3592 uint8_t *io_mappings = is_patch_constant ? ctx->mod.patch_mappings : ctx->mod.output_mappings;
3593 uint8_t io_index = io_mappings[nir_intrinsic_base(intr)];
3594 const struct dxil_value *output_id = dxil_module_get_int32_const(&ctx->mod, io_index);
3595 unsigned row_index = intr->intrinsic == nir_intrinsic_store_output ? 1 : 2;
3596
3597 /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature
3598 * generation, so muck with them here too.
3599 */
3600 nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
3601 bool is_tess_level = is_patch_constant &&
3602 (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
3603 semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER);
3604
3605 const struct dxil_value *row = NULL;
3606 const struct dxil_value *col = NULL;
3607 if (is_tess_level)
3608 col = dxil_module_get_int8_const(&ctx->mod, 0);
3609 else
3610 row = get_src(ctx, &intr->src[row_index], 0, nir_type_int);
3611
3612 bool success = true;
3613 uint32_t writemask = nir_intrinsic_write_mask(intr);
3614
3615 nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_out, nir_intrinsic_base(intr), is_patch_constant);
3616 unsigned var_base_component = var->data.location_frac;
3617 unsigned base_component = nir_intrinsic_component(intr) - var_base_component;
3618
3619 if (ctx->mod.minor_validator >= 5) {
3620 struct dxil_signature_record *sig_rec = is_patch_constant ?
3621 &ctx->mod.patch_consts[io_index] :
3622 &ctx->mod.outputs[io_index];
3623 unsigned comp_size = intr->src[0].ssa->bit_size == 64 ? 2 : 1;
3624 unsigned comp_mask = 0;
3625 if (is_tess_level)
3626 comp_mask = 1;
3627 else if (comp_size == 1)
3628 comp_mask = writemask << var_base_component;
3629 else {
3630 for (unsigned i = 0; i < intr->num_components; ++i)
3631 if ((writemask & (1 << i)))
3632 comp_mask |= 3 << ((i + var_base_component) * comp_size);
3633 }
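/* e.g. a 64-bit store with writemask 0b11 covers four 32-bit signature
 * slots: comp_mask == 0b1111.
 */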
3634 for (unsigned r = 0; r < sig_rec->num_elements; ++r)
3635 sig_rec->elements[r].never_writes_mask &= ~comp_mask;
3636
3637 if (!nir_src_is_const(intr->src[row_index])) {
3638 struct dxil_psv_signature_element *psv_rec = is_patch_constant ?
3639 &ctx->mod.psv_patch_consts[io_index] :
3640 &ctx->mod.psv_outputs[io_index];
3641 psv_rec->dynamic_mask_and_stream |= comp_mask;
3642 }
3643 }
3644
3645 for (unsigned i = 0; i < intr->num_components && success; ++i) {
3646 if (writemask & (1 << i)) {
3647 if (is_tess_level)
3648 row = dxil_module_get_int32_const(&ctx->mod, i + base_component);
3649 else
3650 col = dxil_module_get_int8_const(&ctx->mod, i + base_component);
3651 const struct dxil_value *value = get_src(ctx, &intr->src[0], i, out_type);
3652 if (!col || !row || !value)
3653 return false;
3654
3655 const struct dxil_value *args[] = {
3656 opcode, output_id, row, col, value
3657 };
3658 success &= dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
3659 }
3660 }
3661
3662 return success;
3663 }
3664
3665 static bool
3666 emit_load_input_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3667 {
3668 bool attr_at_vertex = false;
3669 if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER &&
3670 ctx->opts->interpolate_at_vertex &&
3671 ctx->opts->provoking_vertex != 0 &&
3672 (nir_intrinsic_dest_type(intr) & nir_type_float)) {
3673 nir_variable *var = nir_find_variable_with_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr));
3674
3675 attr_at_vertex = var && var->data.interpolation == INTERP_MODE_FLAT;
3676 }
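/* When emulating a non-default provoking vertex, flat inputs are read with
 * attributeAtVertex at the chosen vertex instead of loadInput.
 */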
3677
3678 bool is_patch_constant = (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER &&
3679 intr->intrinsic == nir_intrinsic_load_input) ||
3680 (ctx->mod.shader_kind == DXIL_HULL_SHADER &&
3681 intr->intrinsic == nir_intrinsic_load_output);
3682 bool is_output_control_point = intr->intrinsic == nir_intrinsic_load_per_vertex_output;
3683
3684 unsigned opcode_val;
3685 const char *func_name;
3686 if (attr_at_vertex) {
3687 opcode_val = DXIL_INTR_ATTRIBUTE_AT_VERTEX;
3688 func_name = "dx.op.attributeAtVertex";
3689 if (ctx->mod.minor_validator >= 6)
3690 ctx->mod.feats.barycentrics = 1;
3691 } else if (is_patch_constant) {
3692 opcode_val = DXIL_INTR_LOAD_PATCH_CONSTANT;
3693 func_name = "dx.op.loadPatchConstant";
3694 } else if (is_output_control_point) {
3695 opcode_val = DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT;
3696 func_name = "dx.op.loadOutputControlPoint";
3697 } else {
3698 opcode_val = DXIL_INTR_LOAD_INPUT;
3699 func_name = "dx.op.loadInput";
3700 }
3701
3702 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, opcode_val);
3703 if (!opcode)
3704 return false;
3705
3706 uint8_t *io_mappings =
3707 is_patch_constant ? ctx->mod.patch_mappings :
3708 is_output_control_point ? ctx->mod.output_mappings :
3709 ctx->mod.input_mappings;
3710 uint8_t io_index = io_mappings[nir_intrinsic_base(intr)];
3711 const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, io_index);
3712 if (!input_id)
3713 return false;
3714
3715 bool is_per_vertex =
3716 intr->intrinsic == nir_intrinsic_load_per_vertex_input ||
3717 intr->intrinsic == nir_intrinsic_load_per_vertex_output;
3718 int row_index = is_per_vertex ? 1 : 0;
3719 const struct dxil_value *vertex_id = NULL;
3720 if (!is_patch_constant) {
3721 if (is_per_vertex) {
3722 vertex_id = get_src(ctx, &intr->src[0], 0, nir_type_int);
3723 } else if (attr_at_vertex) {
3724 vertex_id = dxil_module_get_int8_const(&ctx->mod, ctx->opts->provoking_vertex);
3725 } else {
3726 const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
3727 if (!int32_type)
3728 return false;
3729
3730 vertex_id = dxil_module_get_undef(&ctx->mod, int32_type);
3731 }
3732 if (!vertex_id)
3733 return false;
3734 }
3735
3736 /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature
3737 * generation, so muck with them here too.
3738 */
3739 nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
3740 bool is_tess_level = is_patch_constant &&
3741 (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
3742 semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER);
3743
3744 const struct dxil_value *row = NULL;
3745 const struct dxil_value *comp = NULL;
3746 if (is_tess_level)
3747 comp = dxil_module_get_int8_const(&ctx->mod, 0);
3748 else
3749 row = get_src(ctx, &intr->src[row_index], 0, nir_type_int);
3750
3751 nir_alu_type out_type = nir_intrinsic_dest_type(intr);
3752 enum overload_type overload = get_overload(out_type, intr->def.bit_size);
3753
3754 const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, overload);
3755
3756 if (!func)
3757 return false;
3758
3759 nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), is_patch_constant);
3760 unsigned var_base_component = var ? var->data.location_frac : 0;
3761 unsigned base_component = nir_intrinsic_component(intr) - var_base_component;
3762
3763 if (ctx->mod.minor_validator >= 5 &&
3764 !is_output_control_point &&
3765 intr->intrinsic != nir_intrinsic_load_output) {
3766 struct dxil_signature_record *sig_rec = is_patch_constant ?
3767 &ctx->mod.patch_consts[io_index] :
3768 &ctx->mod.inputs[io_index];
3769 unsigned comp_size = intr->def.bit_size == 64 ? 2 : 1;
3770 unsigned comp_mask = (1 << (intr->num_components * comp_size)) - 1;
3771 comp_mask <<= (var_base_component * comp_size);
3772 if (is_tess_level)
3773 comp_mask = 1;
3774 for (unsigned r = 0; r < sig_rec->num_elements; ++r)
3775 sig_rec->elements[r].always_reads_mask |= (comp_mask & sig_rec->elements[r].mask);
3776
3777 if (!nir_src_is_const(intr->src[row_index])) {
3778 struct dxil_psv_signature_element *psv_rec = is_patch_constant ?
3779 &ctx->mod.psv_patch_consts[io_index] :
3780 &ctx->mod.psv_inputs[io_index];
3781 psv_rec->dynamic_mask_and_stream |= comp_mask;
3782 }
3783 }
3784
3785 for (unsigned i = 0; i < intr->num_components; ++i) {
3786 if (is_tess_level)
3787 row = dxil_module_get_int32_const(&ctx->mod, i + base_component);
3788 else
3789 comp = dxil_module_get_int8_const(&ctx->mod, i + base_component);
3790
3791 if (!row || !comp)
3792 return false;
3793
3794 const struct dxil_value *args[] = {
3795 opcode, input_id, row, comp, vertex_id
3796 };
3797
3798 unsigned num_args = ARRAY_SIZE(args) - (is_patch_constant ? 1 : 0);
3799 const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args);
3800 if (!retval)
3801 return false;
3802 store_def(ctx, &intr->def, i, retval);
3803 }
3804 return true;
3805 }
3806
3807 static bool
3808 emit_load_interpolated_input(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3809 {
3810 nir_intrinsic_instr *barycentric = nir_src_as_intrinsic(intr->src[0]);
3811
3812 const struct dxil_value *args[6] = { 0 };
3813
3814 unsigned opcode_val;
3815 const char *func_name;
3816 unsigned num_args;
3817 switch (barycentric->intrinsic) {
3818 case nir_intrinsic_load_barycentric_at_offset:
3819 opcode_val = DXIL_INTR_EVAL_SNAPPED;
3820 func_name = "dx.op.evalSnapped";
3821 num_args = 6;
3822 for (unsigned i = 0; i < 2; ++i) {
3823 const struct dxil_value *float_offset = get_src(ctx, &barycentric->src[0], i, nir_type_float);
3824 /* GLSL uses [-0.5f, 0.5f), DXIL uses (-8, 7) */
3825 const struct dxil_value *offset_16 = dxil_emit_binop(&ctx->mod,
3826 DXIL_BINOP_MUL, float_offset, dxil_module_get_float_const(&ctx->mod, 16.0f), 0);
3827 args[i + 4] = dxil_emit_cast(&ctx->mod, DXIL_CAST_FPTOSI,
3828 dxil_module_get_int_type(&ctx->mod, 32), offset_16);
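/* e.g. a GLSL offset of 0.25f scales to snapped grid position 4. */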
3829 }
3830 break;
3831 case nir_intrinsic_load_barycentric_pixel:
3832 opcode_val = DXIL_INTR_EVAL_SNAPPED;
3833 func_name = "dx.op.evalSnapped";
3834 num_args = 6;
3835 args[4] = args[5] = dxil_module_get_int32_const(&ctx->mod, 0);
3836 break;
3837 case nir_intrinsic_load_barycentric_at_sample:
3838 opcode_val = DXIL_INTR_EVAL_SAMPLE_INDEX;
3839 func_name = "dx.op.evalSampleIndex";
3840 num_args = 5;
3841 args[4] = get_src(ctx, &barycentric->src[0], 0, nir_type_int);
3842 break;
3843 case nir_intrinsic_load_barycentric_centroid:
3844 opcode_val = DXIL_INTR_EVAL_CENTROID;
3845 func_name = "dx.op.evalCentroid";
3846 num_args = 4;
3847 break;
3848 default:
3849 unreachable("Unsupported interpolation barycentric intrinsic");
3850 }
3851 uint8_t io_index = ctx->mod.input_mappings[nir_intrinsic_base(intr)];
3852 args[0] = dxil_module_get_int32_const(&ctx->mod, opcode_val);
3853 args[1] = dxil_module_get_int32_const(&ctx->mod, io_index);
3854 args[2] = get_src(ctx, &intr->src[1], 0, nir_type_int);
3855
3856 const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, DXIL_F32);
3857
3858 if (!func)
3859 return false;
3860
3861 nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), false);
3862 unsigned var_base_component = var ? var->data.location_frac : 0;
3863 unsigned base_component = nir_intrinsic_component(intr) - var_base_component;
3864
3865 if (ctx->mod.minor_validator >= 5) {
3866 struct dxil_signature_record *sig_rec = &ctx->mod.inputs[io_index];
3867 unsigned comp_size = intr->def.bit_size == 64 ? 2 : 1;
3868 unsigned comp_mask = (1 << (intr->num_components * comp_size)) - 1;
3869 comp_mask <<= (var_base_component * comp_size);
3870 for (unsigned r = 0; r < sig_rec->num_elements; ++r)
3871 sig_rec->elements[r].always_reads_mask |= (comp_mask & sig_rec->elements[r].mask);
3872
3873 if (!nir_src_is_const(intr->src[1])) {
3874 struct dxil_psv_signature_element *psv_rec = &ctx->mod.psv_inputs[io_index];
3875 psv_rec->dynamic_mask_and_stream |= comp_mask;
3876 }
3877 }
3878
3879 for (unsigned i = 0; i < intr->num_components; ++i) {
3880 args[3] = dxil_module_get_int8_const(&ctx->mod, i + base_component);
3881
3882 const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args);
3883 if (!retval)
3884 return false;
3885 store_def(ctx, &intr->def, i, retval);
3886 }
3887 return true;
3888 }
3889
3890 static const struct dxil_value *
3891 deref_to_gep(struct ntd_context *ctx, nir_deref_instr *deref)
3892 {
3893 nir_deref_path path;
3894 nir_deref_path_init(&path, deref, ctx->ralloc_ctx);
3895 assert(path.path[0]->deref_type == nir_deref_type_var);
3896 uint32_t count = 0;
3897 while (path.path[count])
3898 ++count;
3899
3900 const struct dxil_value **gep_indices = ralloc_array(ctx->ralloc_ctx,
3901 const struct dxil_value *,
3902 count + 1);
3903 nir_variable *var = path.path[0]->var;
3904 const struct dxil_value **var_array;
3905 switch (deref->modes) {
3906 case nir_var_mem_constant: var_array = ctx->consts; break;
3907 case nir_var_mem_shared: var_array = ctx->sharedvars; break;
3908 case nir_var_function_temp: var_array = ctx->scratchvars; break;
3909 default: unreachable("Invalid deref mode");
3910 }
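/* The GEP's pointer operand is the variable's storage; every deref in the
 * path then contributes one index, taken straight from its SSA value.
 */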
3911 gep_indices[0] = var_array[var->data.driver_location];
3912
3913 for (uint32_t i = 0; i < count; ++i)
3914 gep_indices[i + 1] = get_src_ssa(ctx, &path.path[i]->def, 0);
3915
3916 return dxil_emit_gep_inbounds(&ctx->mod, gep_indices, count + 1);
3917 }
3918
3919 static bool
3920 emit_load_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3921 {
3922 const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0]));
3923 if (!ptr)
3924 return false;
3925
3926 const struct dxil_value *retval =
3927 dxil_emit_load(&ctx->mod, ptr, intr->def.bit_size / 8, false);
3928 if (!retval)
3929 return false;
3930
3931 store_def(ctx, &intr->def, 0, retval);
3932 return true;
3933 }
3934
3935 static bool
3936 emit_store_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3937 {
3938 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
3939 const struct dxil_value *ptr = deref_to_gep(ctx, deref);
3940 if (!ptr)
3941 return false;
3942
3943 const struct dxil_value *value = get_src(ctx, &intr->src[1], 0, nir_get_nir_type_for_glsl_type(deref->type));
3944 return dxil_emit_store(&ctx->mod, value, ptr, nir_src_bit_size(intr->src[1]) / 8, false);
3945 }
3946
3947 static bool
3948 emit_atomic_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3949 {
3950 const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0]));
3951 if (!ptr)
3952 return false;
3953
3954 const struct dxil_value *value = get_src(ctx, &intr->src[1], 0, nir_type_uint);
3955 if (!value)
3956 return false;
3957
3958 enum dxil_rmw_op dxil_op = nir_atomic_to_dxil_rmw(nir_intrinsic_atomic_op(intr));
3959 const struct dxil_value *retval = dxil_emit_atomicrmw(&ctx->mod, value, ptr, dxil_op, false,
3960 DXIL_ATOMIC_ORDERING_ACQREL,
3961 DXIL_SYNC_SCOPE_CROSSTHREAD);
3962 if (!retval)
3963 return false;
3964
3965 store_def(ctx, &intr->def, 0, retval);
3966 return true;
3967 }
3968
3969 static bool
3970 emit_atomic_deref_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3971 {
3972 const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0]));
3973 if (!ptr)
3974 return false;
3975
3976 const struct dxil_value *cmp = get_src(ctx, &intr->src[1], 0, nir_type_uint);
3977 const struct dxil_value *value = get_src(ctx, &intr->src[2], 0, nir_type_uint);
3978 if (!value)
3979 return false;
3980
3981 const struct dxil_value *retval = dxil_emit_cmpxchg(&ctx->mod, cmp, value, ptr, false,
3982 DXIL_ATOMIC_ORDERING_ACQREL,
3983 DXIL_SYNC_SCOPE_CROSSTHREAD);
3984 if (!retval)
3985 return false;
3986
3987 store_def(ctx, &intr->def, 0, retval);
3988 return true;
3989 }
3990
3991 static bool
3992 emit_discard_if_with_value(struct ntd_context *ctx, const struct dxil_value *value)
3993 {
3994 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DISCARD);
3995 if (!opcode)
3996 return false;
3997
3998 const struct dxil_value *args[] = {
3999 opcode,
4000 value
4001 };
4002
4003 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.discard", DXIL_NONE);
4004 if (!func)
4005 return false;
4006
4007 return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
4008 }
4009
4010 static bool
4011 emit_discard_if(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4012 {
4013 const struct dxil_value *value = get_src(ctx, &intr->src[0], 0, nir_type_bool);
4014 if (!value)
4015 return false;
4016
4017 return emit_discard_if_with_value(ctx, value);
4018 }
4019
4020 static bool
4021 emit_discard(struct ntd_context *ctx)
4022 {
4023 const struct dxil_value *value = dxil_module_get_int1_const(&ctx->mod, true);
4024 return emit_discard_if_with_value(ctx, value);
4025 }
4026
4027 static bool
4028 emit_emit_vertex(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4029 {
4030 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_EMIT_STREAM);
4031 const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
4032 if (!opcode || !stream_id)
4033 return false;
4034
4035 const struct dxil_value *args[] = {
4036 opcode,
4037 stream_id
4038 };
4039
4040 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.emitStream", DXIL_NONE);
4041 if (!func)
4042 return false;
4043
4044 return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
4045 }
4046
4047 static bool
4048 emit_end_primitive(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4049 {
4050 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CUT_STREAM);
4051 const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
4052 if (!opcode || !stream_id)
4053 return false;
4054
4055 const struct dxil_value *args[] = {
4056 opcode,
4057 stream_id
4058 };
4059
4060 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cutStream", DXIL_NONE);
4061 if (!func)
4062 return false;
4063
4064 return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
4065 }
4066
4067 static bool
4068 emit_image_store(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4069 {
4070 const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_store ?
4071 create_image_handle(ctx, intr) :
4072 get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
4073 if (!handle)
4074 return false;
4075
4076 bool is_array = false;
4077 if (intr->intrinsic == nir_intrinsic_image_deref_store)
4078 is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
4079 else
4080 is_array = nir_intrinsic_image_array(intr);
4081
4082 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4083 if (!int32_undef)
4084 return false;
4085
4086 const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
4087 enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_store ?
4088 glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
4089 nir_intrinsic_image_dim(intr);
4090 unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
4091 if (is_array)
4092 ++num_coords;
4093
4094 assert(num_coords <= nir_src_num_components(intr->src[1]));
4095 for (unsigned i = 0; i < num_coords; ++i) {
4096 coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
4097 if (!coord[i])
4098 return false;
4099 }
4100
4101 nir_alu_type in_type = nir_intrinsic_src_type(intr);
4102 enum overload_type overload = get_overload(in_type, 32);
4103
4104 assert(nir_src_bit_size(intr->src[3]) == 32);
4105 unsigned num_components = nir_src_num_components(intr->src[3]);
4106 assert(num_components <= 4);
4107 const struct dxil_value *value[4];
4108 for (unsigned i = 0; i < num_components; ++i) {
4109 value[i] = get_src(ctx, &intr->src[3], i, in_type);
4110 if (!value[i])
4111 return false;
4112 }
4113
4114 for (int i = num_components; i < 4; ++i)
4115 value[i] = dxil_module_get_undef(&ctx->mod, dxil_value_get_type(value[0]));
4116
4117 const struct dxil_value *write_mask =
4118 dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
4119 if (!write_mask)
4120 return false;
4121
4122 if (image_dim == GLSL_SAMPLER_DIM_BUF) {
4123 coord[1] = int32_undef;
4124 return emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload);
4125 } else
4126 return emit_texturestore_call(ctx, handle, coord, value, write_mask, overload);
4127 }
4128
4129 static bool
4130 emit_image_load(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4131 {
4132 const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_load ?
4133 create_image_handle(ctx, intr) :
4134 get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
4135 if (!handle)
4136 return false;
4137
4138 bool is_array = false;
4139 if (intr->intrinsic == nir_intrinsic_image_deref_load)
4140 is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
4141 else
4142 is_array = nir_intrinsic_image_array(intr);
4143
4144 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4145 if (!int32_undef)
4146 return false;
4147
4148 const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
4149 enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_load ?
4150 glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
4151 nir_intrinsic_image_dim(intr);
4152 unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
4153 if (is_array)
4154 ++num_coords;
4155
4156 assert(num_coords <= nir_src_num_components(intr->src[1]));
4157 for (unsigned i = 0; i < num_coords; ++i) {
4158 coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
4159 if (!coord[i])
4160 return false;
4161 }
4162
4163 nir_alu_type out_type = nir_intrinsic_dest_type(intr);
4164 enum overload_type overload = get_overload(out_type, 32);
4165
4166 const struct dxil_value *load_result;
4167 if (image_dim == GLSL_SAMPLER_DIM_BUF) {
4168 coord[1] = int32_undef;
4169 load_result = emit_bufferload_call(ctx, handle, coord, overload);
4170 } else
4171 load_result = emit_textureload_call(ctx, handle, coord, overload);
4172
4173 if (!load_result)
4174 return false;
4175
4176 assert(intr->def.bit_size == 32);
4177 unsigned num_components = intr->def.num_components;
4178 assert(num_components <= 4);
4179 for (unsigned i = 0; i < num_components; ++i) {
4180 const struct dxil_value *component = dxil_emit_extractval(&ctx->mod, load_result, i);
4181 if (!component)
4182 return false;
4183 store_def(ctx, &intr->def, i, component);
4184 }
4185
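/* Typed UAV loads of more than one component need the
 * TypedUAVLoadAdditionalFormats capability bit.
 */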
4186 if (util_format_get_nr_components(nir_intrinsic_format(intr)) > 1)
4187 ctx->mod.feats.typed_uav_load_additional_formats = true;
4188
4189 return true;
4190 }
4191
4192 static bool
4193 emit_image_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4194 {
4195 const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_atomic ?
4196 create_image_handle(ctx, intr) :
4197 get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
4198 if (!handle)
4199 return false;
4200
4201 bool is_array = false;
4202 if (intr->intrinsic == nir_intrinsic_image_deref_atomic)
4203 is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
4204 else
4205 is_array = nir_intrinsic_image_array(intr);
4206
4207 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4208 if (!int32_undef)
4209 return false;
4210
4211 const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
4212 enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_atomic ?
4213 glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
4214 nir_intrinsic_image_dim(intr);
4215 unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
4216 if (is_array)
4217 ++num_coords;
4218
4219 assert(num_coords <= nir_src_num_components(intr->src[1]));
4220 for (unsigned i = 0; i < num_coords; ++i) {
4221 coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
4222 if (!coord[i])
4223 return false;
4224 }
4225
4226 nir_atomic_op nir_op = nir_intrinsic_atomic_op(intr);
4227 enum dxil_atomic_op dxil_op = nir_atomic_to_dxil_atomic(nir_op);
4228 nir_alu_type type = nir_atomic_op_type(nir_op);
4229 const struct dxil_value *value = get_src(ctx, &intr->src[3], 0, type);
4230 if (!value)
4231 return false;
4232
4233 const struct dxil_value *retval =
4234 emit_atomic_binop(ctx, handle, dxil_op, coord, value);
4235
4236 if (!retval)
4237 return false;
4238
4239 store_def(ctx, &intr->def, 0, retval);
4240 return true;
4241 }
4242
4243 static bool
4244 emit_image_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4245 {
4246 const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_atomic_swap ?
4247 create_image_handle(ctx, intr) :
4248 get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
4249 if (!handle)
4250 return false;
4251
4252 bool is_array = false;
4253 if (intr->intrinsic == nir_intrinsic_image_deref_atomic_swap)
4254 is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
4255 else
4256 is_array = nir_intrinsic_image_array(intr);
4257
4258 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4259 if (!int32_undef)
4260 return false;
4261
4262 const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
4263 enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ?
4264 glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
4265 nir_intrinsic_image_dim(intr);
4266 unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
4267 if (is_array)
4268 ++num_coords;
4269
4270 assert(num_coords <= nir_src_num_components(intr->src[1]));
4271 for (unsigned i = 0; i < num_coords; ++i) {
4272 coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
4273 if (!coord[i])
4274 return false;
4275 }
4276
4277 const struct dxil_value *cmpval = get_src(ctx, &intr->src[3], 0, nir_type_uint);
4278 const struct dxil_value *newval = get_src(ctx, &intr->src[4], 0, nir_type_uint);
4279 if (!cmpval || !newval)
4280 return false;
4281
4282 const struct dxil_value *retval =
4283 emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval);
4284
4285 if (!retval)
4286 return false;
4287
4288 store_def(ctx, &intr->def, 0, retval);
4289 return true;
4290 }
4291
4292 struct texop_parameters {
4293 const struct dxil_value *tex;
4294 const struct dxil_value *sampler;
4295 const struct dxil_value *bias, *lod_or_sample, *min_lod;
4296 const struct dxil_value *coord[4], *offset[3], *dx[3], *dy[3];
4297 const struct dxil_value *cmp;
4298 enum overload_type overload;
4299 };
4300
4301 static const struct dxil_value *
4302 emit_texture_size(struct ntd_context *ctx, struct texop_parameters *params)
4303 {
4304 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.getDimensions", DXIL_NONE);
4305 if (!func)
4306 return NULL;
4307
4308 const struct dxil_value *args[] = {
4309 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_SIZE),
4310 params->tex,
4311 params->lod_or_sample
4312 };
4313
4314 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4315 }
4316
4317 static bool
4318 emit_image_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4319 {
4320 const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_size ?
4321 create_image_handle(ctx, intr) :
4322 get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
4323 if (!handle)
4324 return false;
4325
4326 enum glsl_sampler_dim sampler_dim = intr->intrinsic == nir_intrinsic_image_deref_size ?
4327 glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
4328 nir_intrinsic_image_dim(intr);
4329 const struct dxil_value *lod = sampler_dim == GLSL_SAMPLER_DIM_BUF ?
4330 dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32)) :
4331 get_src(ctx, &intr->src[1], 0, nir_type_uint);
4332 if (!lod)
4333 return false;
4334
4335 struct texop_parameters params = {
4336 .tex = handle,
4337 .lod_or_sample = lod
4338 };
4339 const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
4340 if (!dimensions)
4341 return false;
4342
4343 for (unsigned i = 0; i < intr->def.num_components; ++i) {
4344 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, i);
4345 store_def(ctx, &intr->def, i, retval);
4346 }
4347
4348 return true;
4349 }
4350
4351 static bool
4352 emit_get_ssbo_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4353 {
4354 enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV;
4355 if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
4356 nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
4357 if (var && var->data.access & ACCESS_NON_WRITEABLE)
4358 class = DXIL_RESOURCE_CLASS_SRV;
4359 }
4360
4361 const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER);
4362 if (!handle)
4363 return false;
4364
4365 struct texop_parameters params = {
4366 .tex = handle,
4367 .lod_or_sample = dxil_module_get_undef(
4368 &ctx->mod, dxil_module_get_int_type(&ctx->mod, 32))
4369 };
4370
4371 const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
4372 if (!dimensions)
4373 return false;
4374
4375 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, 0);
4376 store_def(ctx, &intr->def, 0, retval);
4377
4378 return true;
4379 }
4380
4381 static bool
4382 emit_ssbo_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4383 {
4384 nir_atomic_op nir_op = nir_intrinsic_atomic_op(intr);
4385 enum dxil_atomic_op dxil_op = nir_atomic_to_dxil_atomic(nir_op);
4386 nir_alu_type type = nir_atomic_op_type(nir_op);
4387 const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
4388 const struct dxil_value *offset =
4389 get_src(ctx, &intr->src[1], 0, nir_type_uint);
4390 const struct dxil_value *value =
4391 get_src(ctx, &intr->src[2], 0, type);
4392
4393 if (!value || !handle || !offset)
4394 return false;
4395
4396 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4397 if (!int32_undef)
4398 return false;
4399
4400 const struct dxil_value *coord[3] = {
4401 offset, int32_undef, int32_undef
4402 };
4403
4404 const struct dxil_value *retval =
4405 emit_atomic_binop(ctx, handle, dxil_op, coord, value);
4406
4407 if (!retval)
4408 return false;
4409
4410 store_def(ctx, &intr->def, 0, retval);
4411 return true;
4412 }
4413
4414 static bool
4415 emit_ssbo_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4416 {
4417 const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
4418 const struct dxil_value *offset =
4419 get_src(ctx, &intr->src[1], 0, nir_type_uint);
4420 const struct dxil_value *cmpval =
4421 get_src(ctx, &intr->src[2], 0, nir_type_int);
4422 const struct dxil_value *newval =
4423 get_src(ctx, &intr->src[3], 0, nir_type_int);
4424
4425 if (!cmpval || !newval || !handle || !offset)
4426 return false;
4427
4428 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4429 if (!int32_undef)
4430 return false;
4431
4432 const struct dxil_value *coord[3] = {
4433 offset, int32_undef, int32_undef
4434 };
4435
4436 const struct dxil_value *retval =
4437 emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval);
4438
4439 if (!retval)
4440 return false;
4441
4442 store_def(ctx, &intr->def, 0, retval);
4443 return true;
4444 }
4445
4446 static bool
4447 emit_vulkan_resource_index(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4448 {
4449 unsigned int binding = nir_intrinsic_binding(intr);
4450
4451 bool const_index = nir_src_is_const(intr->src[0]);
4452 if (const_index) {
4453 binding += nir_src_as_const_value(intr->src[0])->u32;
4454 }
4455
4456 const struct dxil_value *index_value = dxil_module_get_int32_const(&ctx->mod, binding);
4457 if (!index_value)
4458 return false;
4459
4460 if (!const_index) {
4461 const struct dxil_value *offset = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
4462 if (!offset)
4463 return false;
4464
4465 index_value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, index_value, offset, 0);
4466 if (!index_value)
4467 return false;
4468 }
4469
4470 store_def(ctx, &intr->def, 0, index_value);
4471 store_def(ctx, &intr->def, 1, dxil_module_get_int32_const(&ctx->mod, 0));
4472 return true;
4473 }
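/* Sketch of the index math above: for layout(set = 0, binding = 3) with a
 * dynamic array index in src[0], the stored handle index is the i32 add
 * "3 + src[0]"; if src[0] is a constant 2 it folds into the single
 * constant 5. The second component is always the constant 0 and is simply
 * carried along for load_vulkan_descriptor to copy through.
 */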
4474
4475 static bool
4476 emit_load_vulkan_descriptor(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4477 {
4478 nir_intrinsic_instr* index = nir_src_as_intrinsic(intr->src[0]);
4479 const struct dxil_value *handle = NULL;
4480
4481 enum dxil_resource_class resource_class;
4482 enum dxil_resource_kind resource_kind;
4483 switch (nir_intrinsic_desc_type(intr)) {
4484 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
4485 resource_class = DXIL_RESOURCE_CLASS_CBV;
4486 resource_kind = DXIL_RESOURCE_KIND_CBUFFER;
4487 break;
4488 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
4489 resource_class = DXIL_RESOURCE_CLASS_UAV;
4490 resource_kind = DXIL_RESOURCE_KIND_RAW_BUFFER;
4491 break;
4492 default:
4493 unreachable("unknown descriptor type");
4494 return false;
4495 }
4496
4497 if (index && index->intrinsic == nir_intrinsic_vulkan_resource_index) {
4498 unsigned binding = nir_intrinsic_binding(index);
4499 unsigned space = nir_intrinsic_desc_set(index);
4500
4501 /* The descriptor_set field for variables is only 5 bits. We shouldn't have intrinsics trying to go beyond that. */
4502 assert(space < 32);
4503
4504 nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
4505 if (resource_class == DXIL_RESOURCE_CLASS_UAV &&
4506 (var->data.access & ACCESS_NON_WRITEABLE))
4507 resource_class = DXIL_RESOURCE_CLASS_SRV;
4508
4509 const struct dxil_value *index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
4510 if (!index_value)
4511 return false;
4512
4513 handle = emit_createhandle_call_dynamic(ctx, resource_class, space, binding, index_value, false);
4514 } else {
4515 const struct dxil_value *heap_index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
4516 if (!heap_index_value)
4517 return false;
4518 const struct dxil_value *unannotated_handle = emit_createhandle_heap(ctx, heap_index_value, false, true);
4519 const struct dxil_value *res_props = dxil_module_get_buffer_res_props_const(&ctx->mod, resource_class, resource_kind);
4520 if (!unannotated_handle || !res_props)
4521 return false;
4522 handle = emit_annotate_handle(ctx, unannotated_handle, res_props);
4523 }
4524
4525 store_ssa_def(ctx, &intr->def, 0, handle);
4526 store_def(ctx, &intr->def, 1, get_src(ctx, &intr->src[0], 1, nir_type_uint32));
4527
4528 return true;
4529 }
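/* Rough shape of the two paths above, with operand lists abbreviated:
 *
 *   descriptor from vulkan_resource_index (binding table):
 *     %h = dx.op.createHandle(class, space, binding, %index, ...)
 *
 *   anything else, treated as a descriptor-heap index (SM 6.6 bindless):
 *     %u = dx.op.createHandleFromHeap(%heap_index, ...)
 *     %h = dx.op.annotateHandle(%u, <buffer resource properties>)
 *
 * See the createhandle helpers for the exact signatures.
 */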
4530
4531 static bool
4532 emit_load_sample_pos_from_id(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4533 {
4534 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.renderTargetGetSamplePosition", DXIL_NONE);
4535 if (!func)
4536 return false;
4537
4538 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION);
4539 if (!opcode)
4540 return false;
4541
4542 const struct dxil_value *args[] = {
4543 opcode,
4544 get_src(ctx, &intr->src[0], 0, nir_type_uint32),
4545 };
4546 if (!args[1])
4547 return false;
4548
4549 const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4550 if (!v)
4551 return false;
4552
4553 for (unsigned i = 0; i < 2; ++i) {
4554 /* GL coords go from 0 -> 1, D3D from -0.5 -> 0.5 */
4555 const struct dxil_value *coord = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
4556 dxil_emit_extractval(&ctx->mod, v, i),
4557 dxil_module_get_float_const(&ctx->mod, 0.5f), 0);
4558 store_def(ctx, &intr->def, i, coord);
4559 }
4560 return true;
4561 }
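/* Worked example of the bias above: dx.op.renderTargetGetSamplePosition
 * reports positions in D3D's [-0.5, 0.5) sample space, so a D3D result of
 * (-0.25, 0.25) is stored as (0.25, 0.75) in GL's [0, 1) convention.
 */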
4562
4563 static bool
4564 emit_load_sample_id(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4565 {
4566 assert(ctx->mod.info.has_per_sample_input ||
4567 intr->intrinsic == nir_intrinsic_load_sample_id_no_per_sample);
4568
4569 if (ctx->mod.info.has_per_sample_input)
4570 return emit_load_unary_external_function(ctx, intr, "dx.op.sampleIndex",
4571 DXIL_INTR_SAMPLE_INDEX, nir_type_int);
4572
4573 store_def(ctx, &intr->def, 0, dxil_module_get_int32_const(&ctx->mod, 0));
4574 return true;
4575 }
4576
4577 static bool
4578 emit_read_first_invocation(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4579 {
4580 ctx->mod.feats.wave_ops = 1;
4581 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveReadLaneFirst",
4582 get_overload(nir_type_uint, intr->def.bit_size));
4583 const struct dxil_value *args[] = {
4584 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_READ_LANE_FIRST),
4585 get_src(ctx, intr->src, 0, nir_type_uint),
4586 };
4587 if (!func || !args[0] || !args[1])
4588 return false;
4589
4590 const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4591 if (!ret)
4592 return false;
4593 store_def(ctx, &intr->def, 0, ret);
4594 return true;
4595 }
4596
4597 static bool
4598 emit_read_invocation(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4599 {
4600 ctx->mod.feats.wave_ops = 1;
4601 bool quad = intr->intrinsic == nir_intrinsic_quad_broadcast;
4602 const struct dxil_func *func = dxil_get_function(&ctx->mod, quad ? "dx.op.quadReadLaneAt" : "dx.op.waveReadLaneAt",
4603 get_overload(nir_type_uint, intr->def.bit_size));
4604 const struct dxil_value *args[] = {
4605 dxil_module_get_int32_const(&ctx->mod, quad ? DXIL_INTR_QUAD_READ_LANE_AT : DXIL_INTR_WAVE_READ_LANE_AT),
4606 get_src(ctx, &intr->src[0], 0, nir_type_uint),
4607 get_src(ctx, &intr->src[1], 0, nir_type_uint),
4608 };
4609 if (!func || !args[0] || !args[1] || !args[2])
4610 return false;
4611
4612 const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4613 if (!ret)
4614 return false;
4615 store_def(ctx, &intr->def, 0, ret);
4616 return true;
4617 }
4618
4619 static bool
4620 emit_vote_eq(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4621 {
4622 ctx->mod.feats.wave_ops = 1;
4623 nir_alu_type alu_type = intr->intrinsic == nir_intrinsic_vote_ieq ? nir_type_int : nir_type_float;
4624 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveAllEqual",
4625 get_overload(alu_type, intr->src[0].ssa->bit_size));
4626 const struct dxil_value *args[] = {
4627 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_ALL_EQUAL),
4628 get_src(ctx, intr->src, 0, alu_type),
4629 };
4630 if (!func || !args[0] || !args[1])
4631 return false;
4632
4633 const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4634 if (!ret)
4635 return false;
4636 store_def(ctx, &intr->def, 0, ret);
4637 return true;
4638 }
4639
4640 static bool
4641 emit_vote(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4642 {
4643 ctx->mod.feats.wave_ops = 1;
4644 bool any = intr->intrinsic == nir_intrinsic_vote_any;
4645 const struct dxil_func *func = dxil_get_function(&ctx->mod,
4646 any ? "dx.op.waveAnyTrue" : "dx.op.waveAllTrue",
4647 DXIL_NONE);
4648 const struct dxil_value *args[] = {
4649 dxil_module_get_int32_const(&ctx->mod, any ? DXIL_INTR_WAVE_ANY_TRUE : DXIL_INTR_WAVE_ALL_TRUE),
4650 get_src(ctx, intr->src, 0, nir_type_bool),
4651 };
4652 if (!func || !args[0] || !args[1])
4653 return false;
4654
4655 const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4656 if (!ret)
4657 return false;
4658 store_def(ctx, &intr->def, 0, ret);
4659 return true;
4660 }
4661
4662 static bool
4663 emit_ballot(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4664 {
4665 ctx->mod.feats.wave_ops = 1;
4666 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveBallot", DXIL_NONE);
4667 const struct dxil_value *args[] = {
4668 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_BALLOT),
4669 get_src(ctx, intr->src, 0, nir_type_bool),
4670 };
4671 if (!func || !args[0] || !args[1])
4672 return false;
4673
4674 const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4675 if (!ret)
4676 return false;
4677 for (uint32_t i = 0; i < 4; ++i)
4678 store_def(ctx, &intr->def, i, dxil_emit_extractval(&ctx->mod, ret, i));
4679 return true;
4680 }
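/* dx.op.waveActiveBallot returns a struct of four i32s (a 128-bit lane
 * mask); the loop above unpacks one component per extractvalue. E.g. in a
 * wave where only lane 0 passes the predicate, the destination receives
 * (1, 0, 0, 0).
 */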
4681
4682 static bool
4683 emit_quad_op(struct ntd_context *ctx, nir_intrinsic_instr *intr, enum dxil_quad_op_kind op)
4684 {
4685 ctx->mod.feats.wave_ops = 1;
4686 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.quadOp",
4687 get_overload(nir_type_uint, intr->def.bit_size));
4688 const struct dxil_value *args[] = {
4689 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_QUAD_OP),
4690 get_src(ctx, intr->src, 0, nir_type_uint),
4691 dxil_module_get_int8_const(&ctx->mod, op),
4692 };
4693 if (!func || !args[0] || !args[1] || !args[2])
4694 return false;
4695
4696 const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4697 if (!ret)
4698 return false;
4699 store_def(ctx, &intr->def, 0, ret);
4700 return true;
4701 }
4702
4703 static enum dxil_wave_bit_op_kind
4704 get_reduce_bit_op(nir_op op)
4705 {
4706 switch (op) {
4707 case nir_op_ior: return DXIL_WAVE_BIT_OP_OR;
4708 case nir_op_ixor: return DXIL_WAVE_BIT_OP_XOR;
4709 case nir_op_iand: return DXIL_WAVE_BIT_OP_AND;
4710 default:
4711 unreachable("Invalid bit op");
4712 }
4713 }
4714
4715 static bool
4716 emit_reduce_bitwise(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4717 {
4718 enum dxil_wave_bit_op_kind wave_bit_op = get_reduce_bit_op(nir_intrinsic_reduction_op(intr));
4719 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveBit",
4720 get_overload(nir_type_uint, intr->def.bit_size));
4721 const struct dxil_value *args[] = {
4722 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_BIT),
4723 get_src(ctx, intr->src, 0, nir_type_uint),
4724 dxil_module_get_int8_const(&ctx->mod, wave_bit_op),
4725 };
4726 if (!func || !args[0] || !args[1] || !args[2])
4727 return false;
4728
4729 const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4730 if (!ret)
4731 return false;
4732 store_def(ctx, &intr->def, 0, ret);
4733 return true;
4734 }
4735
4736 static enum dxil_wave_op_kind
4737 get_reduce_op(nir_op op)
4738 {
4739 switch (op) {
4740 case nir_op_iadd:
4741 case nir_op_fadd:
4742 return DXIL_WAVE_OP_SUM;
4743 case nir_op_imul:
4744 case nir_op_fmul:
4745 return DXIL_WAVE_OP_PRODUCT;
4746 case nir_op_imax:
4747 case nir_op_umax:
4748 case nir_op_fmax:
4749 return DXIL_WAVE_OP_MAX;
4750 case nir_op_imin:
4751 case nir_op_umin:
4752 case nir_op_fmin:
4753 return DXIL_WAVE_OP_MIN;
4754 default:
4755 unreachable("Unexpected reduction op");
4756 }
4757 }
4758
4759 static bool
4760 emit_reduce(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4761 {
4762 ctx->mod.feats.wave_ops = 1;
4763 bool is_prefix = intr->intrinsic == nir_intrinsic_exclusive_scan;
4764 nir_op reduction_op = (nir_op)nir_intrinsic_reduction_op(intr);
4765 switch (reduction_op) {
4766 case nir_op_ior:
4767 case nir_op_ixor:
4768 case nir_op_iand:
4769 assert(!is_prefix);
4770 return emit_reduce_bitwise(ctx, intr);
4771 default:
4772 break;
4773 }
4774 nir_alu_type alu_type = nir_op_infos[reduction_op].input_types[0];
4775 enum dxil_wave_op_kind wave_op = get_reduce_op(reduction_op);
4776 const struct dxil_func *func = dxil_get_function(&ctx->mod, is_prefix ? "dx.op.wavePrefixOp" : "dx.op.waveActiveOp",
4777 get_overload(alu_type, intr->def.bit_size));
4778 bool is_unsigned = alu_type == nir_type_uint;
4779 const struct dxil_value *args[] = {
4780 dxil_module_get_int32_const(&ctx->mod, is_prefix ? DXIL_INTR_WAVE_PREFIX_OP : DXIL_INTR_WAVE_ACTIVE_OP),
4781 get_src(ctx, intr->src, 0, alu_type),
4782 dxil_module_get_int8_const(&ctx->mod, wave_op),
4783 dxil_module_get_int8_const(&ctx->mod, is_unsigned),
4784 };
4785 if (!func || !args[0] || !args[1] || !args[2] || !args[3])
4786 return false;
4787
4788 const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4789 if (!ret)
4790 return false;
4791 store_def(ctx, &intr->def, 0, ret);
4792 return true;
4793 }
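/* Example mappings for the dispatch above:
 *   reduce(iadd)         -> dx.op.waveActiveOp(..., SUM, signed)
 *   reduce(umax)         -> dx.op.waveActiveOp(..., MAX, unsigned)
 *   exclusive_scan(fadd) -> dx.op.wavePrefixOp(..., SUM, ...)
 *   reduce(ixor)         -> dx.op.waveActiveBit(..., XOR), via
 *                           emit_reduce_bitwise()
 */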
4794
4795 static bool
4796 emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4797 {
4798 switch (intr->intrinsic) {
4799 case nir_intrinsic_load_global_invocation_id:
4800 return emit_load_global_invocation_id(ctx, intr);
4801 case nir_intrinsic_load_local_invocation_id:
4802 return emit_load_local_invocation_id(ctx, intr);
4803 case nir_intrinsic_load_local_invocation_index:
4804 return emit_load_local_invocation_index(ctx, intr);
4805 case nir_intrinsic_load_workgroup_id:
4806 return emit_load_local_workgroup_id(ctx, intr);
4807 case nir_intrinsic_load_ssbo:
4808 return emit_load_ssbo(ctx, intr);
4809 case nir_intrinsic_store_ssbo:
4810 return emit_store_ssbo(ctx, intr);
4811 case nir_intrinsic_load_deref:
4812 return emit_load_deref(ctx, intr);
4813 case nir_intrinsic_store_deref:
4814 return emit_store_deref(ctx, intr);
4815 case nir_intrinsic_deref_atomic:
4816 return emit_atomic_deref(ctx, intr);
4817 case nir_intrinsic_deref_atomic_swap:
4818 return emit_atomic_deref_swap(ctx, intr);
4819 case nir_intrinsic_load_ubo_vec4:
4820 return emit_load_ubo_vec4(ctx, intr);
4821 case nir_intrinsic_load_primitive_id:
4822 return emit_load_unary_external_function(ctx, intr, "dx.op.primitiveID",
4823 DXIL_INTR_PRIMITIVE_ID, nir_type_int);
4824 case nir_intrinsic_load_sample_id:
4825 case nir_intrinsic_load_sample_id_no_per_sample:
4826 return emit_load_sample_id(ctx, intr);
4827 case nir_intrinsic_load_invocation_id:
4828 switch (ctx->mod.shader_kind) {
4829 case DXIL_HULL_SHADER:
4830 return emit_load_unary_external_function(ctx, intr, "dx.op.outputControlPointID",
4831 DXIL_INTR_OUTPUT_CONTROL_POINT_ID, nir_type_int);
4832 case DXIL_GEOMETRY_SHADER:
4833 return emit_load_unary_external_function(ctx, intr, "dx.op.gsInstanceID",
4834 DXIL_INTR_GS_INSTANCE_ID, nir_type_int);
4835 default:
4836 unreachable("Unexpected shader kind for invocation ID");
4837 }
4838 case nir_intrinsic_load_view_index:
4839 ctx->mod.feats.view_id = true;
4840 return emit_load_unary_external_function(ctx, intr, "dx.op.viewID",
4841 DXIL_INTR_VIEW_ID, nir_type_int);
4842 case nir_intrinsic_load_sample_mask_in:
4843 return emit_load_sample_mask_in(ctx, intr);
4844 case nir_intrinsic_load_tess_coord:
4845 return emit_load_tess_coord(ctx, intr);
4846 case nir_intrinsic_terminate_if:
4847 case nir_intrinsic_demote_if:
4848 return emit_discard_if(ctx, intr);
4849 case nir_intrinsic_terminate:
4850 case nir_intrinsic_demote:
4851 return emit_discard(ctx);
4852 case nir_intrinsic_emit_vertex:
4853 return emit_emit_vertex(ctx, intr);
4854 case nir_intrinsic_end_primitive:
4855 return emit_end_primitive(ctx, intr);
4856 case nir_intrinsic_barrier:
4857 return emit_barrier(ctx, intr);
4858 case nir_intrinsic_ssbo_atomic:
4859 return emit_ssbo_atomic(ctx, intr);
4860 case nir_intrinsic_ssbo_atomic_swap:
4861 return emit_ssbo_atomic_comp_swap(ctx, intr);
4862 case nir_intrinsic_image_deref_atomic:
4863 case nir_intrinsic_image_atomic:
4864 case nir_intrinsic_bindless_image_atomic:
4865 return emit_image_atomic(ctx, intr);
4866 case nir_intrinsic_image_deref_atomic_swap:
4867 case nir_intrinsic_image_atomic_swap:
4868 case nir_intrinsic_bindless_image_atomic_swap:
4869 return emit_image_atomic_comp_swap(ctx, intr);
4870 case nir_intrinsic_image_store:
4871 case nir_intrinsic_image_deref_store:
4872 case nir_intrinsic_bindless_image_store:
4873 return emit_image_store(ctx, intr);
4874 case nir_intrinsic_image_load:
4875 case nir_intrinsic_image_deref_load:
4876 case nir_intrinsic_bindless_image_load:
4877 return emit_image_load(ctx, intr);
4878 case nir_intrinsic_image_size:
4879 case nir_intrinsic_image_deref_size:
4880 case nir_intrinsic_bindless_image_size:
4881 return emit_image_size(ctx, intr);
4882 case nir_intrinsic_get_ssbo_size:
4883 return emit_get_ssbo_size(ctx, intr);
4884 case nir_intrinsic_load_input:
4885 case nir_intrinsic_load_per_vertex_input:
4886 case nir_intrinsic_load_output:
4887 case nir_intrinsic_load_per_vertex_output:
4888 return emit_load_input_via_intrinsic(ctx, intr);
4889 case nir_intrinsic_store_output:
4890 case nir_intrinsic_store_per_vertex_output:
4891 return emit_store_output_via_intrinsic(ctx, intr);
4892
4893 case nir_intrinsic_load_barycentric_at_offset:
4894 case nir_intrinsic_load_barycentric_at_sample:
4895 case nir_intrinsic_load_barycentric_centroid:
4896 case nir_intrinsic_load_barycentric_pixel:
4897 /* Emit nothing, we only support these as inputs to load_interpolated_input */
4898 return true;
4899 case nir_intrinsic_load_interpolated_input:
4900 return emit_load_interpolated_input(ctx, intr);
4901 break;
4902
4903 case nir_intrinsic_vulkan_resource_index:
4904 return emit_vulkan_resource_index(ctx, intr);
4905 case nir_intrinsic_load_vulkan_descriptor:
4906 return emit_load_vulkan_descriptor(ctx, intr);
4907
4908 case nir_intrinsic_load_sample_pos_from_id:
4909 return emit_load_sample_pos_from_id(ctx, intr);
4910
4911 case nir_intrinsic_is_helper_invocation:
4912 return emit_load_unary_external_function(
4913 ctx, intr, "dx.op.isHelperLane", DXIL_INTR_IS_HELPER_LANE, nir_type_int);
4914 case nir_intrinsic_elect:
4915 ctx->mod.feats.wave_ops = 1;
4916 return emit_load_unary_external_function(
4917 ctx, intr, "dx.op.waveIsFirstLane", DXIL_INTR_WAVE_IS_FIRST_LANE, nir_type_invalid);
4918 case nir_intrinsic_load_subgroup_size:
4919 ctx->mod.feats.wave_ops = 1;
4920 return emit_load_unary_external_function(
4921 ctx, intr, "dx.op.waveGetLaneCount", DXIL_INTR_WAVE_GET_LANE_COUNT, nir_type_invalid);
4922 case nir_intrinsic_load_subgroup_invocation:
4923 ctx->mod.feats.wave_ops = 1;
4924 return emit_load_unary_external_function(
4925 ctx, intr, "dx.op.waveGetLaneIndex", DXIL_INTR_WAVE_GET_LANE_INDEX, nir_type_invalid);
4926
4927 case nir_intrinsic_vote_feq:
4928 case nir_intrinsic_vote_ieq:
4929 return emit_vote_eq(ctx, intr);
4930 case nir_intrinsic_vote_any:
4931 case nir_intrinsic_vote_all:
4932 return emit_vote(ctx, intr);
4933
4934 case nir_intrinsic_ballot:
4935 return emit_ballot(ctx, intr);
4936
4937 case nir_intrinsic_read_first_invocation:
4938 return emit_read_first_invocation(ctx, intr);
4939 case nir_intrinsic_read_invocation:
4940 case nir_intrinsic_shuffle:
4941 case nir_intrinsic_quad_broadcast:
4942 return emit_read_invocation(ctx, intr);
4943
4944 case nir_intrinsic_quad_swap_horizontal:
4945 return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_X);
4946 case nir_intrinsic_quad_swap_vertical:
4947 return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_Y);
4948 case nir_intrinsic_quad_swap_diagonal:
4949 return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_DIAGONAL);
4950
4951 case nir_intrinsic_reduce:
4952 case nir_intrinsic_exclusive_scan:
4953 return emit_reduce(ctx, intr);
4954
4955 case nir_intrinsic_ddx:
4956 case nir_intrinsic_ddx_coarse: return emit_derivative(ctx, intr, DXIL_INTR_DDX_COARSE);
4957 case nir_intrinsic_ddx_fine: return emit_derivative(ctx, intr, DXIL_INTR_DDX_FINE);
4958 case nir_intrinsic_ddy:
4959 case nir_intrinsic_ddy_coarse: return emit_derivative(ctx, intr, DXIL_INTR_DDY_COARSE);
4960 case nir_intrinsic_ddy_fine: return emit_derivative(ctx, intr, DXIL_INTR_DDY_FINE);
4961
4962 case nir_intrinsic_load_first_vertex:
4963 ctx->mod.feats.extended_command_info = true;
4964 return emit_load_unary_external_function(ctx, intr, "dx.op.startVertexLocation",
4965 DXIL_INTR_START_VERTEX_LOCATION, nir_type_int);
4966 case nir_intrinsic_load_base_instance:
4967 ctx->mod.feats.extended_command_info = true;
4968 return emit_load_unary_external_function(ctx, intr, "dx.op.startInstanceLocation",
4969 DXIL_INTR_START_INSTANCE_LOCATION, nir_type_int);
4970
4971 case nir_intrinsic_load_num_workgroups:
4972 case nir_intrinsic_load_workgroup_size:
4973 default:
4974 log_nir_instr_unsupported(
4975 ctx->logger, "Unimplemented intrinsic instruction", &intr->instr);
4976 return false;
4977 }
4978 }
4979
4980 static const struct dxil_type *
4981 dxil_type_for_const(struct ntd_context *ctx, nir_def *def)
4982 {
4983 if (BITSET_TEST(ctx->int_types, def->index) ||
4984 !BITSET_TEST(ctx->float_types, def->index))
4985 return dxil_module_get_int_type(&ctx->mod, def->bit_size);
4986 return dxil_module_get_float_type(&ctx->mod, def->bit_size);
4987 }
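/* The two bitsets are filled by nir_gather_types() in emit_function(): a
 * def consumed only as a float is materialized as a float constant of the
 * matching bit size; anything consumed as an int, or never typed at all,
 * defaults to the int type. E.g. a load_const feeding only fadd becomes a
 * 32-bit float constant, while one feeding both fadd and iand is kept as
 * an integer constant.
 */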
4988
4989 static bool
4990 emit_load_const(struct ntd_context *ctx, nir_load_const_instr *load_const)
4991 {
4992 for (uint32_t i = 0; i < load_const->def.num_components; ++i) {
4993 const struct dxil_type *type = dxil_type_for_const(ctx, &load_const->def);
4994 store_ssa_def(ctx, &load_const->def, i, get_value_for_const(&ctx->mod, &load_const->value[i], type));
4995 }
4996 return true;
4997 }
4998
4999 static bool
5000 emit_deref(struct ntd_context *ctx, nir_deref_instr *instr)
5001 {
5002 /* There are two possible reasons we might be walking through derefs:
5003 * 1. Computing an index to be used for a texture/sampler/image binding, which
5004 * can only do array indexing and should compute the indices along the way with
5005 * array-of-array sizes.
5006 * 2. Storing an index to be used in a GEP for access to a variable.
5007 */
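/* Case 1, sketched: for a binding array such as "sampler2D s[4][3]" and a
 * deref chain s[i][j], the array steps below accumulate i * 3 + j (each
 * step scales its index by glsl_get_aoa_size() of the remaining type) and
 * the total is added to the variable's base binding before the handle is
 * created.
 */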
5008 nir_variable *var = nir_deref_instr_get_variable(instr);
5009 assert(var);
5010
5011 bool is_aoa_size =
5012 glsl_type_is_sampler(glsl_without_array(var->type)) ||
5013 glsl_type_is_image(glsl_without_array(var->type)) ||
5014 glsl_type_is_texture(glsl_without_array(var->type));
5015
5016 if (!is_aoa_size) {
5017 /* Just store the values, we'll use these to build a GEP in the load or store */
5018 switch (instr->deref_type) {
5019 case nir_deref_type_var:
5020 store_def(ctx, &instr->def, 0, dxil_module_get_int_const(&ctx->mod, 0, instr->def.bit_size));
5021 return true;
5022 case nir_deref_type_array:
5023 store_def(ctx, &instr->def, 0, get_src(ctx, &instr->arr.index, 0, nir_type_int));
5024 return true;
5025 case nir_deref_type_struct:
5026 store_def(ctx, &instr->def, 0, dxil_module_get_int_const(&ctx->mod, instr->strct.index, 32));
5027 return true;
5028 default:
5029 unreachable("Other deref types not supported");
5030 }
5031 }
5032
5033 /* In the CL environment, there's nothing to emit. Any references to
5034 * derefs will emit the necessary logic to handle scratch/shared GEP addressing
5035 */
5036 if (ctx->opts->environment == DXIL_ENVIRONMENT_CL)
5037 return true;
5038
5039 const struct glsl_type *type = instr->type;
5040 const struct dxil_value *binding;
5041 unsigned binding_val = ctx->opts->environment == DXIL_ENVIRONMENT_GL ?
5042 var->data.driver_location : var->data.binding;
5043
5044 if (instr->deref_type == nir_deref_type_var) {
5045 binding = dxil_module_get_int32_const(&ctx->mod, binding_val);
5046 } else {
5047 const struct dxil_value *base = get_src(ctx, &instr->parent, 0, nir_type_uint32);
5048 const struct dxil_value *offset = get_src(ctx, &instr->arr.index, 0, nir_type_uint32);
5049 if (!base || !offset)
5050 return false;
5051
5052 if (glsl_type_is_array(instr->type)) {
5053 offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_MUL, offset,
5054 dxil_module_get_int32_const(&ctx->mod, glsl_get_aoa_size(instr->type)), 0);
5055 if (!offset)
5056 return false;
5057 }
5058 binding = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, base, offset, 0);
5059 }
5060
5061 if (!binding)
5062 return false;
5063
5064 /* Haven't finished chasing the deref chain yet, just store the value */
5065 if (glsl_type_is_array(type)) {
5066 store_def(ctx, &instr->def, 0, binding);
5067 return true;
5068 }
5069
5070 assert(glsl_type_is_sampler(type) || glsl_type_is_image(type) || glsl_type_is_texture(type));
5071 enum dxil_resource_class res_class;
5072 if (glsl_type_is_image(type))
5073 res_class = DXIL_RESOURCE_CLASS_UAV;
5074 else if (glsl_type_is_sampler(type))
5075 res_class = DXIL_RESOURCE_CLASS_SAMPLER;
5076 else
5077 res_class = DXIL_RESOURCE_CLASS_SRV;
5078
5079 unsigned descriptor_set = ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN ?
5080 var->data.descriptor_set : (glsl_type_is_image(type) ? 1 : 0);
5081 const struct dxil_value *handle = emit_createhandle_call_dynamic(ctx, res_class,
5082 descriptor_set, binding_val, binding, false);
5083 if (!handle)
5084 return false;
5085
5086 store_ssa_def(ctx, &instr->def, 0, handle);
5087 return true;
5088 }
5089
5090 static bool
5091 emit_cond_branch(struct ntd_context *ctx, const struct dxil_value *cond,
5092 int true_block, int false_block)
5093 {
5094 assert(cond);
5095 assert(true_block >= 0);
5096 assert(false_block >= 0);
5097 return dxil_emit_branch(&ctx->mod, cond, true_block, false_block);
5098 }
5099
5100 static bool
5101 emit_branch(struct ntd_context *ctx, int block)
5102 {
5103 assert(block >= 0);
5104 return dxil_emit_branch(&ctx->mod, NULL, block, -1);
5105 }
5106
5107 static bool
5108 emit_jump(struct ntd_context *ctx, nir_jump_instr *instr)
5109 {
5110 switch (instr->type) {
5111 case nir_jump_break:
5112 case nir_jump_continue:
5113 assert(instr->instr.block->successors[0]);
5114 assert(!instr->instr.block->successors[1]);
5115 return emit_branch(ctx, instr->instr.block->successors[0]->index);
5116
5117 default:
5118 unreachable("Unsupported jump type\n");
5119 }
5120 }
5121
5122 struct phi_block {
5123 unsigned num_components;
5124 struct dxil_instr *comp[NIR_MAX_VEC_COMPONENTS];
5125 };
5126
5127 static bool
5128 emit_phi(struct ntd_context *ctx, nir_phi_instr *instr)
5129 {
5130 const struct dxil_type *type = NULL;
5131 nir_foreach_phi_src(src, instr) {
5132 /* All sources have the same type, just use the first one */
5133 type = dxil_value_get_type(ctx->defs[src->src.ssa->index].chans[0]);
5134 break;
5135 }
5136
5137 struct phi_block *vphi = ralloc(ctx->phis, struct phi_block);
5138 vphi->num_components = instr->def.num_components;
5139
5140 for (unsigned i = 0; i < vphi->num_components; ++i) {
5141 struct dxil_instr *phi = vphi->comp[i] = dxil_emit_phi(&ctx->mod, type);
5142 if (!phi)
5143 return false;
5144 store_ssa_def(ctx, &instr->def, i, dxil_instr_get_return_value(phi));
5145 }
5146 _mesa_hash_table_insert(ctx->phis, instr, vphi);
5147 return true;
5148 }
5149
5150 static bool
5151 fixup_phi(struct ntd_context *ctx, nir_phi_instr *instr,
5152 struct phi_block *vphi)
5153 {
5154 const struct dxil_value *values[16];
5155 unsigned blocks[16];
5156 for (unsigned i = 0; i < vphi->num_components; ++i) {
5157 size_t num_incoming = 0;
5158 nir_foreach_phi_src(src, instr) {
5159 const struct dxil_value *val = get_src_ssa(ctx, src->src.ssa, i);
5160 values[num_incoming] = val;
5161 blocks[num_incoming] = src->pred->index;
5162 ++num_incoming;
5163 if (num_incoming == ARRAY_SIZE(values)) {
5164 if (!dxil_phi_add_incoming(vphi->comp[i], values, blocks,
5165 num_incoming))
5166 return false;
5167 num_incoming = 0;
5168 }
5169 }
5170 if (num_incoming > 0 && !dxil_phi_add_incoming(vphi->comp[i], values,
5171 blocks, num_incoming))
5172 return false;
5173 }
5174 return true;
5175 }
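/* Incoming edges are flushed in batches of ARRAY_SIZE(values): a phi with,
 * say, 20 predecessors results in one dxil_phi_add_incoming() call
 * carrying 16 (value, predecessor-block) pairs and a second call carrying
 * the remaining 4.
 */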
5176
5177 static unsigned
5178 get_n_src(struct ntd_context *ctx, const struct dxil_value **values,
5179 unsigned max_components, nir_tex_src *src, nir_alu_type type)
5180 {
5181 unsigned num_components = nir_src_num_components(src->src);
5182 unsigned i = 0;
5183
5184 assert(num_components <= max_components);
5185
5186 for (i = 0; i < num_components; ++i) {
5187 values[i] = get_src(ctx, &src->src, i, type);
5188 if (!values[i])
5189 return 0;
5190 }
5191
5192 return num_components;
5193 }
5194
5195 #define PAD_SRC(ctx, array, components, undef) \
5196 for (unsigned i = components; i < ARRAY_SIZE(array); ++i) { \
5197 array[i] = undef; \
5198 }
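/* PAD_SRC fills the unused tail of a fixed-size operand array with an
 * undef value, e.g. a 2D texture coordinate only populates coord[0..1],
 * so coord[2..3] are padded before calling the fixed-arity dx.op.sample*
 * routines below.
 */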
5199
5200 static const struct dxil_value *
5201 emit_sample(struct ntd_context *ctx, struct texop_parameters *params)
5202 {
5203 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sample", params->overload);
5204 if (!func)
5205 return NULL;
5206
5207 const struct dxil_value *args[11] = {
5208 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE),
5209 params->tex, params->sampler,
5210 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5211 params->offset[0], params->offset[1], params->offset[2],
5212 params->min_lod
5213 };
5214
5215 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5216 }
5217
5218 static const struct dxil_value *
5219 emit_sample_bias(struct ntd_context *ctx, struct texop_parameters *params)
5220 {
5221 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleBias", params->overload);
5222 if (!func)
5223 return NULL;
5224
5225 assert(params->bias != NULL);
5226
5227 const struct dxil_value *args[12] = {
5228 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_BIAS),
5229 params->tex, params->sampler,
5230 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5231 params->offset[0], params->offset[1], params->offset[2],
5232 params->bias, params->min_lod
5233 };
5234
5235 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5236 }
5237
5238 static const struct dxil_value *
5239 emit_sample_level(struct ntd_context *ctx, struct texop_parameters *params)
5240 {
5241 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleLevel", params->overload);
5242 if (!func)
5243 return NULL;
5244
5245 assert(params->lod_or_sample != NULL);
5246
5247 const struct dxil_value *args[11] = {
5248 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_LEVEL),
5249 params->tex, params->sampler,
5250 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5251 params->offset[0], params->offset[1], params->offset[2],
5252 params->lod_or_sample
5253 };
5254
5255 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5256 }
5257
5258 static const struct dxil_value *
5259 emit_sample_cmp(struct ntd_context *ctx, struct texop_parameters *params)
5260 {
5261 const struct dxil_func *func;
5262 enum dxil_intr opcode;
5263
5264 func = dxil_get_function(&ctx->mod, "dx.op.sampleCmp", DXIL_F32);
5265 opcode = DXIL_INTR_SAMPLE_CMP;
5266
5267 if (!func)
5268 return NULL;
5269
5270 const struct dxil_value *args[12] = {
5271 dxil_module_get_int32_const(&ctx->mod, opcode),
5272 params->tex, params->sampler,
5273 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5274 params->offset[0], params->offset[1], params->offset[2],
5275 params->cmp, params->min_lod
5276 };
5277
5278 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5279 }
5280
5281 static const struct dxil_value *
5282 emit_sample_cmp_level_zero(struct ntd_context *ctx, struct texop_parameters *params)
5283 {
5284 const struct dxil_func *func;
5285 enum dxil_intr opcode;
5286
5287 func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevelZero", DXIL_F32);
5288 opcode = DXIL_INTR_SAMPLE_CMP_LVL_ZERO;
5289
5290 if (!func)
5291 return NULL;
5292
5293 const struct dxil_value *args[11] = {
5294 dxil_module_get_int32_const(&ctx->mod, opcode),
5295 params->tex, params->sampler,
5296 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5297 params->offset[0], params->offset[1], params->offset[2],
5298 params->cmp
5299 };
5300
5301 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5302 }
5303
5304 static const struct dxil_value *
5305 emit_sample_cmp_level(struct ntd_context *ctx, struct texop_parameters *params)
5306 {
5307 ctx->mod.feats.advanced_texture_ops = true;
5308 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevel", params->overload);
5309 if (!func)
5310 return NULL;
5311
5312 assert(params->lod_or_sample != NULL);
5313
5314 const struct dxil_value *args[12] = {
5315 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_LEVEL),
5316 params->tex, params->sampler,
5317 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5318 params->offset[0], params->offset[1], params->offset[2],
5319 params->cmp, params->lod_or_sample
5320 };
5321
5322 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5323 }
5324
5325 static const struct dxil_value *
5326 emit_sample_cmp_bias(struct ntd_context *ctx, struct texop_parameters *params)
5327 {
5328 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpBias", params->overload);
5329 if (!func)
5330 return NULL;
5331
5332 assert(params->bias != NULL);
5333 ctx->mod.feats.sample_cmp_bias_gradient = 1;
5334
5335 const struct dxil_value *args[13] = {
5336 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_BIAS),
5337 params->tex, params->sampler,
5338 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5339 params->offset[0], params->offset[1], params->offset[2],
5340 params->cmp, params->bias, params->min_lod
5341 };
5342
5343 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5344 }
5345
5346 static const struct dxil_value *
5347 emit_sample_grad(struct ntd_context *ctx, struct texop_parameters *params)
5348 {
5349 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleGrad", params->overload);
5350 if (!func)
5351 return NULL;
5352
5353 const struct dxil_value *args[17] = {
5354 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_GRAD),
5355 params->tex, params->sampler,
5356 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5357 params->offset[0], params->offset[1], params->offset[2],
5358 params->dx[0], params->dx[1], params->dx[2],
5359 params->dy[0], params->dy[1], params->dy[2],
5360 params->min_lod
5361 };
5362
5363 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5364 }
5365
5366 static const struct dxil_value *
5367 emit_sample_cmp_grad(struct ntd_context *ctx, struct texop_parameters *params)
5368 {
5369 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpGrad", params->overload);
5370 if (!func)
5371 return NULL;
5372
5373 ctx->mod.feats.sample_cmp_bias_gradient = 1;
5374
5375 const struct dxil_value *args[18] = {
5376 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_GRAD),
5377 params->tex, params->sampler,
5378 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5379 params->offset[0], params->offset[1], params->offset[2],
5380 params->cmp,
5381 params->dx[0], params->dx[1], params->dx[2],
5382 params->dy[0], params->dy[1], params->dy[2],
5383 params->min_lod
5384 };
5385
5386 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5387 }
5388
5389 static const struct dxil_value *
5390 emit_texel_fetch(struct ntd_context *ctx, struct texop_parameters *params)
5391 {
5392 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", params->overload);
5393 if (!func)
5394 return NULL;
5395
5396 if (!params->lod_or_sample)
5397 params->lod_or_sample = dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32));
5398
5399 const struct dxil_value *args[] = {
5400 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOAD),
5401 params->tex,
5402 params->lod_or_sample, params->coord[0], params->coord[1], params->coord[2],
5403 params->offset[0], params->offset[1], params->offset[2]
5404 };
5405
5406 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5407 }
5408
5409 static const struct dxil_value *
5410 emit_texture_lod(struct ntd_context *ctx, struct texop_parameters *params, bool clamped)
5411 {
5412 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.calculateLOD", DXIL_F32);
5413 if (!func)
5414 return NULL;
5415
5416 const struct dxil_value *args[] = {
5417 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOD),
5418 params->tex,
5419 params->sampler,
5420 params->coord[0],
5421 params->coord[1],
5422 params->coord[2],
5423 dxil_module_get_int1_const(&ctx->mod, clamped ? 1 : 0)
5424 };
5425
5426 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5427 }
5428
5429 static const struct dxil_value *
5430 emit_texture_gather(struct ntd_context *ctx, struct texop_parameters *params, unsigned component)
5431 {
5432 const struct dxil_func *func = dxil_get_function(&ctx->mod,
5433 params->cmp ? "dx.op.textureGatherCmp" : "dx.op.textureGather", params->overload);
5434 if (!func)
5435 return NULL;
5436
5437 const struct dxil_value *args[] = {
5438 dxil_module_get_int32_const(&ctx->mod, params->cmp ?
5439 DXIL_INTR_TEXTURE_GATHER_CMP : DXIL_INTR_TEXTURE_GATHER),
5440 params->tex,
5441 params->sampler,
5442 params->coord[0],
5443 params->coord[1],
5444 params->coord[2],
5445 params->coord[3],
5446 params->offset[0],
5447 params->offset[1],
5448 dxil_module_get_int32_const(&ctx->mod, component),
5449 params->cmp
5450 };
5451
5452 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args) - (params->cmp ? 0 : 1));
5453 }
5454
5455 static bool
5456 emit_tex(struct ntd_context *ctx, nir_tex_instr *instr)
5457 {
5458 struct texop_parameters params;
5459 memset(&params, 0, sizeof(struct texop_parameters));
5460 if (ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN) {
5461 params.tex = ctx->srv_handles[instr->texture_index];
5462 params.sampler = ctx->sampler_handles[instr->sampler_index];
5463 }
5464
5465 const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
5466 const struct dxil_type *float_type = dxil_module_get_float_type(&ctx->mod, 32);
5467 const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);
5468 const struct dxil_value *float_undef = dxil_module_get_undef(&ctx->mod, float_type);
5469
5470 unsigned coord_components = 0, offset_components = 0, dx_components = 0, dy_components = 0;
5471 params.overload = get_overload(instr->dest_type, 32);
5472
5473 bool lod_is_zero = false;
5474 for (unsigned i = 0; i < instr->num_srcs; i++) {
5475 nir_alu_type type = nir_tex_instr_src_type(instr, i);
5476
5477 switch (instr->src[i].src_type) {
5478 case nir_tex_src_coord:
5479 coord_components = get_n_src(ctx, params.coord, ARRAY_SIZE(params.coord),
5480 &instr->src[i], type);
5481 if (!coord_components)
5482 return false;
5483 break;
5484
5485 case nir_tex_src_offset:
5486 offset_components = get_n_src(ctx, params.offset, ARRAY_SIZE(params.offset),
5487 &instr->src[i], nir_type_int);
5488 if (!offset_components)
5489 return false;
5490
5491 /* Dynamic offsets were only allowed with gather, until "advanced texture ops" in SM 6.7 */
5492 if (!nir_src_is_const(instr->src[i].src) && instr->op != nir_texop_tg4)
5493 ctx->mod.feats.advanced_texture_ops = true;
5494 break;
5495
5496 case nir_tex_src_bias:
5497 assert(instr->op == nir_texop_txb);
5498 assert(nir_src_num_components(instr->src[i].src) == 1);
5499 params.bias = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
5500 if (!params.bias)
5501 return false;
5502 break;
5503
5504 case nir_tex_src_lod:
5505 assert(nir_src_num_components(instr->src[i].src) == 1);
5506 if (instr->op == nir_texop_txf_ms) {
5507 assert(nir_src_as_int(instr->src[i].src) == 0);
5508 break;
5509 }
5510
5511 /* Buffers don't have a LOD */
5512 if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF)
5513 params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, type);
5514 else
5515 params.lod_or_sample = int_undef;
5516 if (!params.lod_or_sample)
5517 return false;
5518
5519 if (nir_src_is_const(instr->src[i].src) && nir_src_as_float(instr->src[i].src) == 0.0f)
5520 lod_is_zero = true;
5521 break;
5522
5523 case nir_tex_src_min_lod:
5524 assert(nir_src_num_components(instr->src[i].src) == 1);
5525 params.min_lod = get_src(ctx, &instr->src[i].src, 0, type);
5526 if (!params.min_lod)
5527 return false;
5528 break;
5529
5530 case nir_tex_src_comparator:
5531 assert(nir_src_num_components(instr->src[i].src) == 1);
5532 params.cmp = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
5533 if (!params.cmp)
5534 return false;
5535 break;
5536
5537 case nir_tex_src_ddx:
5538 dx_components = get_n_src(ctx, params.dx, ARRAY_SIZE(params.dx),
5539 &instr->src[i], nir_type_float);
5540 if (!dx_components)
5541 return false;
5542 break;
5543
5544 case nir_tex_src_ddy:
5545 dy_components = get_n_src(ctx, params.dy, ARRAY_SIZE(params.dy),
5546 &instr->src[i], nir_type_float);
5547 if (!dy_components)
5548 return false;
5549 break;
5550
5551 case nir_tex_src_ms_index:
5552 params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, nir_type_int);
5553 if (!params.lod_or_sample)
5554 return false;
5555 break;
5556
5557 case nir_tex_src_texture_deref:
5558 assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN);
5559 params.tex = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
5560 break;
5561
5562 case nir_tex_src_sampler_deref:
5563 assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN);
5564 params.sampler = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
5565 break;
5566
5567 case nir_tex_src_texture_offset:
5568 params.tex = emit_createhandle_call_dynamic(ctx, DXIL_RESOURCE_CLASS_SRV,
5569 0, instr->texture_index,
5570 dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
5571 get_src(ctx, &instr->src[i].src, 0, nir_type_uint),
5572 dxil_module_get_int32_const(&ctx->mod, instr->texture_index), 0),
5573 instr->texture_non_uniform);
5574 break;
5575
5576 case nir_tex_src_sampler_offset:
5577 if (nir_tex_instr_need_sampler(instr)) {
5578 params.sampler = emit_createhandle_call_dynamic(ctx, DXIL_RESOURCE_CLASS_SAMPLER,
5579 0, instr->sampler_index,
5580 dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
5581 get_src(ctx, &instr->src[i].src, 0, nir_type_uint),
5582 dxil_module_get_int32_const(&ctx->mod, instr->sampler_index), 0),
5583 instr->sampler_non_uniform);
5584 }
5585 break;
5586
5587 case nir_tex_src_texture_handle:
5588 params.tex = create_srv_handle(ctx, instr, &instr->src[i].src);
5589 break;
5590
5591 case nir_tex_src_sampler_handle:
5592 if (nir_tex_instr_need_sampler(instr))
5593 params.sampler = create_sampler_handle(ctx, instr->is_shadow, &instr->src[i].src);
5594 break;
5595
5596 case nir_tex_src_projector:
5597 unreachable("Texture projector should have been lowered");
5598
5599 default:
5600 fprintf(stderr, "texture source: %d\n", instr->src[i].src_type);
5601 unreachable("unknown texture source");
5602 }
5603 }
5604
5605 assert(params.tex != NULL);
5606 assert(instr->op == nir_texop_txf ||
5607 instr->op == nir_texop_txf_ms ||
5608 nir_tex_instr_is_query(instr) ||
5609 params.sampler != NULL);
5610
5611 PAD_SRC(ctx, params.coord, coord_components, float_undef);
5612 PAD_SRC(ctx, params.offset, offset_components, int_undef);
5613 if (!params.min_lod) params.min_lod = float_undef;
5614
5615 const struct dxil_value *sample = NULL;
5616 switch (instr->op) {
5617 case nir_texop_txb:
5618 if (params.cmp != NULL && ctx->mod.minor_version >= 8)
5619 sample = emit_sample_cmp_bias(ctx, &params);
5620 else
5621 sample = emit_sample_bias(ctx, &params);
5622 break;
5623
5624 case nir_texop_tex:
5625 if (params.cmp != NULL) {
5626 sample = emit_sample_cmp(ctx, &params);
5627 break;
5628 } else if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) {
5629 sample = emit_sample(ctx, &params);
5630 break;
5631 }
5632 params.lod_or_sample = dxil_module_get_float_const(&ctx->mod, 0);
5633 lod_is_zero = true;
5634 FALLTHROUGH;
5635 case nir_texop_txl:
5636 if (lod_is_zero && params.cmp != NULL && ctx->mod.minor_version < 7) {
5637 /* Prior to SM 6.7, if the level is a constant 0.0, drop the LOD
5638 * argument and use the level-less dx.op.sampleCmpLevelZero instead,
5639 * since dx.op.sampleCmpLevel is not available on older shader models.
5640 */
5641 sample = emit_sample_cmp_level_zero(ctx, &params);
5642 } else {
5643 if (params.cmp != NULL)
5644 sample = emit_sample_cmp_level(ctx, &params);
5645 else
5646 sample = emit_sample_level(ctx, &params);
5647 }
5648 break;
5649
5650 case nir_texop_txd:
5651 PAD_SRC(ctx, params.dx, dx_components, float_undef);
5652 PAD_SRC(ctx, params.dy, dy_components, float_undef);
5653 if (params.cmp != NULL && ctx->mod.minor_version >= 8)
5654 sample = emit_sample_cmp_grad(ctx, &params);
5655 else
5656 sample = emit_sample_grad(ctx, &params);
5657 break;
5658
5659 case nir_texop_txf:
5660 case nir_texop_txf_ms:
5661 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
5662 params.coord[1] = int_undef;
5663 sample = emit_bufferload_call(ctx, params.tex, params.coord, params.overload);
5664 } else {
5665 PAD_SRC(ctx, params.coord, coord_components, int_undef);
5666 sample = emit_texel_fetch(ctx, &params);
5667 }
5668 break;
5669
5670 case nir_texop_txs:
5671 sample = emit_texture_size(ctx, &params);
5672 break;
5673
5674 case nir_texop_tg4:
5675 sample = emit_texture_gather(ctx, &params, instr->component);
5676 break;
5677
5678 case nir_texop_lod:
5679 sample = emit_texture_lod(ctx, &params, true);
5680 store_def(ctx, &instr->def, 0, sample);
5681 sample = emit_texture_lod(ctx, &params, false);
5682 store_def(ctx, &instr->def, 1, sample);
5683 return true;
5684
5685 case nir_texop_query_levels: {
5686 params.lod_or_sample = dxil_module_get_int_const(&ctx->mod, 0, 32);
5687 sample = emit_texture_size(ctx, &params);
5688 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3);
5689 store_def(ctx, &instr->def, 0, retval);
5690 return true;
5691 }
5692
5693 case nir_texop_texture_samples: {
5694 params.lod_or_sample = int_undef;
5695 sample = emit_texture_size(ctx, &params);
5696 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3);
5697 store_def(ctx, &instr->def, 0, retval);
5698 return true;
5699 }
5700
5701 default:
5702 fprintf(stderr, "texture op: %d\n", instr->op);
5703 unreachable("unknown texture op");
5704 }
5705
5706 if (!sample)
5707 return false;
5708
5709 for (unsigned i = 0; i < instr->def.num_components; ++i) {
5710 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, i);
5711 store_def(ctx, &instr->def, i, retval);
5712 }
5713
5714 return true;
5715 }
5716
5717 static bool
5718 emit_undefined(struct ntd_context *ctx, nir_undef_instr *undef)
5719 {
5720 for (unsigned i = 0; i < undef->def.num_components; ++i)
5721 store_ssa_def(ctx, &undef->def, i, dxil_module_get_int32_const(&ctx->mod, 0));
5722 return true;
5723 }
5724
5725 static bool emit_instr(struct ntd_context *ctx, struct nir_instr *instr)
5726 {
5727 switch (instr->type) {
5728 case nir_instr_type_alu:
5729 return emit_alu(ctx, nir_instr_as_alu(instr));
5730 case nir_instr_type_intrinsic:
5731 return emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
5732 case nir_instr_type_load_const:
5733 return emit_load_const(ctx, nir_instr_as_load_const(instr));
5734 case nir_instr_type_deref:
5735 return emit_deref(ctx, nir_instr_as_deref(instr));
5736 case nir_instr_type_jump:
5737 return emit_jump(ctx, nir_instr_as_jump(instr));
5738 case nir_instr_type_phi:
5739 return emit_phi(ctx, nir_instr_as_phi(instr));
5740 case nir_instr_type_tex:
5741 return emit_tex(ctx, nir_instr_as_tex(instr));
5742 case nir_instr_type_undef:
5743 return emit_undefined(ctx, nir_instr_as_undef(instr));
5744 default:
5745 log_nir_instr_unsupported(ctx->logger, "Unimplemented instruction type",
5746 instr);
5747 return false;
5748 }
5749 }
5750
5751
5752 static bool
5753 emit_block(struct ntd_context *ctx, struct nir_block *block)
5754 {
5755 assert(block->index < ctx->mod.cur_emitting_func->num_basic_block_ids);
5756 ctx->mod.cur_emitting_func->basic_block_ids[block->index] = ctx->mod.cur_emitting_func->curr_block;
5757
5758 nir_foreach_instr(instr, block) {
5759 TRACE_CONVERSION(instr);
5760
5761 if (!emit_instr(ctx, instr)) {
5762 return false;
5763 }
5764 }
5765 return true;
5766 }
5767
5768 static bool
5769 emit_cf_list(struct ntd_context *ctx, struct exec_list *list);
5770
5771 static bool
5772 emit_if(struct ntd_context *ctx, struct nir_if *if_stmt)
5773 {
5774 assert(nir_src_num_components(if_stmt->condition) == 1);
5775 const struct dxil_value *cond = get_src(ctx, &if_stmt->condition, 0,
5776 nir_type_bool);
5777 if (!cond)
5778 return false;
5779
5780 /* prepare blocks */
5781 nir_block *then_block = nir_if_first_then_block(if_stmt);
5782 assert(nir_if_last_then_block(if_stmt)->successors[0]);
5783 assert(!nir_if_last_then_block(if_stmt)->successors[1]);
5784 int then_succ = nir_if_last_then_block(if_stmt)->successors[0]->index;
5785
5786 nir_block *else_block = NULL;
5787 int else_succ = -1;
5788 if (!exec_list_is_empty(&if_stmt->else_list)) {
5789 else_block = nir_if_first_else_block(if_stmt);
5790 assert(nir_if_last_else_block(if_stmt)->successors[0]);
5791 assert(!nir_if_last_else_block(if_stmt)->successors[1]);
5792 else_succ = nir_if_last_else_block(if_stmt)->successors[0]->index;
5793 }
5794
5795 if (!emit_cond_branch(ctx, cond, then_block->index,
5796 else_block ? else_block->index : then_succ))
5797 return false;
5798
5799 /* handle then-block */
5800 if (!emit_cf_list(ctx, &if_stmt->then_list) ||
5801 (!nir_block_ends_in_jump(nir_if_last_then_block(if_stmt)) &&
5802 !emit_branch(ctx, then_succ)))
5803 return false;
5804
5805 if (else_block) {
5806 /* handle else-block */
5807 if (!emit_cf_list(ctx, &if_stmt->else_list) ||
5808 (!nir_block_ends_in_jump(nir_if_last_else_block(if_stmt)) &&
5809 !emit_branch(ctx, else_succ)))
5810 return false;
5811 }
5812
5813 return true;
5814 }
5815
5816 static bool
5817 emit_loop(struct ntd_context *ctx, nir_loop *loop)
5818 {
5819 assert(!nir_loop_has_continue_construct(loop));
5820 nir_block *first_block = nir_loop_first_block(loop);
5821 nir_block *last_block = nir_loop_last_block(loop);
5822
5823 assert(last_block->successors[0]);
5824 assert(!last_block->successors[1]);
5825
5826 if (!emit_branch(ctx, first_block->index))
5827 return false;
5828
5829 if (!emit_cf_list(ctx, &loop->body))
5830 return false;
5831
5832 /* If the loop's last block doesn't explicitly jump somewhere, then there's
5833 * an implicit continue that should take it back to the first loop block
5834 */
5835 nir_instr *last_instr = nir_block_last_instr(last_block);
5836 if ((!last_instr || last_instr->type != nir_instr_type_jump) &&
5837 !emit_branch(ctx, first_block->index))
5838 return false;
5839
5840 return true;
5841 }
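/* Block structure produced above, roughly:
 *
 *   br %header          ; explicit jump into the loop
 *   header: body...     ; emit_cf_list()
 *   br %header          ; implicit back-edge, emitted only when the body
 *                       ; does not already end in an explicit jump
 *
 * Breaks inside the body were already emitted by emit_jump() as branches
 * to the loop's successor block.
 */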
5842
5843 static bool
5844 emit_cf_list(struct ntd_context *ctx, struct exec_list *list)
5845 {
5846 foreach_list_typed(nir_cf_node, node, node, list) {
5847 switch (node->type) {
5848 case nir_cf_node_block:
5849 if (!emit_block(ctx, nir_cf_node_as_block(node)))
5850 return false;
5851 break;
5852
5853 case nir_cf_node_if:
5854 if (!emit_if(ctx, nir_cf_node_as_if(node)))
5855 return false;
5856 break;
5857
5858 case nir_cf_node_loop:
5859 if (!emit_loop(ctx, nir_cf_node_as_loop(node)))
5860 return false;
5861 break;
5862
5863 default:
5864 unreachable("unsupported cf-list node");
5865 break;
5866 }
5867 }
5868 return true;
5869 }
5870
5871 static void
5872 insert_sorted_by_binding(struct exec_list *var_list, nir_variable *new_var)
5873 {
5874 nir_foreach_variable_in_list(var, var_list) {
5875 if (var->data.binding > new_var->data.binding) {
5876 exec_node_insert_node_before(&var->node, &new_var->node);
5877 return;
5878 }
5879 }
5880 exec_list_push_tail(var_list, &new_var->node);
5881 }
5882
5883
5884 static void
5885 sort_uniforms_by_binding_and_remove_structs(nir_shader *s)
5886 {
5887 struct exec_list new_list;
5888 exec_list_make_empty(&new_list);
5889
5890 nir_foreach_variable_with_modes_safe(var, s, nir_var_uniform) {
5891 exec_node_remove(&var->node);
5892 const struct glsl_type *type = glsl_without_array(var->type);
5893 if (!glsl_type_is_struct(type))
5894 insert_sorted_by_binding(&new_list, var);
5895 }
5896 exec_list_append(&s->variables, &new_list);
5897 }
5898
5899 static bool
5900 emit_cbvs(struct ntd_context *ctx)
5901 {
5902 if (ctx->opts->environment != DXIL_ENVIRONMENT_GL) {
5903 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ubo) {
5904 if (!emit_ubo_var(ctx, var))
5905 return false;
5906 }
5907 } else {
5908 if (ctx->shader->info.num_ubos) {
5909 const unsigned ubo_size = 16384 /*4096 vec4's*/;
5910 uint array_base = ctx->shader->info.first_ubo_is_default_ubo ? 1 : 0;
5911 bool has_ubo0 = ctx->shader->num_uniforms > 0 && ctx->shader->info.first_ubo_is_default_ubo;
5912 bool has_state_vars = ctx->opts->last_ubo_is_not_arrayed;
5913 unsigned ubo1_array_size = ctx->shader->info.num_ubos - array_base -
5914 (has_state_vars ? 1 : 0);
5915
5916 if (has_ubo0 &&
5917 !emit_cbv(ctx, 0, 0, ubo_size, 1, "__ubo_uniforms"))
5918 return false;
5919 if (ubo1_array_size &&
5920 !emit_cbv(ctx, array_base, 0, ubo_size, ubo1_array_size, "__ubos"))
5921 return false;
5922 if (has_state_vars &&
5923 !emit_cbv(ctx, ctx->shader->info.num_ubos - 1, 0, ubo_size, 1, "__ubo_state_vars"))
5924 return false;
5925 }
5926 }
5927
5928 return true;
5929 }
5930
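/* Emit one 16-byte-aligned alloca per function-temp variable and record
 * the resulting pointers in ctx->scratchvars, indexed by the
 * driver_location assigned here.
 */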
static bool
emit_scratch(struct ntd_context *ctx, nir_function_impl *impl)
{
   uint32_t index = 0;
   nir_foreach_function_temp_variable(var, impl)
      var->data.driver_location = index++;

   if (ctx->scratchvars)
      ralloc_free((void *)ctx->scratchvars);

   ctx->scratchvars = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index);

   nir_foreach_function_temp_variable(var, impl) {
      const struct dxil_type *type = get_type_for_glsl_type(&ctx->mod, var->type);
      const struct dxil_value *length = dxil_module_get_int32_const(&ctx->mod, 1);
      const struct dxil_value *ptr = dxil_emit_alloca(&ctx->mod, type, length, 16);
      if (!ptr)
         return false;

      ctx->scratchvars[var->data.driver_location] = ptr;
   }

   return true;
}

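/* Translate the NIR float-controls execution mode into the DXIL
 * "fp32-denorm-mode" function attribute ("ftz" or "preserve"), then emit
 * the function body block by block, fixing up phis once all blocks have
 * been emitted.
 */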
static bool
emit_function(struct ntd_context *ctx, nir_function *func, nir_function_impl *impl)
{
   assert(func->num_params == 0);
   nir_metadata_require(impl, nir_metadata_block_index);

   const char *attr_keys[2] = { NULL };
   const char *attr_values[2] = { NULL };
   if (ctx->shader->info.float_controls_execution_mode &
       (FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32 | FLOAT_CONTROLS_DENORM_PRESERVE_FP32))
      attr_keys[0] = "fp32-denorm-mode";
   if (ctx->shader->info.float_controls_execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32)
      attr_values[0] = "ftz";
   else if (ctx->shader->info.float_controls_execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP32)
      attr_values[0] = "preserve";

   const struct dxil_type *void_type = dxil_module_get_void_type(&ctx->mod);
   const struct dxil_type *func_type = dxil_module_add_function_type(&ctx->mod, void_type, NULL, 0);
   struct dxil_func_def *func_def = dxil_add_function_def(&ctx->mod, func->name, func_type, impl->num_blocks, attr_keys, attr_values);
   if (!func_def)
      return false;

   if (func->is_entrypoint)
      ctx->main_func_def = func_def;
   else if (func == ctx->tess_ctrl_patch_constant_func)
      ctx->tess_ctrl_patch_constant_func_def = func_def;

   ctx->defs = rzalloc_array(ctx->ralloc_ctx, struct dxil_def, impl->ssa_alloc);
   ctx->float_types = rzalloc_array(ctx->ralloc_ctx, BITSET_WORD, BITSET_WORDS(impl->ssa_alloc));
   ctx->int_types = rzalloc_array(ctx->ralloc_ctx, BITSET_WORD, BITSET_WORDS(impl->ssa_alloc));
   if (!ctx->defs || !ctx->float_types || !ctx->int_types)
      return false;
   ctx->num_defs = impl->ssa_alloc;

   ctx->phis = _mesa_pointer_hash_table_create(ctx->ralloc_ctx);
   if (!ctx->phis)
      return false;

   nir_gather_types(impl, ctx->float_types, ctx->int_types);

   if (!emit_scratch(ctx, impl))
      return false;

   if (!emit_static_indexing_handles(ctx))
      return false;

   if (!emit_cf_list(ctx, &impl->body))
      return false;

   hash_table_foreach(ctx->phis, entry) {
      if (!fixup_phi(ctx, (nir_phi_instr *)entry->key,
                     (struct phi_block *)entry->data))
         return false;
   }

   if (!dxil_emit_ret_void(&ctx->mod))
      return false;

   ralloc_free(ctx->defs);
   ctx->defs = NULL;
   _mesa_hash_table_destroy(ctx->phis, NULL);
   return true;
}

static bool
emit_module(struct ntd_context *ctx, const struct nir_to_dxil_options *opts)
{
   /* The validator forces us to emit resources in a specific order:
    * CBVs, Samplers, SRVs, UAVs. While we are at it, also remove stale
    * struct uniforms; they have been lowered but might not have been removed.
    */
   sort_uniforms_by_binding_and_remove_structs(ctx->shader);

   /* CBVs */
   if (!emit_cbvs(ctx))
      return false;

   /* Samplers */
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
      unsigned count = glsl_type_get_sampler_count(var->type);
      assert(count == 0 || glsl_type_is_bare_sampler(glsl_without_array(var->type)));
      if (count > 0 && !emit_sampler(ctx, var, count))
         return false;
   }

   /* SRVs */
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
      unsigned count = glsl_type_get_texture_count(var->type);
      assert(count == 0 || glsl_type_is_texture(glsl_without_array(var->type)));
      if (count > 0 && !emit_srv(ctx, var, count))
         return false;
   }

   /* Handle read-only SSBOs as SRVs */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
         if ((var->data.access & ACCESS_NON_WRITEABLE) != 0) {
            unsigned count = 1;
            if (glsl_type_is_array(var->type))
               count = glsl_get_length(var->type);
            if (!emit_srv(ctx, var, count))
               return false;
         }
      }
   }

   if (!emit_shared_vars(ctx))
      return false;
   if (!emit_global_consts(ctx))
      return false;

   /* UAVs */
   if (ctx->shader->info.stage == MESA_SHADER_KERNEL) {
      if (!emit_globals(ctx, opts->num_kernel_globals))
         return false;
   } else if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      /* Handle read/write SSBOs as UAVs */
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
         if ((var->data.access & ACCESS_NON_WRITEABLE) == 0) {
            unsigned count = 1;
            if (glsl_type_is_array(var->type))
               count = glsl_get_length(var->type);
            if (!emit_uav(ctx, var->data.binding, var->data.descriptor_set,
                          count, DXIL_COMP_TYPE_INVALID, 1,
                          DXIL_RESOURCE_KIND_RAW_BUFFER, var->data.access, var->name))
               return false;
         }
      }
   } else {
      for (unsigned i = 0; i < ctx->shader->info.num_ssbos; ++i) {
         char name[64];
         snprintf(name, sizeof(name), "__ssbo%d", i);
         if (!emit_uav(ctx, i, 0, 1, DXIL_COMP_TYPE_INVALID, 1,
                       DXIL_RESOURCE_KIND_RAW_BUFFER, 0, name))
            return false;
      }
      /* To work around a WARP bug, bind these descriptors a second time in descriptor
       * space 2. Space 0 will be used for static indexing, while space 2 will be used
       * for dynamic indexing. Space 0 will be individual SSBOs in the DXIL shader, while
       * space 2 will be a single array.
       */
      if (ctx->shader->info.num_ssbos &&
          !emit_uav(ctx, 0, 2, ctx->shader->info.num_ssbos, DXIL_COMP_TYPE_INVALID, 1,
                    DXIL_RESOURCE_KIND_RAW_BUFFER, 0, "__ssbo_dynamic"))
         return false;
   }

   nir_foreach_image_variable(var, ctx->shader) {
      if (!emit_uav_var(ctx, var, glsl_type_get_image_count(var->type)))
         return false;
   }

   ctx->mod.info.has_per_sample_input =
      BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
      ctx->shader->info.fs.uses_sample_shading ||
      ctx->shader->info.fs.uses_sample_qualifier;
   if (!ctx->mod.info.has_per_sample_input && ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in | nir_var_system_value) {
         if (var->data.sample) {
            ctx->mod.info.has_per_sample_input = true;
            break;
         }
      }
   }

   /* From the Vulkan spec 1.3.238, section 15.8:
    * "When Sample Shading is enabled, the x and y components of FragCoord reflect the location
    * of one of the samples corresponding to the shader invocation."
    *
    * In other words, if the fragment shader is executing per-sample, then the position variable
    * should always be per-sample.
    *
    * Also:
    * "The Centroid interpolation decoration is ignored, but allowed, on FragCoord."
    */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      nir_variable *pos_var = nir_find_variable_with_location(ctx->shader, nir_var_shader_in, VARYING_SLOT_POS);
      if (pos_var) {
         if (ctx->mod.info.has_per_sample_input)
            pos_var->data.sample = true;
         pos_var->data.centroid = false;
      }
   }

   unsigned input_clip_size = ctx->mod.shader_kind == DXIL_PIXEL_SHADER ?
      ctx->shader->info.clip_distance_array_size : ctx->opts->input_clip_size;
   preprocess_signatures(&ctx->mod, ctx->shader, input_clip_size);

   nir_foreach_function_with_impl(func, impl, ctx->shader) {
      if (!emit_function(ctx, func, impl))
         return false;
   }

   if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_out) {
         if (var->data.location == FRAG_RESULT_STENCIL) {
            ctx->mod.feats.stencil_ref = true;
         }
      }
   } else if (ctx->shader->info.stage == MESA_SHADER_VERTEX ||
              ctx->shader->info.stage == MESA_SHADER_TESS_EVAL) {
      if (ctx->shader->info.outputs_written &
          (VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER))
         ctx->mod.feats.array_layer_from_vs_or_ds = true;
   } else if (ctx->shader->info.stage == MESA_SHADER_GEOMETRY ||
              ctx->shader->info.stage == MESA_SHADER_TESS_CTRL) {
      if (ctx->shader->info.inputs_read &
          (VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER))
         ctx->mod.feats.array_layer_from_vs_or_ds = true;
   }

   if (ctx->mod.feats.native_low_precision && ctx->mod.minor_version < 2) {
      ctx->logger->log(ctx->logger->priv,
                       "Shader uses 16-bit types, which require shader model 6.2, but the target shader model is lower\n");
      return false;
   }

   return emit_metadata(ctx) &&
          dxil_emit_module(&ctx->mod);
}

static unsigned int
get_dxil_shader_kind(struct nir_shader *s)
{
   switch (s->info.stage) {
   case MESA_SHADER_VERTEX:
      return DXIL_VERTEX_SHADER;
   case MESA_SHADER_TESS_CTRL:
      return DXIL_HULL_SHADER;
   case MESA_SHADER_TESS_EVAL:
      return DXIL_DOMAIN_SHADER;
   case MESA_SHADER_GEOMETRY:
      return DXIL_GEOMETRY_SHADER;
   case MESA_SHADER_FRAGMENT:
      return DXIL_PIXEL_SHADER;
   case MESA_SHADER_KERNEL:
   case MESA_SHADER_COMPUTE:
      return DXIL_COMPUTE_SHADER;
   default:
      unreachable("unknown shader stage in nir_to_dxil");
      return DXIL_COMPUTE_SHADER;
   }
}

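/* Callback for nir_lower_bit_size: return 0 to leave an instruction
 * alone, or the bit size it should be lowered to. DXIL has no native
 * 8-bit ALU operations, and 16-bit ones are optional, so narrow ALU
 * instructions are widened to the smallest supported size.
 */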
static unsigned
lower_bit_size_callback(const nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_alu)
      return 0;
   nir_alu_instr *alu = nir_instr_as_alu(instr);

   if (nir_op_infos[alu->op].is_conversion)
      return 0;

   if (nir_op_is_vec_or_mov(alu->op))
      return 0;

   unsigned num_inputs = nir_op_infos[alu->op].num_inputs;
   const struct nir_to_dxil_options *opts = (const struct nir_to_dxil_options *)data;
   unsigned min_bit_size = opts->lower_int16 ? 32 : 16;

   unsigned ret = 0;
   for (unsigned i = 0; i < num_inputs; i++) {
      unsigned bit_size = nir_src_bit_size(alu->src[i].src);
      if (bit_size != 1 && bit_size < min_bit_size)
         ret = min_bit_size;
   }

   return ret;
}

static bool
vectorize_filter(
   unsigned align_mul,
   unsigned align_offset,
   unsigned bit_size,
   unsigned num_components,
   int64_t hole_size,
   nir_intrinsic_instr *low, nir_intrinsic_instr *high,
   void *data)
{
   return hole_size <= 0 && util_is_power_of_two_nonzero(num_components);
}

struct lower_mem_bit_sizes_data {
   const nir_shader_compiler_options *nir_options;
   const struct nir_to_dxil_options *dxil_options;
};

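/* Callback for nir_lower_mem_access_bit_sizes: given an access of "bytes"
 * bytes at the known alignment, return the size/alignment shape the
 * access should be split into. UBO loads are handled loosely here because
 * nir_lower_ubo_vec4 runs afterwards; SSBO accesses must respect the
 * supported bit-size range and the 4-component limit.
 */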
static nir_mem_access_size_align
lower_mem_access_bit_sizes_cb(nir_intrinsic_op intrin,
                              uint8_t bytes,
                              uint8_t bit_size_in,
                              uint32_t align_mul,
                              uint32_t align_offset,
                              bool offset_is_const,
                              enum gl_access_qualifier access,
                              const void *cb_data)
{
   const struct lower_mem_bit_sizes_data *data = cb_data;
   unsigned max_bit_size = 32;
   unsigned min_bit_size = data->dxil_options->lower_int16 ? 32 : 16;
   unsigned closest_bit_size = MAX2(min_bit_size, MIN2(max_bit_size, bit_size_in));
   if (intrin == nir_intrinsic_load_ubo) {
      /* UBO loads can be done at whatever (supported) bit size, but require 16-byte
       * alignment and can load up to 16 bytes per instruction. However, this pass requires
       * loading 16 bytes of data to get 16-byte alignment. We're going to run lower_ubo_vec4,
       * which can deal with unaligned vec4s, so for this pass let's just deal with bit size
       * and total size restrictions. */
      return (nir_mem_access_size_align) {
         .align = closest_bit_size / 8,
         .bit_size = closest_bit_size,
         .num_components = DIV_ROUND_UP(MIN2(bytes, 16) * 8, closest_bit_size),
         .shift = nir_mem_access_shift_method_scalar,
      };
   }

   assert(intrin == nir_intrinsic_load_ssbo || intrin == nir_intrinsic_store_ssbo);
   uint32_t align = nir_combined_align(align_mul, align_offset);
   if (align < min_bit_size / 8) {
      /* Unaligned load/store: use the minimum bit size, up to 4 components */
      unsigned ideal_num_components = intrin == nir_intrinsic_load_ssbo ?
         DIV_ROUND_UP(bytes * 8, min_bit_size) :
         (32 / min_bit_size);
      return (nir_mem_access_size_align) {
         .align = min_bit_size / 8,
         .bit_size = min_bit_size,
         .num_components = MIN2(4, ideal_num_components),
         .shift = nir_mem_access_shift_method_scalar,
      };
   }

   /* Increase/decrease the bit size to try to get closer to the requested byte size/align */
   unsigned bit_size = closest_bit_size;
   unsigned target = MIN2(bytes, align);
   while (target < bit_size / 8 && bit_size > min_bit_size)
      bit_size /= 2;
   while (target > bit_size / 8 * 4 && bit_size < max_bit_size)
      bit_size *= 2;

   /* This is the best we can do */
   unsigned num_components = intrin == nir_intrinsic_load_ssbo ?
      DIV_ROUND_UP(bytes * 8, bit_size) :
      MAX2(1, (bytes * 8 / bit_size));
   return (nir_mem_access_size_align) {
      .align = bit_size / 8,
      .bit_size = bit_size,
      .num_components = MIN2(4, num_components),
      .shift = nir_mem_access_shift_method_scalar,
   };
}

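/* Standard fixed-point optimization loop: keep re-running the lowering
 * and optimization passes until none of them reports progress.
 */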
static void
optimize_nir(struct nir_shader *s, const struct nir_to_dxil_options *opts)
{
   bool progress;
   do {
      progress = false;
      NIR_PASS_V(s, nir_lower_vars_to_ssa);
      NIR_PASS(progress, s, nir_lower_indirect_derefs, nir_var_function_temp, 4);
      NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, nir_opt_copy_prop_vars);
      NIR_PASS(progress, s, nir_lower_bit_size, lower_bit_size_callback, (void *)opts);
      NIR_PASS(progress, s, dxil_nir_lower_8bit_conv);
      if (opts->lower_int16)
         NIR_PASS(progress, s, dxil_nir_lower_16bit_conv);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_if,
               nir_opt_if_optimize_phi_true_false | nir_opt_if_avoid_64bit_phis);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, dxil_nir_algebraic);
      if (s->options->lower_int64_options)
         NIR_PASS(progress, s, nir_lower_int64);
      NIR_PASS(progress, s, nir_lower_alu);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_undef);
      NIR_PASS(progress, s, nir_opt_deref);
      NIR_PASS(progress, s, dxil_nir_lower_upcast_phis, opts->lower_int16 ? 32 : 16);
      NIR_PASS(progress, s, nir_lower_64bit_phis);
      NIR_PASS(progress, s, nir_lower_phis_to_scalar, true);
      NIR_PASS(progress, s, nir_opt_loop_unroll);
      NIR_PASS(progress, s, nir_lower_pack);
      NIR_PASS(progress, s, dxil_nir_remove_oob_array_accesses);
      NIR_PASS_V(s, nir_lower_system_values);
   } while (progress);

   do {
      progress = false;
      NIR_PASS(progress, s, nir_opt_algebraic_late);
   } while (progress);

   NIR_PASS_V(s, nir_lower_undef_to_zero);
}

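/* Fill the PSV (pipeline state validation) data that accompanies the
 * module in the container; the validator and runtime use it to check
 * signature sizes, wave sizes, and per-stage properties.
 */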
static void
dxil_fill_validation_state(struct ntd_context *ctx,
                           struct dxil_validation_state *state)
{
   unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
      sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
   state->num_resources = ctx->resources.size / resource_element_size;
   state->resources.v0 = (struct dxil_resource_v0 *)ctx->resources.data;
   if (ctx->shader->info.subgroup_size >= SUBGROUP_SIZE_REQUIRE_4) {
      state->state.psv1.psv0.max_expected_wave_lane_count = ctx->shader->info.subgroup_size;
      state->state.psv1.psv0.min_expected_wave_lane_count = ctx->shader->info.subgroup_size;
   } else {
      state->state.psv1.psv0.max_expected_wave_lane_count = UINT_MAX;
   }
   state->state.psv1.shader_stage = (uint8_t)ctx->mod.shader_kind;
   state->state.psv1.uses_view_id = (uint8_t)ctx->mod.feats.view_id;
   state->state.psv1.sig_input_elements = (uint8_t)ctx->mod.num_sig_inputs;
   state->state.psv1.sig_output_elements = (uint8_t)ctx->mod.num_sig_outputs;
   state->state.psv1.sig_patch_const_or_prim_elements = (uint8_t)ctx->mod.num_sig_patch_consts;

   switch (ctx->mod.shader_kind) {
   case DXIL_VERTEX_SHADER:
      state->state.psv1.psv0.vs.output_position_present = ctx->mod.info.has_out_position;
      break;
   case DXIL_PIXEL_SHADER:
      /* TODO: handle depth outputs */
      state->state.psv1.psv0.ps.depth_output = ctx->mod.info.has_out_depth;
      state->state.psv1.psv0.ps.sample_frequency =
         ctx->mod.info.has_per_sample_input;
      break;
   case DXIL_COMPUTE_SHADER:
      state->state.num_threads_x = MAX2(ctx->shader->info.workgroup_size[0], 1);
      state->state.num_threads_y = MAX2(ctx->shader->info.workgroup_size[1], 1);
      state->state.num_threads_z = MAX2(ctx->shader->info.workgroup_size[2], 1);
      break;
   case DXIL_GEOMETRY_SHADER:
      state->state.psv1.max_vertex_count = ctx->shader->info.gs.vertices_out;
      state->state.psv1.psv0.gs.input_primitive = dxil_get_input_primitive(ctx->shader->info.gs.input_primitive);
      state->state.psv1.psv0.gs.output_toplology = dxil_get_primitive_topology(ctx->shader->info.gs.output_primitive);
      state->state.psv1.psv0.gs.output_stream_mask = MAX2(ctx->shader->info.gs.active_stream_mask, 1);
      state->state.psv1.psv0.gs.output_position_present = ctx->mod.info.has_out_position;
      break;
   case DXIL_HULL_SHADER:
      state->state.psv1.psv0.hs.input_control_point_count = ctx->tess_input_control_point_count;
      state->state.psv1.psv0.hs.output_control_point_count = ctx->shader->info.tess.tcs_vertices_out;
      state->state.psv1.psv0.hs.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode);
      state->state.psv1.psv0.hs.tessellator_output_primitive = get_tessellator_output_primitive(&ctx->shader->info);
      state->state.psv1.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts;
      break;
   case DXIL_DOMAIN_SHADER:
      state->state.psv1.psv0.ds.input_control_point_count = ctx->shader->info.tess.tcs_vertices_out;
      state->state.psv1.psv0.ds.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode);
      state->state.psv1.psv0.ds.output_position_present = ctx->mod.info.has_out_position;
      state->state.psv1.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts;
      break;
   default:
      assert(0 && "Shader type not (yet) supported");
   }
}

static nir_variable *
add_sysvalue(struct ntd_context *ctx,
             uint8_t value, char *name,
             int driver_location)
{
   nir_variable *var = rzalloc(ctx->shader, nir_variable);
   if (!var)
      return NULL;
   var->data.driver_location = driver_location;
   var->data.location = value;
   var->type = glsl_uint_type();
   var->name = name;
   var->data.mode = nir_var_system_value;
   var->data.interpolation = INTERP_MODE_FLAT;
   return var;
}

static bool
append_input_or_sysvalue(struct ntd_context *ctx,
                         int input_loc, int sv_slot,
                         char *name, int driver_location)
{
   if (input_loc >= 0) {
      /* Check whether an input variable is available that corresponds
       * to the sysvalue */
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
         if (var->data.location == input_loc) {
            ctx->system_value[sv_slot] = var;
            return true;
         }
      }
   }

   ctx->system_value[sv_slot] = add_sysvalue(ctx, sv_slot, name, driver_location);
   if (!ctx->system_value[sv_slot])
      return false;

   nir_shader_add_variable(ctx->shader, ctx->system_value[sv_slot]);
   return true;
}

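/* System values that may be backed by a regular input variable: slot is
 * the varying slot to look for (-1 if the value never arrives as an
 * input), and only_in_shader restricts the mapping to a single stage.
 */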
struct sysvalue_name {
   gl_system_value value;
   int slot;
   char *name;
   gl_shader_stage only_in_shader;
} possible_sysvalues[] = {
   {SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, -1, "SV_VertexID", MESA_SHADER_NONE},
   {SYSTEM_VALUE_INSTANCE_ID, -1, "SV_InstanceID", MESA_SHADER_NONE},
   {SYSTEM_VALUE_FRONT_FACE, VARYING_SLOT_FACE, "SV_IsFrontFace", MESA_SHADER_NONE},
   {SYSTEM_VALUE_PRIMITIVE_ID, VARYING_SLOT_PRIMITIVE_ID, "SV_PrimitiveID", MESA_SHADER_GEOMETRY},
   {SYSTEM_VALUE_SAMPLE_ID, -1, "SV_SampleIndex", MESA_SHADER_NONE},
};

static bool
allocate_sysvalues(struct ntd_context *ctx)
{
   unsigned driver_location = 0;
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in)
      driver_location = MAX2(driver_location, var->data.driver_location + 1);
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_system_value)
      driver_location = MAX2(driver_location, var->data.driver_location + 1);

   if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT &&
       !BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID)) {
      bool need_sample_id = ctx->shader->info.fs.uses_sample_shading;

      /* "var->data.sample = true" sometimes just means "I want per-sample
       * shading", which explains why we can end up with vars having flat
       * interpolation with the per-sample bit set. If there are only such
       * variables, we need to tell DXIL that we read SV_SampleIndex
       * to make DXIL validation happy.
       */
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
         bool var_can_be_sample_rate = !var->data.centroid && var->data.interpolation != INTERP_MODE_FLAT;
         /* If there's an input that will actually force sample-rate shading, then we don't
          * need SV_SampleIndex. */
         if (var->data.sample && var_can_be_sample_rate) {
            need_sample_id = false;
            break;
         }
         /* If there's an input that wants to be sample-rate, but can't be, then we might
          * need SV_SampleIndex. */
         if (var->data.sample && !var_can_be_sample_rate)
            need_sample_id = true;
      }

      if (need_sample_id)
         BITSET_SET(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
   }

   for (unsigned i = 0; i < ARRAY_SIZE(possible_sysvalues); ++i) {
      struct sysvalue_name *info = &possible_sysvalues[i];
      if (info->only_in_shader != MESA_SHADER_NONE &&
          info->only_in_shader != ctx->shader->info.stage)
         continue;
      if (BITSET_TEST(ctx->shader->info.system_values_read, info->value)) {
         if (!append_input_or_sysvalue(ctx, info->slot,
                                       info->value, info->name,
                                       driver_location++))
            return false;
      }
   }
   return true;
}

static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
   return glsl_count_attribute_slots(type, false);
}

static const unsigned dxil_validator_min_capable_version = DXIL_VALIDATOR_1_4;
static const unsigned dxil_validator_max_capable_version = DXIL_VALIDATOR_1_8;
static const unsigned dxil_min_shader_model = SHADER_MODEL_6_0;
static const unsigned dxil_max_shader_model = SHADER_MODEL_6_8;

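/* Public entry point: validate the requested shader model and validator
 * version, run the NIR lowering pipeline, emit the DXIL module, and wrap
 * it in a container together with its signatures, feature flags, and
 * state-validation data.
 */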
bool
nir_to_dxil(struct nir_shader *s, const struct nir_to_dxil_options *opts,
            const struct dxil_logger *logger, struct blob *blob)
{
   assert(opts);
   bool retval = true;
   debug_dxil = (int)debug_get_option_debug_dxil();
   blob_init(blob);

   if (opts->shader_model_max < dxil_min_shader_model) {
      debug_printf("D3D12: cannot support emitting shader models lower than %d.%d\n",
                   dxil_min_shader_model >> 16,
                   dxil_min_shader_model & 0xffff);
      return false;
   }

   if (opts->shader_model_max > dxil_max_shader_model) {
      debug_printf("D3D12: cannot support emitting shader models higher than %d.%d\n",
                   dxil_max_shader_model >> 16,
                   dxil_max_shader_model & 0xffff);
      return false;
   }

   if (opts->validator_version_max != NO_DXIL_VALIDATION &&
       opts->validator_version_max < dxil_validator_min_capable_version) {
      debug_printf("D3D12: Invalid validator version %d.%d, must be 1.4 or greater\n",
                   opts->validator_version_max >> 16,
                   opts->validator_version_max & 0xffff);
      return false;
   }

   /* If no validation is requested, write the blob as if it were going to be
    * validated by the newest validator we understand. Same if the requested
    * validator is newer than we know how to write for.
    */
   uint32_t validator_version =
      opts->validator_version_max == NO_DXIL_VALIDATION ||
      opts->validator_version_max > dxil_validator_max_capable_version ?
      dxil_validator_max_capable_version : opts->validator_version_max;

   struct ntd_context *ctx = calloc(1, sizeof(*ctx));
   if (!ctx)
      return false;

   ctx->opts = opts;
   ctx->shader = s;
   ctx->logger = logger ? logger : &default_logger;

   ctx->ralloc_ctx = ralloc_context(NULL);
   if (!ctx->ralloc_ctx) {
      retval = false;
      goto out;
   }

   util_dynarray_init(&ctx->srv_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->uav_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->cbv_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->sampler_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->resources, ctx->ralloc_ctx);
   dxil_module_init(&ctx->mod, ctx->ralloc_ctx);
   ctx->mod.shader_kind = get_dxil_shader_kind(s);
   ctx->mod.major_version = 6;
   /* Use the highest shader model that's supported and can be validated */
   ctx->mod.minor_version =
      MIN2(opts->shader_model_max & 0xffff, validator_version & 0xffff);
   ctx->mod.major_validator = validator_version >> 16;
   ctx->mod.minor_validator = validator_version & 0xffff;

   if (s->info.stage <= MESA_SHADER_FRAGMENT) {
      uint64_t in_mask =
         s->info.stage == MESA_SHADER_VERTEX ?
         0 : (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER);
      uint64_t out_mask =
         s->info.stage == MESA_SHADER_FRAGMENT ?
         ((1ull << FRAG_RESULT_STENCIL) | (1ull << FRAG_RESULT_SAMPLE_MASK)) :
         (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER);

      NIR_PASS_V(s, dxil_nir_fix_io_uint_type, in_mask, out_mask);
   }

   NIR_PASS_V(s, dxil_nir_lower_fquantize2f16);
   NIR_PASS_V(s, nir_lower_frexp);
   NIR_PASS_V(s, nir_lower_flrp, 16 | 32 | 64, true);
   NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4, nir_lower_io_lower_64bit_to_32);
   NIR_PASS_V(s, dxil_nir_ensure_position_writes);
   NIR_PASS_V(s, dxil_nir_lower_system_values);
   NIR_PASS_V(s, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_system_value | nir_var_shader_out, NULL, NULL);

   /* Do a round of optimization to try to vectorize loads/stores. Otherwise the addresses used for loads
    * might be too opaque for the pass to see that they're next to each other. */
   optimize_nir(s, opts);

   /* Vectorize UBO/SSBO accesses aggressively. This can help increase alignment to enable us to do better
    * chunking of loads and stores after lowering bit sizes. Ignore load/store size limitations here; we'll
    * address them with lower_mem_access_bit_sizes. */
   nir_load_store_vectorize_options vectorize_opts = {
      .callback = vectorize_filter,
      .modes = nir_var_mem_ubo | nir_var_mem_ssbo,
   };
   NIR_PASS_V(s, nir_opt_load_store_vectorize, &vectorize_opts);

   /* Now that they're bloated to the max, address bit size restrictions and overall size limitations for
    * a single load/store op. */
   struct lower_mem_bit_sizes_data mem_size_data = { s->options, opts };
   nir_lower_mem_access_bit_sizes_options mem_size_options = {
      .modes = nir_var_mem_ubo | nir_var_mem_ssbo,
      .callback = lower_mem_access_bit_sizes_cb,
      .may_lower_unaligned_stores_to_atomics = true,
      .cb_data = &mem_size_data
   };
   NIR_PASS_V(s, nir_lower_mem_access_bit_sizes, &mem_size_options);

   /* Lastly, convert byte-addressed UBO loads to vec4-addressed ones. This pass can also deal with
    * selecting sub-components from the load and with vec4-straddling loads. */
   NIR_PASS_V(s, nir_lower_ubo_vec4);

   if (opts->shader_model_max < SHADER_MODEL_6_6) {
      /* In a later pass, load_helper_invocation will be lowered to a sample-mask-based fallback,
       * so both load_helper_invocation and is_helper_invocation will be emulated eventually.
       */
      NIR_PASS_V(s, nir_lower_is_helper_invocation);
   }

   if (ctx->mod.shader_kind == DXIL_HULL_SHADER)
      NIR_PASS_V(s, dxil_nir_split_tess_ctrl, &ctx->tess_ctrl_patch_constant_func);

   if (ctx->mod.shader_kind == DXIL_HULL_SHADER ||
       ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) {
      /* Make sure any derefs are gone after lower_io before updating tess level vars */
      NIR_PASS_V(s, nir_opt_dce);
      NIR_PASS_V(s, dxil_nir_fixup_tess_level_for_domain);
   }

   optimize_nir(s, opts);

   NIR_PASS_V(s, nir_remove_dead_variables,
              nir_var_function_temp | nir_var_mem_constant | nir_var_mem_shared, NULL);

   if (!allocate_sysvalues(ctx)) {
      retval = false;
      goto out;
   }

   NIR_PASS_V(s, dxil_nir_lower_sysval_to_load_input, ctx->system_value);
   NIR_PASS_V(s, nir_opt_dce);

   /* This needs to be after any copy prop is done to prevent these movs from being erased */
   NIR_PASS_V(s, dxil_nir_move_consts);
   NIR_PASS_V(s, nir_opt_dce);

   NIR_PASS_V(s, dxil_nir_guess_image_formats);

   if (debug_dxil & DXIL_DEBUG_VERBOSE)
      nir_print_shader(s, stderr);

   if (!emit_module(ctx, opts)) {
      debug_printf("D3D12: dxil_container_add_module failed\n");
      retval = false;
      goto out;
   }

   if (debug_dxil & DXIL_DEBUG_DUMP_MODULE) {
      struct dxil_dumper *dumper = dxil_dump_create();
      dxil_dump_module(dumper, &ctx->mod);
      fprintf(stderr, "\n");
      dxil_dump_buf_to_file(dumper, stderr);
      fprintf(stderr, "\n\n");
      dxil_dump_free(dumper);
   }

   struct dxil_container container;
   dxil_container_init(&container);
   /* Native low precision disables min-precision */
   if (ctx->mod.feats.native_low_precision)
      ctx->mod.feats.min_precision = false;
   if (!dxil_container_add_features(&container, &ctx->mod.feats)) {
      debug_printf("D3D12: dxil_container_add_features failed\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_add_io_signature(&container,
                                        DXIL_ISG1,
                                        ctx->mod.num_sig_inputs,
                                        ctx->mod.inputs,
                                        ctx->mod.minor_validator >= 7)) {
      debug_printf("D3D12: failed to write input signature\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_add_io_signature(&container,
                                        DXIL_OSG1,
                                        ctx->mod.num_sig_outputs,
                                        ctx->mod.outputs,
                                        ctx->mod.minor_validator >= 7)) {
      debug_printf("D3D12: failed to write output signature\n");
      retval = false;
      goto out;
   }

   if ((ctx->mod.shader_kind == DXIL_HULL_SHADER ||
        ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) &&
       !dxil_container_add_io_signature(&container,
                                        DXIL_PSG1,
                                        ctx->mod.num_sig_patch_consts,
                                        ctx->mod.patch_consts,
                                        ctx->mod.minor_validator >= 7)) {
      debug_printf("D3D12: failed to write patch constant signature\n");
      retval = false;
      goto out;
   }

   struct dxil_validation_state validation_state;
   memset(&validation_state, 0, sizeof(validation_state));
   dxil_fill_validation_state(ctx, &validation_state);

   if (!dxil_container_add_state_validation(&container, &ctx->mod,
                                            &validation_state)) {
      debug_printf("D3D12: failed to write state-validation\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_add_module(&container, &ctx->mod)) {
      debug_printf("D3D12: failed to write module\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_write(&container, blob)) {
      debug_printf("D3D12: dxil_container_write failed\n");
      retval = false;
      goto out;
   }
   dxil_container_finish(&container);

   if (debug_dxil & DXIL_DEBUG_DUMP_BLOB) {
      static int shader_id = 0;
      char buffer[64];
      snprintf(buffer, sizeof(buffer), "shader_%s_%d.blob",
               get_shader_kind_str(ctx->mod.shader_kind), shader_id++);
      debug_printf("Trying to write blob to %s\n", buffer);
      FILE *f = fopen(buffer, "wb");
      if (f) {
         fwrite(blob->data, 1, blob->size, f);
         fclose(f);
      }
   }

out:
   dxil_module_release(&ctx->mod);
   ralloc_free(ctx->ralloc_ctx);
   free(ctx);
   return retval;
}