/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_to_dxil.h"

#include "dxil_container.h"
#include "dxil_dump.h"
#include "dxil_enums.h"
#include "dxil_function.h"
#include "dxil_module.h"
#include "dxil_nir.h"
#include "dxil_signature.h"

#include "nir/nir_builder.h"
#include "nir_deref.h"
#include "util/ralloc.h"
#include "util/u_debug.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"

#include "git_sha1.h"

#include "vulkan/vulkan_core.h"

#include <stdint.h>

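/* Debug flags for this backend, selected at run time through the DXIL_DEBUG
 * environment variable; the recognized flag names are listed in
 * dxil_debug_options below.
 */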
int debug_dxil = 0;

static const struct debug_named_value
dxil_debug_options[] = {
   { "verbose", DXIL_DEBUG_VERBOSE, NULL },
   { "dump_blob", DXIL_DEBUG_DUMP_BLOB, "Write shader blobs" },
   { "trace", DXIL_DEBUG_TRACE, "Trace instruction conversion" },
   { "dump_module", DXIL_DEBUG_DUMP_MODULE, "dump module tree to stderr" },
   DEBUG_NAMED_VALUE_END
};

DEBUG_GET_ONCE_FLAGS_OPTION(debug_dxil, "DXIL_DEBUG", dxil_debug_options, 0)

static void
log_nir_instr_unsupported(const struct dxil_logger *logger,
                          const char *message_prefix, const nir_instr *instr)
{
   char *msg = NULL;
   char *instr_str = nir_instr_as_str(instr, NULL);
   asprintf(&msg, "%s: %s\n", message_prefix, instr_str);
   ralloc_free(instr_str);
   assert(msg);
   logger->log(logger->priv, msg);
   free(msg);
}

static void
default_logger_func(void *priv, const char *msg)
{
   fprintf(stderr, "%s", msg);
   unreachable("Unhandled error");
}

static const struct dxil_logger default_logger = { .priv = NULL, .log = default_logger_func };

#define TRACE_CONVERSION(instr) \
   if (debug_dxil & DXIL_DEBUG_TRACE) \
      do { \
         fprintf(stderr, "Convert '"); \
         nir_print_instr(instr, stderr); \
         fprintf(stderr, "'\n"); \
      } while (0)

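/* Baseline NIR compiler options: the lower_ flags request NIR lowerings for
 * operations that have no direct DXIL encoding, and the has_ flags advertise
 * operations DXIL supports natively, so the NIR that reaches this backend
 * maps more or less one-to-one onto DXIL opcodes.
 */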
static const nir_shader_compiler_options
nir_options = {
   .compact_arrays = true,
   .lower_ineg = true,
   .lower_fneg = true,
   .lower_ffma16 = true,
   .lower_ffma32 = true,
   .lower_isign = true,
   .lower_fsign = true,
   .lower_iabs = true,
   .lower_fmod = true,
   .lower_fpow = true,
   .lower_scmp = true,
   .lower_ldexp = true,
   .lower_flrp16 = true,
   .lower_flrp32 = true,
   .lower_flrp64 = true,
   .lower_bitfield_extract = true,
   .lower_ifind_msb = true,
   .lower_ufind_msb = true,
   .lower_extract_word = true,
   .lower_extract_byte = true,
   .lower_insert_word = true,
   .lower_insert_byte = true,
   .lower_hadd = true,
   .lower_uadd_sat = true,
   .lower_usub_sat = true,
   .lower_iadd_sat = true,
   .lower_uadd_carry = true,
   .lower_usub_borrow = true,
   .lower_mul_high = true,
   .lower_pack_half_2x16 = true,
   .lower_pack_unorm_4x8 = true,
   .lower_pack_snorm_4x8 = true,
   .lower_pack_snorm_2x16 = true,
   .lower_pack_unorm_2x16 = true,
   .lower_pack_64_2x32_split = true,
   .lower_pack_32_2x16_split = true,
   .lower_pack_64_4x16 = true,
   .lower_unpack_64_2x32_split = true,
   .lower_unpack_32_2x16_split = true,
   .lower_unpack_half_2x16 = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_interpolate_at = true,
   .has_fsub = true,
   .has_isub = true,
   .has_bfe = true,
   .has_find_msb_rev = true,
   .vertex_id_zero_based = true,
   .lower_base_vertex = true,
   .lower_helper_invocation = true,
   .has_cs_global_id = true,
   .lower_mul_2x32_64 = true,
   .lower_doubles_options =
      nir_lower_drcp |
      nir_lower_dsqrt |
      nir_lower_drsq |
      nir_lower_dfract |
      nir_lower_dtrunc |
      nir_lower_dfloor |
      nir_lower_dceil |
      nir_lower_dround_even,
   .lower_uniforms_to_ubo = true,
   .max_unroll_iterations = 32, /* arbitrary */
   .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out),
   .lower_device_index_to_zero = true,
   .support_16bit_alu = true,
   .preserve_mediump = true,
   .discard_is_demote = true,
   .scalarize_ddx = true,
   .io_options = nir_io_dont_use_pos_for_non_fs_varyings | nir_io_mediump_is_32bit,
};

const nir_shader_compiler_options*
dxil_get_base_nir_compiler_options(void)
{
   return &nir_options;
}

void
dxil_get_nir_compiler_options(nir_shader_compiler_options *options,
                              enum dxil_shader_model shader_model_max,
                              unsigned supported_int_sizes,
                              unsigned supported_float_sizes)
{
   *options = nir_options;
   if (!(supported_int_sizes & 64)) {
      options->lower_pack_64_2x32_split = false;
      options->lower_unpack_64_2x32_split = false;
      options->lower_int64_options = ~0;
   }
   if (!(supported_float_sizes & 64))
      options->lower_doubles_options = ~0;
   if (shader_model_max >= SHADER_MODEL_6_4) {
      options->has_sdot_4x8 = true;
      options->has_udot_4x8 = true;
   }
}
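
/* Illustrative use (hypothetical caller): a driver targeting SM 6.2 that
 * supports 16- and 32-bit integers but only 32-bit floats might do
 *
 *    nir_shader_compiler_options opts;
 *    dxil_get_nir_compiler_options(&opts, SHADER_MODEL_6_2, 16 | 32, 32);
 *
 * The two size arguments are bit masks of supported bit widths, which is
 * what the (supported_int_sizes & 64) style checks above rely on.
 */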

static bool
emit_llvm_ident(struct dxil_module *m)
{
   const struct dxil_mdnode *compiler = dxil_get_metadata_string(m, "Mesa version " PACKAGE_VERSION MESA_GIT_SHA1);
   if (!compiler)
      return false;

   const struct dxil_mdnode *llvm_ident = dxil_get_metadata_node(m, &compiler, 1);
   return llvm_ident &&
          dxil_add_metadata_named_node(m, "llvm.ident", &llvm_ident, 1);
}

static bool
emit_named_version(struct dxil_module *m, const char *name,
                   int major, int minor)
{
   const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, major);
   const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, minor);
   const struct dxil_mdnode *version_nodes[] = { major_node, minor_node };
   const struct dxil_mdnode *version = dxil_get_metadata_node(m, version_nodes,
                                                              ARRAY_SIZE(version_nodes));
   return dxil_add_metadata_named_node(m, name, &version, 1);
}

static const char *
get_shader_kind_str(enum dxil_shader_kind kind)
{
   switch (kind) {
   case DXIL_PIXEL_SHADER:
      return "ps";
   case DXIL_VERTEX_SHADER:
      return "vs";
   case DXIL_GEOMETRY_SHADER:
      return "gs";
   case DXIL_HULL_SHADER:
      return "hs";
   case DXIL_DOMAIN_SHADER:
      return "ds";
   case DXIL_COMPUTE_SHADER:
      return "cs";
   default:
      unreachable("invalid shader kind");
   }
}

static bool
emit_dx_shader_model(struct dxil_module *m)
{
   const struct dxil_mdnode *type_node = dxil_get_metadata_string(m, get_shader_kind_str(m->shader_kind));
   const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, m->major_version);
   const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, m->minor_version);
   const struct dxil_mdnode *shader_model[] = { type_node, major_node,
                                                minor_node };
   const struct dxil_mdnode *dx_shader_model = dxil_get_metadata_node(m, shader_model, ARRAY_SIZE(shader_model));

   return dxil_add_metadata_named_node(m, "dx.shaderModel",
                                       &dx_shader_model, 1);
}

enum {
   DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG = 0,
   DXIL_STRUCTURED_BUFFER_ELEMENT_STRIDE_TAG = 1
};

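/* Opcode numbers for the dx.op.* DXIL intrinsics used below. The values are
 * fixed by the DXIL specification (DXIL.rst in the DirectXShaderCompiler
 * repository) and must not be changed.
 */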
enum dxil_intr {
   DXIL_INTR_LOAD_INPUT = 4,
   DXIL_INTR_STORE_OUTPUT = 5,
   DXIL_INTR_FABS = 6,
   DXIL_INTR_SATURATE = 7,

   DXIL_INTR_ISFINITE = 10,
   DXIL_INTR_ISNORMAL = 11,

   DXIL_INTR_FCOS = 12,
   DXIL_INTR_FSIN = 13,

   DXIL_INTR_FEXP2 = 21,
   DXIL_INTR_FRC = 22,
   DXIL_INTR_FLOG2 = 23,

   DXIL_INTR_SQRT = 24,
   DXIL_INTR_RSQRT = 25,
   DXIL_INTR_ROUND_NE = 26,
   DXIL_INTR_ROUND_NI = 27,
   DXIL_INTR_ROUND_PI = 28,
   DXIL_INTR_ROUND_Z = 29,

   DXIL_INTR_BFREV = 30,
   DXIL_INTR_COUNTBITS = 31,
   DXIL_INTR_FIRSTBIT_LO = 32,
   DXIL_INTR_FIRSTBIT_HI = 33,
   DXIL_INTR_FIRSTBIT_SHI = 34,

   DXIL_INTR_FMAX = 35,
   DXIL_INTR_FMIN = 36,
   DXIL_INTR_IMAX = 37,
   DXIL_INTR_IMIN = 38,
   DXIL_INTR_UMAX = 39,
   DXIL_INTR_UMIN = 40,

   DXIL_INTR_FMA = 47,

   DXIL_INTR_IBFE = 51,
   DXIL_INTR_UBFE = 52,
   DXIL_INTR_BFI = 53,

   DXIL_INTR_CREATE_HANDLE = 57,
   DXIL_INTR_CBUFFER_LOAD_LEGACY = 59,

   DXIL_INTR_SAMPLE = 60,
   DXIL_INTR_SAMPLE_BIAS = 61,
   DXIL_INTR_SAMPLE_LEVEL = 62,
   DXIL_INTR_SAMPLE_GRAD = 63,
   DXIL_INTR_SAMPLE_CMP = 64,
   DXIL_INTR_SAMPLE_CMP_LVL_ZERO = 65,

   DXIL_INTR_TEXTURE_LOAD = 66,
   DXIL_INTR_TEXTURE_STORE = 67,

   DXIL_INTR_BUFFER_LOAD = 68,
   DXIL_INTR_BUFFER_STORE = 69,

   DXIL_INTR_TEXTURE_SIZE = 72,
   DXIL_INTR_TEXTURE_GATHER = 73,
   DXIL_INTR_TEXTURE_GATHER_CMP = 74,

   DXIL_INTR_TEXTURE2DMS_GET_SAMPLE_POSITION = 75,
   DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION = 76,
   DXIL_INTR_RENDER_TARGET_GET_SAMPLE_COUNT = 77,

   DXIL_INTR_ATOMIC_BINOP = 78,
   DXIL_INTR_ATOMIC_CMPXCHG = 79,
   DXIL_INTR_BARRIER = 80,
   DXIL_INTR_TEXTURE_LOD = 81,

   DXIL_INTR_DISCARD = 82,
   DXIL_INTR_DDX_COARSE = 83,
   DXIL_INTR_DDY_COARSE = 84,
   DXIL_INTR_DDX_FINE = 85,
   DXIL_INTR_DDY_FINE = 86,

   DXIL_INTR_EVAL_SNAPPED = 87,
   DXIL_INTR_EVAL_SAMPLE_INDEX = 88,
   DXIL_INTR_EVAL_CENTROID = 89,

   DXIL_INTR_SAMPLE_INDEX = 90,
   DXIL_INTR_COVERAGE = 91,

   DXIL_INTR_THREAD_ID = 93,
   DXIL_INTR_GROUP_ID = 94,
   DXIL_INTR_THREAD_ID_IN_GROUP = 95,
   DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP = 96,

   DXIL_INTR_EMIT_STREAM = 97,
   DXIL_INTR_CUT_STREAM = 98,

   DXIL_INTR_GS_INSTANCE_ID = 100,

   DXIL_INTR_MAKE_DOUBLE = 101,
   DXIL_INTR_SPLIT_DOUBLE = 102,

   DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT = 103,
   DXIL_INTR_LOAD_PATCH_CONSTANT = 104,
   DXIL_INTR_DOMAIN_LOCATION = 105,
   DXIL_INTR_STORE_PATCH_CONSTANT = 106,
   DXIL_INTR_OUTPUT_CONTROL_POINT_ID = 107,
   DXIL_INTR_PRIMITIVE_ID = 108,

   DXIL_INTR_WAVE_IS_FIRST_LANE = 110,
   DXIL_INTR_WAVE_GET_LANE_INDEX = 111,
   DXIL_INTR_WAVE_GET_LANE_COUNT = 112,
   DXIL_INTR_WAVE_ANY_TRUE = 113,
   DXIL_INTR_WAVE_ALL_TRUE = 114,
   DXIL_INTR_WAVE_ACTIVE_ALL_EQUAL = 115,
   DXIL_INTR_WAVE_ACTIVE_BALLOT = 116,
   DXIL_INTR_WAVE_READ_LANE_AT = 117,
   DXIL_INTR_WAVE_READ_LANE_FIRST = 118,
   DXIL_INTR_WAVE_ACTIVE_OP = 119,
   DXIL_INTR_WAVE_ACTIVE_BIT = 120,
   DXIL_INTR_WAVE_PREFIX_OP = 121,
   DXIL_INTR_QUAD_READ_LANE_AT = 122,
   DXIL_INTR_QUAD_OP = 123,

   DXIL_INTR_LEGACY_F32TOF16 = 130,
   DXIL_INTR_LEGACY_F16TOF32 = 131,

   DXIL_INTR_ATTRIBUTE_AT_VERTEX = 137,
   DXIL_INTR_VIEW_ID = 138,

   DXIL_INTR_RAW_BUFFER_LOAD = 139,
   DXIL_INTR_RAW_BUFFER_STORE = 140,

   DXIL_INTR_DOT4_ADD_I8_PACKED = 163,
   DXIL_INTR_DOT4_ADD_U8_PACKED = 164,

   DXIL_INTR_ANNOTATE_HANDLE = 216,
   DXIL_INTR_CREATE_HANDLE_FROM_BINDING = 217,
   DXIL_INTR_CREATE_HANDLE_FROM_HEAP = 218,

   DXIL_INTR_IS_HELPER_LANE = 221,
   DXIL_INTR_SAMPLE_CMP_LEVEL = 224,
   DXIL_INTR_SAMPLE_CMP_GRAD = 254,
   DXIL_INTR_SAMPLE_CMP_BIAS = 255,

   DXIL_INTR_START_VERTEX_LOCATION = 256,
   DXIL_INTR_START_INSTANCE_LOCATION = 257,
};

enum dxil_atomic_op {
   DXIL_ATOMIC_ADD = 0,
   DXIL_ATOMIC_AND = 1,
   DXIL_ATOMIC_OR = 2,
   DXIL_ATOMIC_XOR = 3,
   DXIL_ATOMIC_IMIN = 4,
   DXIL_ATOMIC_IMAX = 5,
   DXIL_ATOMIC_UMIN = 6,
   DXIL_ATOMIC_UMAX = 7,
   DXIL_ATOMIC_EXCHANGE = 8,
};

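/* Two parallel encodings for the same NIR atomics: DXIL_ATOMIC_* are the
 * operation codes taken by dx.op.atomicBinOp on resource handles, while the
 * DXIL_RMWOP_* mapping below targets LLVM atomicrmw instructions, used here
 * for atomics on groupshared memory.
 */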
static enum dxil_atomic_op
nir_atomic_to_dxil_atomic(nir_atomic_op op)
{
   switch (op) {
   case nir_atomic_op_iadd: return DXIL_ATOMIC_ADD;
   case nir_atomic_op_iand: return DXIL_ATOMIC_AND;
   case nir_atomic_op_ior: return DXIL_ATOMIC_OR;
   case nir_atomic_op_ixor: return DXIL_ATOMIC_XOR;
   case nir_atomic_op_imin: return DXIL_ATOMIC_IMIN;
   case nir_atomic_op_imax: return DXIL_ATOMIC_IMAX;
   case nir_atomic_op_umin: return DXIL_ATOMIC_UMIN;
   case nir_atomic_op_umax: return DXIL_ATOMIC_UMAX;
   case nir_atomic_op_xchg: return DXIL_ATOMIC_EXCHANGE;
   default: unreachable("Unsupported atomic op");
   }
}

static enum dxil_rmw_op
nir_atomic_to_dxil_rmw(nir_atomic_op op)
{
   switch (op) {
   case nir_atomic_op_iadd: return DXIL_RMWOP_ADD;
   case nir_atomic_op_iand: return DXIL_RMWOP_AND;
   case nir_atomic_op_ior: return DXIL_RMWOP_OR;
   case nir_atomic_op_ixor: return DXIL_RMWOP_XOR;
   case nir_atomic_op_imin: return DXIL_RMWOP_MIN;
   case nir_atomic_op_imax: return DXIL_RMWOP_MAX;
   case nir_atomic_op_umin: return DXIL_RMWOP_UMIN;
   case nir_atomic_op_umax: return DXIL_RMWOP_UMAX;
   case nir_atomic_op_xchg: return DXIL_RMWOP_XCHG;
   default: unreachable("Unsupported atomic op");
   }
}

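/* One contiguous register range of a resource array: the resource metadata
 * ID, the base binding (lower bound), the number of elements, and the
 * register space.
 */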
typedef struct {
   unsigned id;
   unsigned binding;
   unsigned size;
   unsigned space;
} resource_array_layout;

static void
fill_resource_metadata(struct dxil_module *m, const struct dxil_mdnode **fields,
                       const struct dxil_type *struct_type,
                       const char *name, const resource_array_layout *layout)
{
   const struct dxil_type *pointer_type = dxil_module_get_pointer_type(m, struct_type);
   const struct dxil_value *pointer_undef = dxil_module_get_undef(m, pointer_type);

   fields[0] = dxil_get_metadata_int32(m, layout->id); // resource ID
   fields[1] = dxil_get_metadata_value(m, pointer_type, pointer_undef); // global constant symbol
   fields[2] = dxil_get_metadata_string(m, name ? name : ""); // name
   fields[3] = dxil_get_metadata_int32(m, layout->space); // space ID
   fields[4] = dxil_get_metadata_int32(m, layout->binding); // lower bound
   fields[5] = dxil_get_metadata_int32(m, layout->size); // range size
}

static const struct dxil_mdnode *
emit_srv_metadata(struct dxil_module *m, const struct dxil_type *elem_type,
                  const char *name, const resource_array_layout *layout,
                  enum dxil_component_type comp_type,
                  enum dxil_resource_kind res_kind)
{
   const struct dxil_mdnode *fields[9];

   const struct dxil_mdnode *metadata_tag_nodes[2];

   fill_resource_metadata(m, fields, elem_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
   fields[7] = dxil_get_metadata_int1(m, 0); // sample count
   if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
       res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
      metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
      metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
      fields[8] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
   } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      fields[8] = NULL;
   else
      unreachable("Structured buffers not supported yet");

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_uav_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                  const char *name, const resource_array_layout *layout,
                  enum dxil_component_type comp_type,
                  enum dxil_resource_kind res_kind,
                  enum gl_access_qualifier access)
{
   const struct dxil_mdnode *fields[11];

   const struct dxil_mdnode *metadata_tag_nodes[2];

   fill_resource_metadata(m, fields, struct_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
   fields[7] = dxil_get_metadata_int1(m, (access & ACCESS_COHERENT) != 0); // globally-coherent
   fields[8] = dxil_get_metadata_int1(m, false); // has counter
   fields[9] = dxil_get_metadata_int1(m, false); // is ROV
   if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
       res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
      metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
      metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
      fields[10] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
   } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      fields[10] = NULL;
   else
      unreachable("Structured buffers not supported yet");

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_cbv_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                  const char *name, const resource_array_layout *layout,
                  unsigned size)
{
   const struct dxil_mdnode *fields[8];

   fill_resource_metadata(m, fields, struct_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, size); // constant buffer size
   fields[7] = NULL; // metadata

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_sampler_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                      nir_variable *var, const resource_array_layout *layout)
{
   const struct dxil_mdnode *fields[8];
   const struct glsl_type *type = glsl_without_array(var->type);

   fill_resource_metadata(m, fields, struct_type, var->name, layout);
   enum dxil_sampler_kind sampler_kind = glsl_sampler_type_is_shadow(type) ?
      DXIL_SAMPLER_KIND_COMPARISON : DXIL_SAMPLER_KIND_DEFAULT;
   fields[6] = dxil_get_metadata_int32(m, sampler_kind); // sampler kind
   fields[7] = NULL; // metadata

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}


#define MAX_SRVS 128
#define MAX_UAVS 64
#define MAX_CBVS 64 // ??
#define MAX_SAMPLERS 64 // ??

struct dxil_def {
   const struct dxil_value *chans[NIR_MAX_VEC_COMPONENTS];
};

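/* All state for translating one nir_shader: the DXIL module under
 * construction, metadata nodes and cached handles per resource class
 * (SRV/UAV/CBV/sampler), the NIR SSA def -> dxil_value mapping, and
 * bookkeeping for the functions being emitted.
 */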
struct ntd_context {
   void *ralloc_ctx;
   const struct nir_to_dxil_options *opts;
   struct nir_shader *shader;

   struct dxil_module mod;

   struct util_dynarray srv_metadata_nodes;
   const struct dxil_value *srv_handles[MAX_SRVS];

   struct util_dynarray uav_metadata_nodes;
   const struct dxil_value *ssbo_handles[MAX_UAVS];
   const struct dxil_value *image_handles[MAX_UAVS];
   uint32_t num_uavs;

   struct util_dynarray cbv_metadata_nodes;
   const struct dxil_value *cbv_handles[MAX_CBVS];

   struct util_dynarray sampler_metadata_nodes;
   const struct dxil_value *sampler_handles[MAX_SAMPLERS];

   struct util_dynarray resources;

   const struct dxil_mdnode *shader_property_nodes[6];
   size_t num_shader_property_nodes;

   struct dxil_def *defs;
   unsigned num_defs;
   struct hash_table *phis;

   const struct dxil_value **sharedvars;
   const struct dxil_value **scratchvars;
   const struct dxil_value **consts;

   nir_variable *system_value[SYSTEM_VALUE_MAX];

   nir_function *tess_ctrl_patch_constant_func;
   unsigned tess_input_control_point_count;

   struct dxil_func_def *main_func_def;
   struct dxil_func_def *tess_ctrl_patch_constant_func_def;
   unsigned unnamed_ubo_count;

   BITSET_WORD *float_types;
   BITSET_WORD *int_types;

   const struct dxil_logger *logger;
};

static const char*
unary_func_name(enum dxil_intr intr)
{
   switch (intr) {
   case DXIL_INTR_COUNTBITS:
   case DXIL_INTR_FIRSTBIT_HI:
   case DXIL_INTR_FIRSTBIT_SHI:
   case DXIL_INTR_FIRSTBIT_LO:
      return "dx.op.unaryBits";
   case DXIL_INTR_ISFINITE:
   case DXIL_INTR_ISNORMAL:
      return "dx.op.isSpecialFloat";
   default:
      return "dx.op.unary";
   }
}

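/* Every dx.op.* call follows the same convention: the first argument is an
 * i32 constant carrying the opcode from enum dxil_intr, and the callee is
 * looked up with an overload type that selects the operand bit width.
 */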
static const struct dxil_value *
emit_unary_call(struct ntd_context *ctx, enum overload_type overload,
                enum dxil_intr intr,
                const struct dxil_value *op0)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod,
                                                    unary_func_name(intr),
                                                    overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_binary_call(struct ntd_context *ctx, enum overload_type overload,
                 enum dxil_intr intr,
                 const struct dxil_value *op0, const struct dxil_value *op1)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.binary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0,
      op1
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_tertiary_call(struct ntd_context *ctx, enum overload_type overload,
                   enum dxil_intr intr,
                   const struct dxil_value *op0,
                   const struct dxil_value *op1,
                   const struct dxil_value *op2)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.tertiary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0,
      op1,
      op2
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_quaternary_call(struct ntd_context *ctx, enum overload_type overload,
                     enum dxil_intr intr,
                     const struct dxil_value *op0,
                     const struct dxil_value *op1,
                     const struct dxil_value *op2,
                     const struct dxil_value *op3)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.quaternary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0,
      op1,
      op2,
      op3
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_threadid_call(struct ntd_context *ctx, const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadId", DXIL_I32);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_THREAD_ID);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_threadidingroup_call(struct ntd_context *ctx,
                          const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadIdInGroup", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_THREAD_ID_IN_GROUP);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_flattenedthreadidingroup_call(struct ntd_context *ctx)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.flattenedThreadIdInGroup", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_groupid_call(struct ntd_context *ctx, const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.groupId", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_GROUP_ID);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_raw_bufferload_call(struct ntd_context *ctx,
                         const struct dxil_value *handle,
                         const struct dxil_value *coord[2],
                         enum overload_type overload,
                         unsigned component_count,
                         unsigned alignment)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.rawBufferLoad", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_RAW_BUFFER_LOAD);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1],
      dxil_module_get_int8_const(&ctx->mod, (1 << component_count) - 1),
      dxil_module_get_int32_const(&ctx->mod, alignment),
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_bufferload_call(struct ntd_context *ctx,
                     const struct dxil_value *handle,
                     const struct dxil_value *coord[2],
                     enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferLoad", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_BUFFER_LOAD);
   const struct dxil_value *args[] = { opcode, handle, coord[0], coord[1] };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_raw_bufferstore_call(struct ntd_context *ctx,
                          const struct dxil_value *handle,
                          const struct dxil_value *coord[2],
                          const struct dxil_value *value[4],
                          const struct dxil_value *write_mask,
                          enum overload_type overload,
                          unsigned alignment)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.rawBufferStore", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_RAW_BUFFER_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1],
      value[0], value[1], value[2], value[3],
      write_mask,
      dxil_module_get_int32_const(&ctx->mod, alignment),
   };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static bool
emit_bufferstore_call(struct ntd_context *ctx,
                      const struct dxil_value *handle,
                      const struct dxil_value *coord[2],
                      const struct dxil_value *value[4],
                      const struct dxil_value *write_mask,
                      enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferStore", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_BUFFER_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1],
      value[0], value[1], value[2], value[3],
      write_mask
   };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_textureload_call(struct ntd_context *ctx,
                      const struct dxil_value *handle,
                      const struct dxil_value *coord[3],
                      enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", overload);
   if (!func)
      return NULL;
   const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
   const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_TEXTURE_LOAD);
   const struct dxil_value *args[] = { opcode, handle,
      /*lod_or_sample*/ int_undef,
      coord[0], coord[1], coord[2],
      /* offsets */ int_undef, int_undef, int_undef};

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_texturestore_call(struct ntd_context *ctx,
                       const struct dxil_value *handle,
                       const struct dxil_value *coord[3],
                       const struct dxil_value *value[4],
                       const struct dxil_value *write_mask,
                       enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureStore", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_TEXTURE_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1], coord[2],
      value[0], value[1], value[2], value[3],
      write_mask
   };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_atomic_binop(struct ntd_context *ctx,
                  const struct dxil_value *handle,
                  enum dxil_atomic_op atomic_op,
                  const struct dxil_value *coord[3],
                  const struct dxil_value *value)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.atomicBinOp", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_BINOP);
   const struct dxil_value *atomic_op_value =
      dxil_module_get_int32_const(&ctx->mod, atomic_op);
   const struct dxil_value *args[] = {
      opcode, handle, atomic_op_value,
      coord[0], coord[1], coord[2], value
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_atomic_cmpxchg(struct ntd_context *ctx,
                    const struct dxil_value *handle,
                    const struct dxil_value *coord[3],
                    const struct dxil_value *cmpval,
                    const struct dxil_value *newval)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.atomicCompareExchange", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_CMPXCHG);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1], coord[2], cmpval, newval
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_createhandle_call_pre_6_6(struct ntd_context *ctx,
                               enum dxil_resource_class resource_class,
                               unsigned lower_bound,
                               unsigned upper_bound,
                               unsigned space,
                               unsigned resource_range_id,
                               const struct dxil_value *resource_range_index,
                               bool non_uniform_resource_index)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE);
   const struct dxil_value *resource_class_value = dxil_module_get_int8_const(&ctx->mod, resource_class);
   const struct dxil_value *resource_range_id_value = dxil_module_get_int32_const(&ctx->mod, resource_range_id);
   const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
   if (!opcode || !resource_class_value || !resource_range_id_value ||
       !non_uniform_resource_index_value)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      resource_class_value,
      resource_range_id_value,
      resource_range_index,
      non_uniform_resource_index_value
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.createHandle", DXIL_NONE);

   if (!func)
      return NULL;

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_annotate_handle(struct ntd_context *ctx,
                     const struct dxil_value *unannotated_handle,
                     const struct dxil_value *res_props)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ANNOTATE_HANDLE);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      unannotated_handle,
      res_props
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.annotateHandle", DXIL_NONE);

   if (!func)
      return NULL;

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_annotate_handle_from_metadata(struct ntd_context *ctx,
                                   enum dxil_resource_class resource_class,
                                   unsigned resource_range_id,
                                   const struct dxil_value *unannotated_handle)
{
   const struct util_dynarray *mdnodes;
   switch (resource_class) {
   case DXIL_RESOURCE_CLASS_SRV:
      mdnodes = &ctx->srv_metadata_nodes;
      break;
   case DXIL_RESOURCE_CLASS_UAV:
      mdnodes = &ctx->uav_metadata_nodes;
      break;
   case DXIL_RESOURCE_CLASS_CBV:
      mdnodes = &ctx->cbv_metadata_nodes;
      break;
   case DXIL_RESOURCE_CLASS_SAMPLER:
      mdnodes = &ctx->sampler_metadata_nodes;
      break;
   default:
      unreachable("Invalid resource class");
   }

   const struct dxil_mdnode *mdnode = *util_dynarray_element(mdnodes, const struct dxil_mdnode *, resource_range_id);
   const struct dxil_value *res_props = dxil_module_get_res_props_const(&ctx->mod, resource_class, mdnode);
   if (!res_props)
      return NULL;

   return emit_annotate_handle(ctx, unannotated_handle, res_props);
}

static const struct dxil_value *
emit_createhandle_and_annotate(struct ntd_context *ctx,
                               enum dxil_resource_class resource_class,
                               unsigned lower_bound,
                               unsigned upper_bound,
                               unsigned space,
                               unsigned resource_range_id,
                               const struct dxil_value *resource_range_index,
                               bool non_uniform_resource_index)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE_FROM_BINDING);
   const struct dxil_value *res_bind = dxil_module_get_res_bind_const(&ctx->mod, lower_bound, upper_bound, space, resource_class);
   const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
   if (!opcode || !res_bind || !non_uniform_resource_index_value)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      res_bind,
      resource_range_index,
      non_uniform_resource_index_value
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.createHandleFromBinding", DXIL_NONE);

   if (!func)
      return NULL;

   const struct dxil_value *unannotated_handle = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!unannotated_handle)
      return NULL;

   return emit_annotate_handle_from_metadata(ctx, resource_class, resource_range_id, unannotated_handle);
}

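/* Shader model 6.6 split handle creation in two: createHandleFromBinding
 * yields an unannotated handle that must then be annotated with the resource
 * properties, while earlier shader models use the single createHandle op.
 */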
static const struct dxil_value *
emit_createhandle_call(struct ntd_context *ctx,
                       enum dxil_resource_class resource_class,
                       unsigned lower_bound,
                       unsigned upper_bound,
                       unsigned space,
                       unsigned resource_range_id,
                       const struct dxil_value *resource_range_index,
                       bool non_uniform_resource_index)
{
   if (ctx->mod.minor_version < 6)
      return emit_createhandle_call_pre_6_6(ctx, resource_class, lower_bound, upper_bound, space, resource_range_id, resource_range_index, non_uniform_resource_index);
   else
      return emit_createhandle_and_annotate(ctx, resource_class, lower_bound, upper_bound, space, resource_range_id, resource_range_index, non_uniform_resource_index);
}

static const struct dxil_value *
emit_createhandle_call_const_index(struct ntd_context *ctx,
                                   enum dxil_resource_class resource_class,
                                   unsigned lower_bound,
                                   unsigned upper_bound,
                                   unsigned space,
                                   unsigned resource_range_id,
                                   unsigned resource_range_index,
                                   bool non_uniform_resource_index)
{
   const struct dxil_value *resource_range_index_value = dxil_module_get_int32_const(&ctx->mod, resource_range_index);
   if (!resource_range_index_value)
      return NULL;

   return emit_createhandle_call(ctx, resource_class, lower_bound, upper_bound, space,
                                 resource_range_id, resource_range_index_value,
                                 non_uniform_resource_index);
}

static const struct dxil_value *
emit_createhandle_heap(struct ntd_context *ctx,
                       const struct dxil_value *resource_range_index,
                       bool is_sampler,
                       bool non_uniform_resource_index)
{
   if (is_sampler)
      ctx->mod.feats.sampler_descriptor_heap_indexing = true;
   else
      ctx->mod.feats.resource_descriptor_heap_indexing = true;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE_FROM_HEAP);
   const struct dxil_value *sampler = dxil_module_get_int1_const(&ctx->mod, is_sampler);
   const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
   if (!opcode || !sampler || !non_uniform_resource_index_value)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      resource_range_index,
      sampler,
      non_uniform_resource_index_value
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.createHandleFromHeap", DXIL_NONE);

   if (!func)
      return NULL;

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

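/* Appends a resource range to the serialized resource table; validator 1.6+
 * uses the larger v1 records, which also carry the resource kind and flags.
 */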
static void
add_resource(struct ntd_context *ctx, enum dxil_resource_type type,
             enum dxil_resource_kind kind,
             const resource_array_layout *layout)
{
   struct dxil_resource_v0 *resource_v0 = NULL;
   struct dxil_resource_v1 *resource_v1 = NULL;
   if (ctx->mod.minor_validator >= 6) {
      resource_v1 = util_dynarray_grow(&ctx->resources, struct dxil_resource_v1, 1);
      resource_v0 = &resource_v1->v0;
   } else {
      resource_v0 = util_dynarray_grow(&ctx->resources, struct dxil_resource_v0, 1);
   }
   resource_v0->resource_type = type;
   resource_v0->space = layout->space;
   resource_v0->lower_bound = layout->binding;
   if (layout->size == 0 || (uint64_t)layout->size + layout->binding >= UINT_MAX)
      resource_v0->upper_bound = UINT_MAX;
   else
      resource_v0->upper_bound = layout->binding + layout->size - 1;
   if (type == DXIL_RES_UAV_TYPED ||
       type == DXIL_RES_UAV_RAW ||
       type == DXIL_RES_UAV_STRUCTURED) {
      uint32_t new_uav_count = ctx->num_uavs + layout->size;
      if (layout->size == 0 || new_uav_count < ctx->num_uavs)
         ctx->num_uavs = UINT_MAX;
      else
         ctx->num_uavs = new_uav_count;
      if (ctx->mod.minor_validator >= 6 && ctx->num_uavs > 8)
         ctx->mod.feats.use_64uavs = 1;
   }

   if (resource_v1) {
      resource_v1->resource_kind = kind;
      /* No flags supported yet */
      resource_v1->resource_flags = 0;
   }
}

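/* Looks up the declared range containing `binding` and creates a handle for
 * it. The resources array is ordered CBVs, then samplers, then SRVs, then
 * UAVs; the offset computation below relies on that ordering.
 */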
static const struct dxil_value *
emit_createhandle_call_dynamic(struct ntd_context *ctx,
                               enum dxil_resource_class resource_class,
                               unsigned space,
                               unsigned binding,
                               const struct dxil_value *resource_range_index,
                               bool non_uniform_resource_index)
{
   unsigned offset = 0;
   unsigned count = 0;

   unsigned num_srvs = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_uavs = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_cbvs = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_samplers = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);

   switch (resource_class) {
   case DXIL_RESOURCE_CLASS_UAV:
      offset = num_srvs + num_samplers + num_cbvs;
      count = num_uavs;
      break;
   case DXIL_RESOURCE_CLASS_SRV:
      offset = num_samplers + num_cbvs;
      count = num_srvs;
      break;
   case DXIL_RESOURCE_CLASS_SAMPLER:
      offset = num_cbvs;
      count = num_samplers;
      break;
   case DXIL_RESOURCE_CLASS_CBV:
      offset = 0;
      count = num_cbvs;
      break;
   }

   unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
      sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
   assert(offset + count <= ctx->resources.size / resource_element_size);
   for (unsigned i = offset; i < offset + count; ++i) {
      const struct dxil_resource_v0 *resource = (const struct dxil_resource_v0 *)((const char *)ctx->resources.data + resource_element_size * i);
      if (resource->space == space &&
          resource->lower_bound <= binding &&
          resource->upper_bound >= binding) {
         return emit_createhandle_call(ctx, resource_class, resource->lower_bound,
                                       resource->upper_bound, space,
                                       i - offset,
                                       resource_range_index,
                                       non_uniform_resource_index);
      }
   }

   unreachable("Resource access for undeclared range");
}

static bool
emit_srv(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   unsigned id = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
   unsigned binding = var->data.binding;
   resource_array_layout layout = {id, binding, count, var->data.descriptor_set};

   enum dxil_component_type comp_type;
   enum dxil_resource_kind res_kind;
   enum dxil_resource_type res_type;
   if (var->data.mode == nir_var_mem_ssbo) {
      comp_type = DXIL_COMP_TYPE_INVALID;
      res_kind = DXIL_RESOURCE_KIND_RAW_BUFFER;
      res_type = DXIL_RES_SRV_RAW;
   } else {
      comp_type = dxil_get_comp_type(var->type);
      res_kind = dxil_get_resource_kind(var->type);
      res_type = DXIL_RES_SRV_TYPED;
   }
   const struct dxil_type *res_type_as_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, 4, false /* readwrite */);

   if (glsl_type_is_array(var->type))
      res_type_as_type = dxil_module_get_array_type(&ctx->mod, res_type_as_type, count);

   const struct dxil_mdnode *srv_meta = emit_srv_metadata(&ctx->mod, res_type_as_type, var->name,
                                                          &layout, comp_type, res_kind);

   if (!srv_meta)
      return false;

   util_dynarray_append(&ctx->srv_metadata_nodes, const struct dxil_mdnode *, srv_meta);
   add_resource(ctx, res_type, res_kind, &layout);
   if (res_type == DXIL_RES_SRV_RAW)
      ctx->mod.raw_and_structured_buffers = true;

   return true;
}

static bool
emit_uav(struct ntd_context *ctx, unsigned binding, unsigned space, unsigned count,
         enum dxil_component_type comp_type, unsigned num_comps, enum dxil_resource_kind res_kind,
         enum gl_access_qualifier access, const char *name)
{
   unsigned id = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
   resource_array_layout layout = { id, binding, count, space };

   const struct dxil_type *res_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, num_comps, true /* readwrite */);
   res_type = dxil_module_get_array_type(&ctx->mod, res_type, count);
   const struct dxil_mdnode *uav_meta = emit_uav_metadata(&ctx->mod, res_type, name,
                                                          &layout, comp_type, res_kind, access);

   if (!uav_meta)
      return false;

   util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta);
   if (ctx->mod.minor_validator < 6 &&
       util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8)
      ctx->mod.feats.use_64uavs = 1;

   add_resource(ctx, res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER ? DXIL_RES_UAV_RAW : DXIL_RES_UAV_TYPED, res_kind, &layout);
   if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      ctx->mod.raw_and_structured_buffers = true;
   if (ctx->mod.shader_kind != DXIL_PIXEL_SHADER &&
       ctx->mod.shader_kind != DXIL_COMPUTE_SHADER)
      ctx->mod.feats.uavs_at_every_stage = true;

   return true;
}

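/* All SSBOs are folded into a single "globals" array of raw-buffer UAVs at
 * binding 0, space 0; the incoming `size` is the number of extra slots the
 * caller wants reserved in addition to the shader's SSBO variables.
 */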
static bool
emit_globals(struct ntd_context *ctx, unsigned size)
{
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo)
      size++;

   if (!size)
      return true;

   if (!emit_uav(ctx, 0, 0, size, DXIL_COMP_TYPE_INVALID, 1, DXIL_RESOURCE_KIND_RAW_BUFFER, 0, "globals"))
      return false;

   return true;
}

static bool
emit_uav_var(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   unsigned binding, space;
   if (ctx->opts->environment == DXIL_ENVIRONMENT_GL) {
      /* For GL, the image intrinsics are already lowered, using driver_location
       * as the 0-based image index. Use space 1 so that we can keep using these
       * NIR constants without having to remap them, and so they don't overlap
       * SSBOs, which are also 0-based UAV bindings.
       */
      binding = var->data.driver_location;
      space = 1;
   } else {
      binding = var->data.binding;
      space = var->data.descriptor_set;
   }
   enum dxil_component_type comp_type = dxil_get_comp_type(var->type);
   enum dxil_resource_kind res_kind = dxil_get_resource_kind(var->type);
   const char *name = var->name;

   return emit_uav(ctx, binding, space, count, comp_type,
                   util_format_get_nr_components(var->data.image.format),
                   res_kind, var->data.access, name);
}

static const struct dxil_value *
get_value_for_const(struct dxil_module *mod, nir_const_value *c, const struct dxil_type *type)
{
   if (type == mod->int1_type) return dxil_module_get_int1_const(mod, c->b);
   if (type == mod->float32_type) return dxil_module_get_float_const(mod, c->f32);
   if (type == mod->int32_type) return dxil_module_get_int32_const(mod, c->i32);
   if (type == mod->int16_type) {
      mod->feats.min_precision = true;
      return dxil_module_get_int16_const(mod, c->i16);
   }
   if (type == mod->int64_type) {
      mod->feats.int64_ops = true;
      return dxil_module_get_int64_const(mod, c->i64);
   }
   if (type == mod->float16_type) {
      mod->feats.min_precision = true;
      return dxil_module_get_float16_const(mod, c->u16);
   }
   if (type == mod->float64_type) {
      mod->feats.doubles = true;
      return dxil_module_get_double_const(mod, c->f64);
   }
   unreachable("Invalid type");
}

static const struct dxil_type *
get_type_for_glsl_base_type(struct dxil_module *mod, enum glsl_base_type type)
{
   uint32_t bit_size = glsl_base_type_bit_size(type);
   if (nir_alu_type_get_base_type(nir_get_nir_type_for_glsl_base_type(type)) == nir_type_float)
      return dxil_module_get_float_type(mod, bit_size);
   return dxil_module_get_int_type(mod, bit_size);
}

static const struct dxil_type *
get_type_for_glsl_type(struct dxil_module *mod, const struct glsl_type *type)
{
   if (glsl_type_is_scalar(type))
      return get_type_for_glsl_base_type(mod, glsl_get_base_type(type));

   if (glsl_type_is_vector(type))
      return dxil_module_get_vector_type(mod, get_type_for_glsl_base_type(mod, glsl_get_base_type(type)),
                                         glsl_get_vector_elements(type));

   if (glsl_type_is_array(type))
      return dxil_module_get_array_type(mod, get_type_for_glsl_type(mod, glsl_get_array_element(type)),
                                        glsl_array_size(type));

   assert(glsl_type_is_struct(type));
   uint32_t size = glsl_get_length(type);
   const struct dxil_type **fields = calloc(sizeof(const struct dxil_type *), size);
   for (uint32_t i = 0; i < size; ++i)
      fields[i] = get_type_for_glsl_type(mod, glsl_get_struct_field(type, i));
   const struct dxil_type *ret = dxil_module_get_struct_type(mod, glsl_get_type_name(type), fields, size);
   free((void *)fields);
   return ret;
}

static const struct dxil_value *
get_value_for_const_aggregate(struct dxil_module *mod, nir_constant *c, const struct glsl_type *type)
{
   const struct dxil_type *dxil_type = get_type_for_glsl_type(mod, type);
   if (glsl_type_is_vector_or_scalar(type)) {
      const struct dxil_type *element_type = get_type_for_glsl_base_type(mod, glsl_get_base_type(type));
      const struct dxil_value *elements[NIR_MAX_VEC_COMPONENTS];
      for (uint32_t i = 0; i < glsl_get_vector_elements(type); ++i)
         elements[i] = get_value_for_const(mod, &c->values[i], element_type);
      if (glsl_type_is_scalar(type))
         return elements[0];
      return dxil_module_get_vector_const(mod, dxil_type, elements);
   }

   uint32_t num_values = glsl_get_length(type);
   assert(num_values == c->num_elements);
   const struct dxil_value **values = calloc(sizeof(const struct dxil_value *), num_values);
   const struct dxil_value *ret;
   if (glsl_type_is_array(type)) {
      const struct glsl_type *element_type = glsl_get_array_element(type);
      for (uint32_t i = 0; i < num_values; ++i)
         values[i] = get_value_for_const_aggregate(mod, c->elements[i], element_type);
      ret = dxil_module_get_array_const(mod, dxil_type, values);
   } else {
      for (uint32_t i = 0; i < num_values; ++i)
         values[i] = get_value_for_const_aggregate(mod, c->elements[i], glsl_get_struct_field(type, i));
      ret = dxil_module_get_struct_const(mod, dxil_type, values);
   }
   free((void *)values);
   return ret;
}

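/* Materializes nir_var_mem_constant variables as module-level globals: each
 * variable's constant_initializer is converted into a DXIL aggregate
 * constant and stored at its driver_location index in ctx->consts.
 */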
static bool
emit_global_consts(struct ntd_context *ctx)
{
   uint32_t index = 0;
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_constant) {
      assert(var->constant_initializer);
      var->data.driver_location = index++;
   }

   ctx->consts = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index);

   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_constant) {
      if (!var->name)
         var->name = ralloc_asprintf(var, "const_%d", var->data.driver_location);

      const struct dxil_value *agg_vals =
         get_value_for_const_aggregate(&ctx->mod, var->constant_initializer, var->type);
      if (!agg_vals)
         return false;

      const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name,
                                                              dxil_value_get_type(agg_vals),
                                                              DXIL_AS_DEFAULT, 16,
                                                              agg_vals);
      if (!gvar)
         return false;

      ctx->consts[var->data.driver_location] = gvar;
   }

   return true;
}

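/* Shared (workgroup) variables become uninitialized globals in the
 * groupshared address space, likewise indexed by driver_location.
 */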
static bool
emit_shared_vars(struct ntd_context *ctx)
{
   uint32_t index = 0;
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_shared)
      var->data.driver_location = index++;

   ctx->sharedvars = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index);

   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_shared) {
      if (!var->name)
         var->name = ralloc_asprintf(var, "shared_%d", var->data.driver_location);
      const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name,
                                                              get_type_for_glsl_type(&ctx->mod, var->type),
                                                              DXIL_AS_GROUPSHARED, 16,
                                                              NULL);
      if (!gvar)
         return false;

      ctx->sharedvars[var->data.driver_location] = gvar;
   }

   return true;
}

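/* Declare a constant buffer view. The CBV is modeled as a struct wrapping a
 * float array; note that `size` is in 32-bit dwords, while the metadata
 * records the buffer size in bytes (4 * size).
 */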
static bool
emit_cbv(struct ntd_context *ctx, unsigned binding, unsigned space,
         unsigned size, unsigned count, char *name)
{
   assert(count != 0);

   unsigned idx = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);

   const struct dxil_type *float32 = dxil_module_get_float_type(&ctx->mod, 32);
   const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, float32, size);
   const struct dxil_type *buffer_type = dxil_module_get_struct_type(&ctx->mod, name,
                                                                     &array_type, 1);
   // All ubo[1]s should have been lowered to ubo with static indexing
   const struct dxil_type *final_type = count != 1 ? dxil_module_get_array_type(&ctx->mod, buffer_type, count) : buffer_type;
   resource_array_layout layout = {idx, binding, count, space};
   const struct dxil_mdnode *cbv_meta = emit_cbv_metadata(&ctx->mod, final_type,
                                                          name, &layout, 4 * size);

   if (!cbv_meta)
      return false;

   util_dynarray_append(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *, cbv_meta);
   add_resource(ctx, DXIL_RES_CBV, DXIL_RESOURCE_KIND_CBUFFER, &layout);

   return true;
}

static bool
emit_ubo_var(struct ntd_context *ctx, nir_variable *var)
{
   unsigned count = 1;
   if (glsl_type_is_array(var->type))
      count = glsl_get_length(var->type);

   char *name = var->name;
   char temp_name[30];
   if (name && strlen(name) == 0) {
      snprintf(temp_name, sizeof(temp_name), "__unnamed_ubo_%d",
               ctx->unnamed_ubo_count++);
      name = temp_name;
   }

   const struct glsl_type *type = glsl_without_array(var->type);
   assert(glsl_type_is_struct(type) || glsl_type_is_interface(type));
   unsigned dwords = ALIGN_POT(glsl_get_explicit_size(type, false), 16) / 4;

   return emit_cbv(ctx, var->data.binding, var->data.descriptor_set,
                   dwords, count, name);
}

static bool
emit_sampler(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   unsigned id = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);
   unsigned binding = var->data.binding;
   resource_array_layout layout = {id, binding, count, var->data.descriptor_set};
   const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
   const struct dxil_type *sampler_type = dxil_module_get_struct_type(&ctx->mod, "struct.SamplerState", &int32_type, 1);

   if (glsl_type_is_array(var->type))
      sampler_type = dxil_module_get_array_type(&ctx->mod, sampler_type, count);

   const struct dxil_mdnode *sampler_meta = emit_sampler_metadata(&ctx->mod, sampler_type, var, &layout);

   if (!sampler_meta)
      return false;

   util_dynarray_append(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *, sampler_meta);
   add_resource(ctx, DXIL_RES_SAMPLER, DXIL_RESOURCE_KIND_SAMPLER, &layout);

   return true;
}

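/* Pre-create handles for resources that are only accessed with constant
 * indices; `id` numbers resources within each class and restarts whenever
 * the resource class changes.
 */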
static bool
emit_static_indexing_handles(struct ntd_context *ctx)
{
   /* Vulkan always uses dynamic handles, from instructions in the NIR */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN)
      return true;

   unsigned last_res_class = -1;
   unsigned id = 0;

   unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
      sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
   for (struct dxil_resource_v0 *res = (struct dxil_resource_v0 *)ctx->resources.data;
        res < (struct dxil_resource_v0 *)((char *)ctx->resources.data + ctx->resources.size);
        res = (struct dxil_resource_v0 *)((char *)res + resource_element_size)) {
      enum dxil_resource_class res_class;
      const struct dxil_value **handle_array;
      switch (res->resource_type) {
      case DXIL_RES_SRV_TYPED:
      case DXIL_RES_SRV_RAW:
      case DXIL_RES_SRV_STRUCTURED:
         res_class = DXIL_RESOURCE_CLASS_SRV;
         handle_array = ctx->srv_handles;
         break;
      case DXIL_RES_CBV:
         res_class = DXIL_RESOURCE_CLASS_CBV;
         handle_array = ctx->cbv_handles;
         break;
      case DXIL_RES_SAMPLER:
         res_class = DXIL_RESOURCE_CLASS_SAMPLER;
         handle_array = ctx->sampler_handles;
         break;
      case DXIL_RES_UAV_RAW:
         res_class = DXIL_RESOURCE_CLASS_UAV;
         handle_array = ctx->ssbo_handles;
         break;
      case DXIL_RES_UAV_TYPED:
      case DXIL_RES_UAV_STRUCTURED:
      case DXIL_RES_UAV_STRUCTURED_WITH_COUNTER:
         res_class = DXIL_RESOURCE_CLASS_UAV;
         handle_array = ctx->image_handles;
         break;
      default:
         unreachable("Unexpected resource type");
      }

      if (last_res_class != res_class)
         id = 0;
      else
         id++;
      last_res_class = res_class;

      if (res->space > 1)
         continue;
      assert(res->space == 0 ||
             (res->space == 1 &&
              res->resource_type != DXIL_RES_UAV_RAW &&
              ctx->opts->environment == DXIL_ENVIRONMENT_GL));

      /* CL uses dynamic handles for the "globals" UAV array, but uses static
       * handles for UBOs, textures, and samplers.
       */
      if (ctx->opts->environment == DXIL_ENVIRONMENT_CL &&
          res->resource_type == DXIL_RES_UAV_RAW)
         continue;

      for (unsigned i = res->lower_bound; i <= res->upper_bound; ++i) {
         handle_array[i] = emit_createhandle_call_const_index(ctx,
                                                              res_class,
                                                              res->lower_bound,
                                                              res->upper_bound,
                                                              res->space,
                                                              id,
                                                              i,
                                                              false);
         if (!handle_array[i])
            return false;
      }
   }
   return true;
}

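/* GS state metadata, in order: input primitive, max output vertex count,
 * active stream mask, output topology, and invocation count.
 */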
static const struct dxil_mdnode *
emit_gs_state(struct ntd_context *ctx)
{
   const struct dxil_mdnode *gs_state_nodes[5];
   const nir_shader *s = ctx->shader;

   gs_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, dxil_get_input_primitive(s->info.gs.input_primitive));
   gs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.vertices_out);
   gs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.gs.active_stream_mask, 1));
   gs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, dxil_get_primitive_topology(s->info.gs.output_primitive));
   gs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.invocations);

   for (unsigned i = 0; i < ARRAY_SIZE(gs_state_nodes); ++i) {
      if (!gs_state_nodes[i])
         return NULL;
   }

   return dxil_get_metadata_node(&ctx->mod, gs_state_nodes, ARRAY_SIZE(gs_state_nodes));
}

static enum dxil_tessellator_domain
get_tessellator_domain(enum tess_primitive_mode primitive_mode)
{
   switch (primitive_mode) {
   case TESS_PRIMITIVE_QUADS: return DXIL_TESSELLATOR_DOMAIN_QUAD;
   case TESS_PRIMITIVE_TRIANGLES: return DXIL_TESSELLATOR_DOMAIN_TRI;
   case TESS_PRIMITIVE_ISOLINES: return DXIL_TESSELLATOR_DOMAIN_ISOLINE;
   default:
      unreachable("Invalid tessellator primitive mode");
   }
}

static enum dxil_tessellator_partitioning
get_tessellator_partitioning(enum gl_tess_spacing spacing)
{
   switch (spacing) {
   default:
   case TESS_SPACING_EQUAL:
      return DXIL_TESSELLATOR_PARTITIONING_INTEGER;
   case TESS_SPACING_FRACTIONAL_EVEN:
      return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN;
   case TESS_SPACING_FRACTIONAL_ODD:
      return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD;
   }
}

static enum dxil_tessellator_output_primitive
get_tessellator_output_primitive(const struct shader_info *info)
{
   if (info->tess.point_mode)
      return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_POINT;
   if (info->tess._primitive_mode == TESS_PRIMITIVE_ISOLINES)
      return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_LINE;
   /* Note: GL tessellation domain is inverted from D3D, which means triangle
    * winding needs to be inverted.
    */
   if (info->tess.ccw)
      return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CW;
   return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CCW;
}

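/* HS state metadata, in order: patch-constant function, input and output
 * control point counts, tessellator domain, partitioning, output primitive,
 * and the maximum tessellation factor (64.0).
 */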
static const struct dxil_mdnode *
emit_hs_state(struct ntd_context *ctx)
{
   const struct dxil_mdnode *hs_state_nodes[7];

   hs_state_nodes[0] = dxil_get_metadata_func(&ctx->mod, ctx->tess_ctrl_patch_constant_func_def->func);
   hs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->tess_input_control_point_count);
   hs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out);
   hs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode));
   hs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_partitioning(ctx->shader->info.tess.spacing));
   hs_state_nodes[5] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_output_primitive(&ctx->shader->info));
   hs_state_nodes[6] = dxil_get_metadata_float32(&ctx->mod, 64.0f);

   return dxil_get_metadata_node(&ctx->mod, hs_state_nodes, ARRAY_SIZE(hs_state_nodes));
}

static const struct dxil_mdnode *
emit_ds_state(struct ntd_context *ctx)
{
   const struct dxil_mdnode *ds_state_nodes[2];

   ds_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode));
   ds_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out);

   return dxil_get_metadata_node(&ctx->mod, ds_state_nodes, ARRAY_SIZE(ds_state_nodes));
}

static const struct dxil_mdnode *
emit_threads(struct ntd_context *ctx)
{
   const nir_shader *s = ctx->shader;
   const struct dxil_mdnode *threads_x = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[0], 1));
   const struct dxil_mdnode *threads_y = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[1], 1));
   const struct dxil_mdnode *threads_z = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[2], 1));
   if (!threads_x || !threads_y || !threads_z)
      return NULL;

   const struct dxil_mdnode *threads_nodes[] = { threads_x, threads_y, threads_z };
   return dxil_get_metadata_node(&ctx->mod, threads_nodes, ARRAY_SIZE(threads_nodes));
}

static const struct dxil_mdnode *
emit_wave_size(struct ntd_context *ctx)
{
   const nir_shader *s = ctx->shader;
   const struct dxil_mdnode *wave_size_node = dxil_get_metadata_int32(&ctx->mod, s->info.subgroup_size);
   return dxil_get_metadata_node(&ctx->mod, &wave_size_node, 1);
}

static const struct dxil_mdnode *
emit_wave_size_range(struct ntd_context *ctx)
{
   const nir_shader *s = ctx->shader;
   const struct dxil_mdnode *wave_size_nodes[3];
   wave_size_nodes[0] = dxil_get_metadata_int32(&ctx->mod, s->info.subgroup_size);
   wave_size_nodes[1] = wave_size_nodes[0];
   wave_size_nodes[2] = wave_size_nodes[0];
   return dxil_get_metadata_node(&ctx->mod, wave_size_nodes, ARRAY_SIZE(wave_size_nodes));
}

static int64_t
get_module_flags(struct ntd_context *ctx)
{
   /* See the DXIL documentation for the definition of these flags:
    *
    * https://github.com/Microsoft/DirectXShaderCompiler/blob/master/docs/DXIL.rst#shader-flags
    */

   uint64_t flags = 0;
   if (ctx->mod.feats.doubles)
      flags |= (1 << 2);
   if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT &&
       ctx->shader->info.fs.early_fragment_tests)
      flags |= (1 << 3);
   if (ctx->mod.raw_and_structured_buffers)
      flags |= (1 << 4);
   if (ctx->mod.feats.min_precision)
      flags |= (1 << 5);
   if (ctx->mod.feats.dx11_1_double_extensions)
      flags |= (1 << 6);
   if (ctx->mod.feats.array_layer_from_vs_or_ds)
      flags |= (1 << 9);
   if (ctx->mod.feats.inner_coverage)
      flags |= (1 << 10);
   if (ctx->mod.feats.stencil_ref)
      flags |= (1 << 11);
   if (ctx->mod.feats.tiled_resources)
      flags |= (1 << 12);
   if (ctx->mod.feats.typed_uav_load_additional_formats)
      flags |= (1 << 13);
   if (ctx->mod.feats.use_64uavs)
      flags |= (1 << 15);
   if (ctx->mod.feats.uavs_at_every_stage)
      flags |= (1 << 16);
   if (ctx->mod.feats.cs_4x_raw_sb)
      flags |= (1 << 17);
   if (ctx->mod.feats.rovs)
      flags |= (1 << 18);
   if (ctx->mod.feats.wave_ops)
      flags |= (1 << 19);
   if (ctx->mod.feats.int64_ops)
      flags |= (1 << 20);
   if (ctx->mod.feats.view_id)
      flags |= (1 << 21);
   if (ctx->mod.feats.barycentrics)
      flags |= (1 << 22);
   if (ctx->mod.feats.native_low_precision)
      flags |= (1 << 23) | (1 << 5);
   if (ctx->mod.feats.shading_rate)
      flags |= (1 << 24);
   if (ctx->mod.feats.raytracing_tier_1_1)
      flags |= (1 << 25);
   if (ctx->mod.feats.sampler_feedback)
      flags |= (1 << 26);
   if (ctx->mod.feats.atomic_int64_typed)
      flags |= (1 << 27);
   if (ctx->mod.feats.atomic_int64_tgsm)
      flags |= (1 << 28);
   if (ctx->mod.feats.derivatives_in_mesh_or_amp)
      flags |= (1 << 29);
   if (ctx->mod.feats.resource_descriptor_heap_indexing)
      flags |= (1 << 30);
   if (ctx->mod.feats.sampler_descriptor_heap_indexing)
      flags |= (1ull << 31);
   if (ctx->mod.feats.atomic_int64_heap_resource)
      flags |= (1ull << 32);
   if (ctx->mod.feats.advanced_texture_ops)
      flags |= (1ull << 34);
   if (ctx->mod.feats.writable_msaa)
      flags |= (1ull << 35);
   // Bit 36 is wave MMA
   if (ctx->mod.feats.sample_cmp_bias_gradient)
      flags |= (1ull << 37);
   if (ctx->mod.feats.extended_command_info)
      flags |= (1ull << 38);

   if (ctx->opts->disable_math_refactoring)
      flags |= (1 << 1);

   /* Work around https://github.com/microsoft/DirectXShaderCompiler/issues/4616
    * When targeting SM6.7 and with at least one UAV, if no other flags are present,
    * set the resources-may-not-alias flag, or else the DXIL validator may end up
    * with uninitialized memory which will fail validation, due to missing that flag.
    */
   if (flags == 0 && ctx->mod.minor_version >= 7 && ctx->num_uavs > 0)
      flags |= (1ull << 33);

   return flags;
}

static const struct dxil_mdnode *
emit_entrypoint(struct ntd_context *ctx,
                const struct dxil_func *func, const char *name,
                const struct dxil_mdnode *signatures,
                const struct dxil_mdnode *resources,
                const struct dxil_mdnode *shader_props)
{
   char truncated_name[254] = { 0 };
   strncpy(truncated_name, name, ARRAY_SIZE(truncated_name) - 1);

   const struct dxil_mdnode *func_md = dxil_get_metadata_func(&ctx->mod, func);
   const struct dxil_mdnode *name_md = dxil_get_metadata_string(&ctx->mod, truncated_name);
   const struct dxil_mdnode *nodes[] = {
      func_md,
      name_md,
      signatures,
      resources,
      shader_props
   };
   return dxil_get_metadata_node(&ctx->mod, nodes,
                                 ARRAY_SIZE(nodes));
}

static const struct dxil_mdnode *
emit_resources(struct ntd_context *ctx)
{
   bool emit_resources = false;
   const struct dxil_mdnode *resources_nodes[] = {
      NULL, NULL, NULL, NULL
   };

#define ARRAY_AND_SIZE(arr) arr.data, util_dynarray_num_elements(&arr, const struct dxil_mdnode *)

   if (ctx->srv_metadata_nodes.size) {
      resources_nodes[0] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->srv_metadata_nodes));
      emit_resources = true;
   }

   if (ctx->uav_metadata_nodes.size) {
      resources_nodes[1] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->uav_metadata_nodes));
      emit_resources = true;
   }

   if (ctx->cbv_metadata_nodes.size) {
      resources_nodes[2] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->cbv_metadata_nodes));
      emit_resources = true;
   }

   if (ctx->sampler_metadata_nodes.size) {
      resources_nodes[3] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->sampler_metadata_nodes));
      emit_resources = true;
   }

#undef ARRAY_AND_SIZE

   return emit_resources ?
      dxil_get_metadata_node(&ctx->mod, resources_nodes, ARRAY_SIZE(resources_nodes)) : NULL;
}

static bool
emit_tag(struct ntd_context *ctx, enum dxil_shader_tag tag,
         const struct dxil_mdnode *value_node)
{
   const struct dxil_mdnode *tag_node = dxil_get_metadata_int32(&ctx->mod, tag);
   if (!tag_node || !value_node)
      return false;
   assert(ctx->num_shader_property_nodes <= ARRAY_SIZE(ctx->shader_property_nodes) - 2);
   ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = tag_node;
   ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = value_node;

   return true;
}

static bool
emit_metadata(struct ntd_context *ctx)
{
   /* DXIL versions are 1.x for shader model 6.x */
   assert(ctx->mod.major_version == 6);
   unsigned dxilMajor = 1;
   unsigned dxilMinor = ctx->mod.minor_version;
   unsigned valMajor = ctx->mod.major_validator;
   unsigned valMinor = ctx->mod.minor_validator;
   if (!emit_llvm_ident(&ctx->mod) ||
       !emit_named_version(&ctx->mod, "dx.version", dxilMajor, dxilMinor) ||
       !emit_named_version(&ctx->mod, "dx.valver", valMajor, valMinor) ||
       !emit_dx_shader_model(&ctx->mod))
      return false;

   const struct dxil_func_def *main_func_def = ctx->main_func_def;
   if (!main_func_def)
      return false;
   const struct dxil_func *main_func = main_func_def->func;

   const struct dxil_mdnode *resources_node = emit_resources(ctx);

   const struct dxil_mdnode *main_entrypoint = dxil_get_metadata_func(&ctx->mod, main_func);
   const struct dxil_mdnode *node27 = dxil_get_metadata_node(&ctx->mod, NULL, 0);

   const struct dxil_mdnode *node4 = dxil_get_metadata_int32(&ctx->mod, 0);
   const struct dxil_mdnode *nodes_4_27_27[] = {
      node4, node27, node27
   };
   const struct dxil_mdnode *node28 = dxil_get_metadata_node(&ctx->mod, nodes_4_27_27,
                                                             ARRAY_SIZE(nodes_4_27_27));

   const struct dxil_mdnode *node29 = dxil_get_metadata_node(&ctx->mod, &node28, 1);

   const struct dxil_mdnode *node3 = dxil_get_metadata_int32(&ctx->mod, 1);
   const struct dxil_mdnode *main_type_annotation_nodes[] = {
      node3, main_entrypoint, node29
   };
   const struct dxil_mdnode *main_type_annotation = dxil_get_metadata_node(&ctx->mod, main_type_annotation_nodes,
                                                                           ARRAY_SIZE(main_type_annotation_nodes));

   if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) {
      if (!emit_tag(ctx, DXIL_SHADER_TAG_GS_STATE, emit_gs_state(ctx)))
         return false;
   } else if (ctx->mod.shader_kind == DXIL_HULL_SHADER) {
      ctx->tess_input_control_point_count = 32;
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
         if (nir_is_arrayed_io(var, MESA_SHADER_TESS_CTRL)) {
            ctx->tess_input_control_point_count = glsl_array_size(var->type);
            break;
         }
      }

      if (!emit_tag(ctx, DXIL_SHADER_TAG_HS_STATE, emit_hs_state(ctx)))
         return false;
   } else if (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) {
      if (!emit_tag(ctx, DXIL_SHADER_TAG_DS_STATE, emit_ds_state(ctx)))
         return false;
   } else if (ctx->mod.shader_kind == DXIL_COMPUTE_SHADER) {
      if (!emit_tag(ctx, DXIL_SHADER_TAG_NUM_THREADS, emit_threads(ctx)))
         return false;
      if (ctx->mod.minor_version >= 6 &&
          ctx->shader->info.subgroup_size >= SUBGROUP_SIZE_REQUIRE_4) {
         if (ctx->mod.minor_version < 8) {
            if (!emit_tag(ctx, DXIL_SHADER_TAG_WAVE_SIZE, emit_wave_size(ctx)))
               return false;
         } else {
            if (!emit_tag(ctx, DXIL_SHADER_TAG_WAVE_SIZE_RANGE, emit_wave_size_range(ctx)))
               return false;
         }
      }
   }

   uint64_t flags = get_module_flags(ctx);
   if (flags != 0) {
      if (!emit_tag(ctx, DXIL_SHADER_TAG_FLAGS, dxil_get_metadata_int64(&ctx->mod, flags)))
         return false;
   }
   const struct dxil_mdnode *shader_properties = NULL;
   if (ctx->num_shader_property_nodes > 0) {
      shader_properties = dxil_get_metadata_node(&ctx->mod, ctx->shader_property_nodes,
                                                 ctx->num_shader_property_nodes);
      if (!shader_properties)
         return false;
   }

   nir_function_impl *entry_func_impl = nir_shader_get_entrypoint(ctx->shader);
   const struct dxil_mdnode *dx_entry_point = emit_entrypoint(ctx, main_func,
      entry_func_impl->function->name, get_signatures(&ctx->mod), resources_node, shader_properties);
   if (!dx_entry_point)
      return false;

   if (resources_node) {
      const struct dxil_mdnode *dx_resources = resources_node;
      dxil_add_metadata_named_node(&ctx->mod, "dx.resources",
                                   &dx_resources, 1);
   }

   if (ctx->mod.minor_version >= 2 &&
       dxil_nir_analyze_io_dependencies(&ctx->mod, ctx->shader)) {
      const struct dxil_type *i32_type = dxil_module_get_int_type(&ctx->mod, 32);
      if (!i32_type)
         return false;

      const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, i32_type, ctx->mod.serialized_dependency_table_size);
      if (!array_type)
         return false;

      const struct dxil_value **array_entries = malloc(sizeof(const struct dxil_value *) * ctx->mod.serialized_dependency_table_size);
      if (!array_entries)
         return false;

      for (uint32_t i = 0; i < ctx->mod.serialized_dependency_table_size; ++i)
         array_entries[i] = dxil_module_get_int32_const(&ctx->mod, ctx->mod.serialized_dependency_table[i]);
      const struct dxil_value *array_val = dxil_module_get_array_const(&ctx->mod, array_type, array_entries);
      free((void *)array_entries);

      const struct dxil_mdnode *view_id_state_val = dxil_get_metadata_value(&ctx->mod, array_type, array_val);
      if (!view_id_state_val)
         return false;

      const struct dxil_mdnode *view_id_state_node = dxil_get_metadata_node(&ctx->mod, &view_id_state_val, 1);

      dxil_add_metadata_named_node(&ctx->mod, "dx.viewIdState", &view_id_state_node, 1);
   }

   const struct dxil_mdnode *dx_type_annotations[] = { main_type_annotation };
   return dxil_add_metadata_named_node(&ctx->mod, "dx.typeAnnotations",
                                       dx_type_annotations,
                                       ARRAY_SIZE(dx_type_annotations)) &&
          dxil_add_metadata_named_node(&ctx->mod, "dx.entryPoints",
                                       &dx_entry_point, 1);
}

static const struct dxil_value *
bitcast_to_int(struct ntd_context *ctx, unsigned bit_size,
               const struct dxil_value *value)
{
   const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod, bit_size);
   if (!type)
      return NULL;

   return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
}

static const struct dxil_value *
bitcast_to_float(struct ntd_context *ctx, unsigned bit_size,
                 const struct dxil_value *value)
{
   const struct dxil_type *type = dxil_module_get_float_type(&ctx->mod, bit_size);
   if (!type)
      return NULL;

   return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
}

static bool
is_phi_src(nir_def *ssa)
{
   nir_foreach_use(src, ssa)
      if (nir_src_parent_instr(src)->type == nir_instr_type_phi)
         return true;
   return false;
}

static void
store_ssa_def(struct ntd_context *ctx, nir_def *ssa, unsigned chan,
              const struct dxil_value *value)
{
   assert(ssa->index < ctx->num_defs);
   assert(chan < ssa->num_components);
   /* Insert bitcasts for phi srcs in the parent block */
   if (is_phi_src(ssa)) {
      /* Prefer ints over floats if it could be both or if we have no type info */
      nir_alu_type expect_type =
         BITSET_TEST(ctx->int_types, ssa->index) ? nir_type_int :
         (BITSET_TEST(ctx->float_types, ssa->index) ? nir_type_float :
          nir_type_int);
      assert(ssa->bit_size != 1 || expect_type == nir_type_int);
      if (ssa->bit_size != 1 && expect_type != dxil_type_to_nir_type(dxil_value_get_type(value)))
         value = dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST,
                                expect_type == nir_type_int ?
                                   dxil_module_get_int_type(&ctx->mod, ssa->bit_size) :
                                   dxil_module_get_float_type(&ctx->mod, ssa->bit_size), value);
      if (ssa->bit_size == 64) {
         if (expect_type == nir_type_int)
            ctx->mod.feats.int64_ops = true;
         if (expect_type == nir_type_float)
            ctx->mod.feats.doubles = true;
      }
   }
   ctx->defs[ssa->index].chans[chan] = value;
}

static void
store_def(struct ntd_context *ctx, nir_def *def, unsigned chan,
          const struct dxil_value *value)
{
   const struct dxil_type *type = dxil_value_get_type(value);
   if (type == ctx->mod.float64_type)
      ctx->mod.feats.doubles = true;
   if (type == ctx->mod.float16_type ||
       type == ctx->mod.int16_type)
      ctx->mod.feats.min_precision = true;
   if (type == ctx->mod.int64_type)
      ctx->mod.feats.int64_ops = true;
   store_ssa_def(ctx, def, chan, value);
}

static void
store_alu_dest(struct ntd_context *ctx, nir_alu_instr *alu, unsigned chan,
               const struct dxil_value *value)
{
   store_def(ctx, &alu->def, chan, value);
}

static const struct dxil_value *
get_src_ssa(struct ntd_context *ctx, const nir_def *ssa, unsigned chan)
{
   assert(ssa->index < ctx->num_defs);
   assert(chan < ssa->num_components);
   assert(ctx->defs[ssa->index].chans[chan]);
   return ctx->defs[ssa->index].chans[chan];
}

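/* NIR SSA values are typeless while DXIL values carry a concrete type, so
 * fetch the stored value and bitcast it on demand to the type the consuming
 * instruction expects, recording any feature bits the cast implies.
 */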
static const struct dxil_value *
get_src(struct ntd_context *ctx, nir_src *src, unsigned chan,
        nir_alu_type type)
{
   const struct dxil_value *value = get_src_ssa(ctx, src->ssa, chan);

   const int bit_size = nir_src_bit_size(*src);

   switch (nir_alu_type_get_base_type(type)) {
   case nir_type_int:
   case nir_type_uint: {
      const struct dxil_type *expect_type = dxil_module_get_int_type(&ctx->mod, bit_size);
      /* nothing to do */
      if (dxil_value_type_equal_to(value, expect_type)) {
         assert(bit_size != 64 || ctx->mod.feats.int64_ops);
         return value;
      }
      if (bit_size == 64) {
         assert(ctx->mod.feats.doubles);
         ctx->mod.feats.int64_ops = true;
      }
      if (bit_size == 16)
         ctx->mod.feats.native_low_precision = true;
      assert(dxil_value_type_bitsize_equal_to(value, bit_size));
      return bitcast_to_int(ctx, bit_size, value);
   }

   case nir_type_float:
      assert(nir_src_bit_size(*src) >= 16);
      if (dxil_value_type_equal_to(value, dxil_module_get_float_type(&ctx->mod, bit_size))) {
         assert(nir_src_bit_size(*src) != 64 || ctx->mod.feats.doubles);
         return value;
      }
      if (bit_size == 64) {
         assert(ctx->mod.feats.int64_ops);
         ctx->mod.feats.doubles = true;
      }
      if (bit_size == 16)
         ctx->mod.feats.native_low_precision = true;
      assert(dxil_value_type_bitsize_equal_to(value, bit_size));
      return bitcast_to_float(ctx, bit_size, value);

   case nir_type_bool:
      if (!dxil_value_type_bitsize_equal_to(value, 1)) {
         return dxil_emit_cast(&ctx->mod, DXIL_CAST_TRUNC,
                               dxil_module_get_int_type(&ctx->mod, 1), value);
      }
      return value;

   default:
      unreachable("unexpected nir_alu_type");
   }
}

static const struct dxil_value *
get_alu_src(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src)
{
   unsigned chan = alu->src[src].swizzle[0];
   return get_src(ctx, &alu->src[src].src, chan,
                  nir_op_infos[alu->op].input_types[src]);
}

static bool
emit_binop(struct ntd_context *ctx, nir_alu_instr *alu,
           enum dxil_bin_opcode opcode,
           const struct dxil_value *op0, const struct dxil_value *op1)
{
   bool is_float_op = nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) == nir_type_float;

   enum dxil_opt_flags flags = 0;
   if (is_float_op && !alu->exact)
      flags |= DXIL_UNSAFE_ALGEBRA;

   const struct dxil_value *v = dxil_emit_binop(&ctx->mod, opcode, op0, op1, flags);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

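/* LLVM-level shifts are undefined when the shift amount is not smaller than
 * the operand bit size, so variable amounts are masked (and constant ones
 * pre-masked) to bit_size - 1, which matches NIR's shift semantics.
 */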
static bool
emit_shift(struct ntd_context *ctx, nir_alu_instr *alu,
           enum dxil_bin_opcode opcode,
           const struct dxil_value *op0, const struct dxil_value *op1)
{
   unsigned op0_bit_size = nir_src_bit_size(alu->src[0].src);
   unsigned op1_bit_size = nir_src_bit_size(alu->src[1].src);

   uint64_t shift_mask = op0_bit_size - 1;
   if (!nir_src_is_const(alu->src[1].src)) {
      if (op0_bit_size != op1_bit_size) {
         const struct dxil_type *type =
            dxil_module_get_int_type(&ctx->mod, op0_bit_size);
         enum dxil_cast_opcode cast_op =
            op1_bit_size < op0_bit_size ? DXIL_CAST_ZEXT : DXIL_CAST_TRUNC;
         op1 = dxil_emit_cast(&ctx->mod, cast_op, type, op1);
      }
      op1 = dxil_emit_binop(&ctx->mod, DXIL_BINOP_AND,
                            op1,
                            dxil_module_get_int_const(&ctx->mod, shift_mask, op0_bit_size),
                            0);
   } else {
      uint64_t val = nir_scalar_as_uint(
         nir_scalar_chase_alu_src(nir_get_scalar(&alu->def, 0), 1));
      op1 = dxil_module_get_int_const(&ctx->mod, val & shift_mask, op0_bit_size);
   }

   const struct dxil_value *v =
      dxil_emit_binop(&ctx->mod, opcode, op0, op1, 0);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_cmp(struct ntd_context *ctx, nir_alu_instr *alu,
         enum dxil_cmp_pred pred,
         const struct dxil_value *op0, const struct dxil_value *op1)
{
   const struct dxil_value *v = dxil_emit_cmp(&ctx->mod, pred, op0, op1);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static enum dxil_cast_opcode
get_cast_op(nir_alu_instr *alu)
{
   unsigned dst_bits = alu->def.bit_size;
   unsigned src_bits = nir_src_bit_size(alu->src[0].src);

   switch (alu->op) {
   /* bool -> int */
   case nir_op_b2i16:
   case nir_op_b2i32:
   case nir_op_b2i64:
      return DXIL_CAST_ZEXT;

   /* float -> float */
   case nir_op_f2f16_rtz:
   case nir_op_f2f16:
   case nir_op_f2fmp:
   case nir_op_f2f32:
   case nir_op_f2f64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_FPTRUNC;
      else
         return DXIL_CAST_FPEXT;

   /* int -> int */
   case nir_op_i2i1:
   case nir_op_i2i16:
   case nir_op_i2imp:
   case nir_op_i2i32:
   case nir_op_i2i64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_TRUNC;
      else
         return DXIL_CAST_SEXT;

   /* uint -> uint */
   case nir_op_u2u1:
   case nir_op_u2u16:
   case nir_op_u2u32:
   case nir_op_u2u64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_TRUNC;
      else
         return DXIL_CAST_ZEXT;

   /* float -> int */
   case nir_op_f2i16:
   case nir_op_f2imp:
   case nir_op_f2i32:
   case nir_op_f2i64:
      return DXIL_CAST_FPTOSI;

   /* float -> uint */
   case nir_op_f2u16:
   case nir_op_f2ump:
   case nir_op_f2u32:
   case nir_op_f2u64:
      return DXIL_CAST_FPTOUI;

   /* int -> float */
   case nir_op_i2f16:
   case nir_op_i2fmp:
   case nir_op_i2f32:
   case nir_op_i2f64:
      return DXIL_CAST_SITOFP;

   /* uint -> float */
   case nir_op_u2f16:
   case nir_op_u2fmp:
   case nir_op_u2f32:
   case nir_op_u2f64:
      return DXIL_CAST_UITOFP;

   default:
      unreachable("unexpected cast op");
   }
}

static const struct dxil_type *
get_cast_dest_type(struct ntd_context *ctx, nir_alu_instr *alu)
{
   unsigned dst_bits = alu->def.bit_size;
   switch (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type)) {
   case nir_type_bool:
      assert(dst_bits == 1);
      FALLTHROUGH;
   case nir_type_int:
   case nir_type_uint:
      return dxil_module_get_int_type(&ctx->mod, dst_bits);

   case nir_type_float:
      return dxil_module_get_float_type(&ctx->mod, dst_bits);

   default:
      unreachable("unknown nir_alu_type");
   }
}

static bool
is_double(nir_alu_type alu_type, unsigned bit_size)
{
   return nir_alu_type_get_base_type(alu_type) == nir_type_float &&
          bit_size == 64;
}

static bool
emit_cast(struct ntd_context *ctx, nir_alu_instr *alu,
          const struct dxil_value *value)
{
   enum dxil_cast_opcode opcode = get_cast_op(alu);
   const struct dxil_type *type = get_cast_dest_type(ctx, alu);
   if (!type)
      return false;

   const nir_op_info *info = &nir_op_infos[alu->op];
   switch (opcode) {
   case DXIL_CAST_UITOFP:
   case DXIL_CAST_SITOFP:
      if (is_double(info->output_type, alu->def.bit_size))
         ctx->mod.feats.dx11_1_double_extensions = true;
      break;
   case DXIL_CAST_FPTOUI:
   case DXIL_CAST_FPTOSI:
      if (is_double(info->input_types[0], nir_src_bit_size(alu->src[0].src)))
         ctx->mod.feats.dx11_1_double_extensions = true;
      break;
   default:
      break;
   }

   if (alu->def.bit_size == 16) {
      switch (alu->op) {
      case nir_op_f2fmp:
      case nir_op_i2imp:
      case nir_op_f2imp:
      case nir_op_f2ump:
      case nir_op_i2fmp:
      case nir_op_u2fmp:
         break;
      default:
         ctx->mod.feats.native_low_precision = true;
      }
   }

   const struct dxil_value *v = dxil_emit_cast(&ctx->mod, opcode, type,
                                               value);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

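/* DXIL intrinsics are overloaded by scalar type; pick the overload
 * (DXIL_I1 ... DXIL_I64, DXIL_F16 ... DXIL_F64) from the NIR ALU type and
 * bit size.
 */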
static enum overload_type
get_overload(nir_alu_type alu_type, unsigned bit_size)
{
   switch (nir_alu_type_get_base_type(alu_type)) {
   case nir_type_int:
   case nir_type_uint:
   case nir_type_bool:
      switch (bit_size) {
      case 1: return DXIL_I1;
      case 16: return DXIL_I16;
      case 32: return DXIL_I32;
      case 64: return DXIL_I64;
      default:
         unreachable("unexpected bit_size");
      }
   case nir_type_float:
      switch (bit_size) {
      case 16: return DXIL_F16;
      case 32: return DXIL_F32;
      case 64: return DXIL_F64;
      default:
         unreachable("unexpected bit_size");
      }
   case nir_type_invalid:
      return DXIL_NONE;
   default:
      unreachable("unexpected output type");
   }
}

static enum overload_type
get_ambiguous_overload(struct ntd_context *ctx, nir_intrinsic_instr *intr,
                       enum overload_type default_type)
{
   if (BITSET_TEST(ctx->int_types, intr->def.index))
      return get_overload(nir_type_int, intr->def.bit_size);
   if (BITSET_TEST(ctx->float_types, intr->def.index))
      return get_overload(nir_type_float, intr->def.bit_size);
   return default_type;
}

static enum overload_type
get_ambiguous_overload_alu_type(struct ntd_context *ctx, nir_intrinsic_instr *intr,
                                nir_alu_type alu_type)
{
   return get_ambiguous_overload(ctx, intr, get_overload(alu_type, intr->def.bit_size));
}

static bool
emit_unary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
                 enum dxil_intr intr, const struct dxil_value *op)
{
   const nir_op_info *info = &nir_op_infos[alu->op];
   unsigned src_bits = nir_src_bit_size(alu->src[0].src);
   enum overload_type overload = get_overload(info->input_types[0], src_bits);

   const struct dxil_value *v = emit_unary_call(ctx, overload, intr, op);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_binary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
                  enum dxil_intr intr,
                  const struct dxil_value *op0, const struct dxil_value *op1)
{
   const nir_op_info *info = &nir_op_infos[alu->op];
   assert(info->output_type == info->input_types[0]);
   assert(info->output_type == info->input_types[1]);
   unsigned dst_bits = alu->def.bit_size;
   assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
   assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
   enum overload_type overload = get_overload(info->output_type, dst_bits);

   const struct dxil_value *v = emit_binary_call(ctx, overload, intr,
                                                 op0, op1);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_tertiary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
                    enum dxil_intr intr,
                    const struct dxil_value *op0,
                    const struct dxil_value *op1,
                    const struct dxil_value *op2)
{
   const nir_op_info *info = &nir_op_infos[alu->op];
   unsigned dst_bits = alu->def.bit_size;
   assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
   assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
   assert(nir_src_bit_size(alu->src[2].src) == dst_bits);

   assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[0], dst_bits));
   assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[1], dst_bits));
   assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[2], dst_bits));

   enum overload_type overload = get_overload(info->output_type, dst_bits);

   const struct dxil_value *v = emit_tertiary_call(ctx, overload, intr,
                                                   op0, op1, op2);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_derivative(struct ntd_context *ctx, nir_intrinsic_instr *intr,
                enum dxil_intr dxil_intr)
{
   const struct dxil_value *src = get_src(ctx, &intr->src[0], 0, nir_type_float);
   enum overload_type overload = get_overload(nir_type_float, intr->src[0].ssa->bit_size);
   const struct dxil_value *v = emit_unary_call(ctx, overload, dxil_intr, src);
   if (!v)
      return false;
   store_def(ctx, &intr->def, 0, v);
   return true;
}

static bool
emit_bitfield_insert(struct ntd_context *ctx, nir_alu_instr *alu,
                     const struct dxil_value *base,
                     const struct dxil_value *insert,
                     const struct dxil_value *offset,
                     const struct dxil_value *width)
{
   /* DXIL is width, offset, insert, base, NIR is base, insert, offset, width */
   const struct dxil_value *v = emit_quaternary_call(ctx, DXIL_I32, DXIL_INTR_BFI,
                                                     width, offset, insert, base);
   if (!v)
      return false;

   /* DXIL uses the 5 LSB from width/offset. Special-case width >= 32 == copy insert. */
   const struct dxil_value *compare_width = dxil_emit_cmp(&ctx->mod, DXIL_ICMP_SGE,
                                                          width, dxil_module_get_int32_const(&ctx->mod, 32));
   v = dxil_emit_select(&ctx->mod, compare_width, insert, v);
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_dot4add_packed(struct ntd_context *ctx, nir_alu_instr *alu,
                    enum dxil_intr intr,
                    const struct dxil_value *src0,
                    const struct dxil_value *src1,
                    const struct dxil_value *accum)
{
   const struct dxil_func *f = dxil_get_function(&ctx->mod, "dx.op.dot4AddPacked", DXIL_I32);
   if (!f)
      return false;
   const struct dxil_value *srcs[] = { dxil_module_get_int32_const(&ctx->mod, intr), accum, src0, src1 };
   const struct dxil_value *v = dxil_emit_call(&ctx->mod, f, srcs, ARRAY_SIZE(srcs));
   if (!v)
      return false;

   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_select(struct ntd_context *ctx, nir_alu_instr *alu,
            const struct dxil_value *sel,
            const struct dxil_value *val_true,
            const struct dxil_value *val_false)
{
   assert(sel);
   assert(val_true);
   assert(val_false);

   const struct dxil_value *v = dxil_emit_select(&ctx->mod, sel, val_true, val_false);
   if (!v)
      return false;

   store_alu_dest(ctx, alu, 0, v);
   return true;
}

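/* The b2f variants lower to a select between constants 1.0 and 0.0 of the
 * destination float type; 0x3C00 is the bit pattern of 1.0 in half
 * precision.
 */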
static bool
emit_b2f16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
{
   assert(val);

   struct dxil_module *m = &ctx->mod;

   const struct dxil_value *c1 = dxil_module_get_float16_const(m, 0x3C00);
   const struct dxil_value *c0 = dxil_module_get_float16_const(m, 0);

   if (!c0 || !c1)
      return false;

   return emit_select(ctx, alu, val, c1, c0);
}

static bool
emit_b2f32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
{
   assert(val);

   struct dxil_module *m = &ctx->mod;

   const struct dxil_value *c1 = dxil_module_get_float_const(m, 1.0f);
   const struct dxil_value *c0 = dxil_module_get_float_const(m, 0.0f);

   if (!c0 || !c1)
      return false;

   return emit_select(ctx, alu, val, c1, c0);
}

static bool
emit_b2f64(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
{
   assert(val);

   struct dxil_module *m = &ctx->mod;

   const struct dxil_value *c1 = dxil_module_get_double_const(m, 1.0);
   const struct dxil_value *c0 = dxil_module_get_double_const(m, 0.0);

   if (!c0 || !c1)
      return false;

   ctx->mod.feats.doubles = 1;
   return emit_select(ctx, alu, val, c1, c0);
}

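/* The legacy f16/f32 conversion intrinsics operate on the low 16 bits of an
 * i32, so the _split_y variant first shifts the packed word right by 16.
 */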
static bool
emit_f16tof32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val, bool shift)
{
   if (shift) {
      val = dxil_emit_binop(&ctx->mod, DXIL_BINOP_LSHR, val,
                            dxil_module_get_int32_const(&ctx->mod, 16), 0);
      if (!val)
         return false;
   }

   const struct dxil_func *func = dxil_get_function(&ctx->mod,
                                                    "dx.op.legacyF16ToF32",
                                                    DXIL_NONE);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F16TOF32);
   if (!opcode)
      return false;

   const struct dxil_value *args[] = {
      opcode,
      val
   };

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_f32tof16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val0, const struct dxil_value *val1)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod,
                                                    "dx.op.legacyF32ToF16",
                                                    DXIL_NONE);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F32TOF16);
   if (!opcode)
      return false;

   const struct dxil_value *args[] = {
      opcode,
      val0
   };

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;

   if (!nir_src_is_const(alu->src[1].src) || nir_src_as_int(alu->src[1].src) != 0) {
      args[1] = val1;
      const struct dxil_value *v_high = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
      if (!v_high)
         return false;

      v_high = dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL, v_high,
                               dxil_module_get_int32_const(&ctx->mod, 16), 0);
      if (!v_high)
         return false;

      v = dxil_emit_binop(&ctx->mod, DXIL_BINOP_OR, v, v_high, 0);
      if (!v)
         return false;
   }

   store_alu_dest(ctx, alu, 0, v);
   return true;
}

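/* NIR vecN instructions only gather per-channel SSA values; since every
 * DXIL value here is scalar, each source component is stored straight into
 * the destination channel without emitting an instruction.
 */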
static bool
emit_vec(struct ntd_context *ctx, nir_alu_instr *alu, unsigned num_inputs)
{
   for (unsigned i = 0; i < num_inputs; i++) {
      const struct dxil_value *src =
         get_src_ssa(ctx, alu->src[i].src.ssa, alu->src[i].swizzle[0]);
      if (!src)
         return false;

      store_alu_dest(ctx, alu, i, src);
   }
   return true;
}

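/* dx.op.makeDouble assembles a double from two 32-bit halves;
 * dx.op.splitDouble does the inverse and returns both halves as a
 * two-element result.
 */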
static bool
emit_make_double(struct ntd_context *ctx, nir_alu_instr *alu)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.makeDouble", DXIL_F64);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_MAKE_DOUBLE);
   if (!opcode)
      return false;

   const struct dxil_value *args[3] = {
      opcode,
      get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_uint32),
      get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[1], nir_type_uint32),
   };
   if (!args[1] || !args[2])
      return false;

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;
   store_def(ctx, &alu->def, 0, v);
   return true;
}

static bool
emit_split_double(struct ntd_context *ctx, nir_alu_instr *alu)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.splitDouble", DXIL_F64);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SPLIT_DOUBLE);
   if (!opcode)
      return false;

   const struct dxil_value *args[] = {
      opcode,
      get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_float64)
   };
   if (!args[1])
      return false;

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;

   const struct dxil_value *hi = dxil_emit_extractval(&ctx->mod, v, 0);
   const struct dxil_value *lo = dxil_emit_extractval(&ctx->mod, v, 1);
   if (!hi || !lo)
      return false;

   store_def(ctx, &alu->def, 0, hi);
   store_def(ctx, &alu->def, 1, lo);
   return true;
}

2864 static bool
emit_alu(struct ntd_context * ctx,nir_alu_instr * alu)2865 emit_alu(struct ntd_context *ctx, nir_alu_instr *alu)
2866 {
2867 /* handle vec-instructions first; they are the only ones that produce
2868 * vector results.
2869 */
2870 switch (alu->op) {
2871 case nir_op_vec2:
2872 case nir_op_vec3:
2873 case nir_op_vec4:
2874 case nir_op_vec8:
2875 case nir_op_vec16:
2876 return emit_vec(ctx, alu, nir_op_infos[alu->op].num_inputs);
2877 case nir_op_mov: {
2878 assert(alu->def.num_components == 1);
2879 store_ssa_def(ctx, &alu->def, 0, get_src_ssa(ctx,
2880 alu->src->src.ssa, alu->src->swizzle[0]));
2881 return true;
2882 }
2883 case nir_op_pack_double_2x32_dxil:
2884 return emit_make_double(ctx, alu);
2885 case nir_op_unpack_double_2x32_dxil:
2886 return emit_split_double(ctx, alu);
2887 case nir_op_bcsel: {
2888 /* Handled here to avoid type forced bitcast to int, since bcsel is used for ints and floats.
2889 * Ideally, the back-typing got both sources to match, but if it didn't, explicitly get src1's type */
2890 const struct dxil_value *src1 = get_src_ssa(ctx, alu->src[1].src.ssa, alu->src[1].swizzle[0]);
2891 nir_alu_type src1_type = dxil_type_to_nir_type(dxil_value_get_type(src1));
2892 return emit_select(ctx, alu,
2893 get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_bool),
2894 src1,
2895 get_src(ctx, &alu->src[2].src, alu->src[2].swizzle[0], src1_type));
2896 }
2897 default:
2898 /* silence warnings */
2899 ;
2900 }
2901
2902 /* other ops should be scalar */
2903 const struct dxil_value *src[4];
2904 assert(nir_op_infos[alu->op].num_inputs <= 4);
2905 for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
2906 src[i] = get_alu_src(ctx, alu, i);
2907 if (!src[i])
2908 return false;
2909 }
2910
2911 switch (alu->op) {
2912 case nir_op_iadd:
2913 case nir_op_fadd: return emit_binop(ctx, alu, DXIL_BINOP_ADD, src[0], src[1]);
2914
2915 case nir_op_isub:
2916 case nir_op_fsub: return emit_binop(ctx, alu, DXIL_BINOP_SUB, src[0], src[1]);
2917
2918 case nir_op_imul:
2919 case nir_op_fmul: return emit_binop(ctx, alu, DXIL_BINOP_MUL, src[0], src[1]);
2920
2921 case nir_op_fdiv:
2922 if (alu->def.bit_size == 64)
2923 ctx->mod.feats.dx11_1_double_extensions = 1;
2924 return emit_binop(ctx, alu, DXIL_BINOP_SDIV, src[0], src[1]);
2925
2926 case nir_op_idiv:
2927 case nir_op_udiv:
2928 if (nir_src_is_const(alu->src[1].src)) {
2929 /* It's illegal to emit a literal divide by 0 in DXIL */
2930 nir_scalar divisor = nir_scalar_chase_alu_src(nir_get_scalar(&alu->def, 0), 1);
2931 if (nir_scalar_as_int(divisor) == 0) {
2932 store_alu_dest(ctx, alu, 0,
2933 dxil_module_get_int_const(&ctx->mod, 0, alu->def.bit_size));
2934 return true;
2935 }
2936 }
2937 return emit_binop(ctx, alu, alu->op == nir_op_idiv ? DXIL_BINOP_SDIV : DXIL_BINOP_UDIV, src[0], src[1]);
2938
2939 case nir_op_irem: return emit_binop(ctx, alu, DXIL_BINOP_SREM, src[0], src[1]);
2940 case nir_op_imod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
2941 case nir_op_umod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
2942 case nir_op_ishl: return emit_shift(ctx, alu, DXIL_BINOP_SHL, src[0], src[1]);
2943 case nir_op_ishr: return emit_shift(ctx, alu, DXIL_BINOP_ASHR, src[0], src[1]);
2944 case nir_op_ushr: return emit_shift(ctx, alu, DXIL_BINOP_LSHR, src[0], src[1]);
2945 case nir_op_iand: return emit_binop(ctx, alu, DXIL_BINOP_AND, src[0], src[1]);
2946 case nir_op_ior: return emit_binop(ctx, alu, DXIL_BINOP_OR, src[0], src[1]);
2947 case nir_op_ixor: return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], src[1]);
2948 case nir_op_inot: {
2949 unsigned bit_size = alu->def.bit_size;
2950 intmax_t val = bit_size == 1 ? 1 : -1;
2951 const struct dxil_value *negative_one = dxil_module_get_int_const(&ctx->mod, val, bit_size);
2952 return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], negative_one);
2953 }
2954 case nir_op_ieq: return emit_cmp(ctx, alu, DXIL_ICMP_EQ, src[0], src[1]);
2955 case nir_op_ine: return emit_cmp(ctx, alu, DXIL_ICMP_NE, src[0], src[1]);
2956 case nir_op_ige: return emit_cmp(ctx, alu, DXIL_ICMP_SGE, src[0], src[1]);
2957 case nir_op_uge: return emit_cmp(ctx, alu, DXIL_ICMP_UGE, src[0], src[1]);
2958 case nir_op_ilt: return emit_cmp(ctx, alu, DXIL_ICMP_SLT, src[0], src[1]);
2959 case nir_op_ult: return emit_cmp(ctx, alu, DXIL_ICMP_ULT, src[0], src[1]);
2960 case nir_op_feq: return emit_cmp(ctx, alu, DXIL_FCMP_OEQ, src[0], src[1]);
2961 case nir_op_fneu: return emit_cmp(ctx, alu, DXIL_FCMP_UNE, src[0], src[1]);
2962 case nir_op_flt: return emit_cmp(ctx, alu, DXIL_FCMP_OLT, src[0], src[1]);
2963 case nir_op_fge: return emit_cmp(ctx, alu, DXIL_FCMP_OGE, src[0], src[1]);
2964 case nir_op_ftrunc: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_Z, src[0]);
2965 case nir_op_fabs: return emit_unary_intin(ctx, alu, DXIL_INTR_FABS, src[0]);
2966 case nir_op_fcos: return emit_unary_intin(ctx, alu, DXIL_INTR_FCOS, src[0]);
2967 case nir_op_fsin: return emit_unary_intin(ctx, alu, DXIL_INTR_FSIN, src[0]);
2968 case nir_op_fceil: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_PI, src[0]);
2969 case nir_op_fexp2: return emit_unary_intin(ctx, alu, DXIL_INTR_FEXP2, src[0]);
2970 case nir_op_flog2: return emit_unary_intin(ctx, alu, DXIL_INTR_FLOG2, src[0]);
2971 case nir_op_ffloor: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NI, src[0]);
2972 case nir_op_ffract: return emit_unary_intin(ctx, alu, DXIL_INTR_FRC, src[0]);
2973 case nir_op_fisnormal: return emit_unary_intin(ctx, alu, DXIL_INTR_ISNORMAL, src[0]);
2974 case nir_op_fisfinite: return emit_unary_intin(ctx, alu, DXIL_INTR_ISFINITE, src[0]);
2975
2976 case nir_op_fround_even: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NE, src[0]);
2977 case nir_op_frcp: {
2978 const struct dxil_value *one;
2979 switch (alu->def.bit_size) {
2980 case 16:
2981 one = dxil_module_get_float16_const(&ctx->mod, 0x3C00); /* 0x3C00 is 1.0 in IEEE half precision */
2982 break;
2983 case 32:
2984 one = dxil_module_get_float_const(&ctx->mod, 1.0f);
2985 break;
2986 case 64:
2987 one = dxil_module_get_double_const(&ctx->mod, 1.0);
2988 break;
2989 default: unreachable("Invalid float size");
2990 }
2991 return emit_binop(ctx, alu, DXIL_BINOP_SDIV, one, src[0]);
2992 }
2993 case nir_op_fsat: return emit_unary_intin(ctx, alu, DXIL_INTR_SATURATE, src[0]);
2994 case nir_op_bit_count: return emit_unary_intin(ctx, alu, DXIL_INTR_COUNTBITS, src[0]);
2995 case nir_op_bitfield_reverse: return emit_unary_intin(ctx, alu, DXIL_INTR_BFREV, src[0]);
2996 case nir_op_ufind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_HI, src[0]);
2997 case nir_op_ifind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_SHI, src[0]);
2998 case nir_op_find_lsb: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_LO, src[0]);
2999 case nir_op_imax: return emit_binary_intin(ctx, alu, DXIL_INTR_IMAX, src[0], src[1]);
3000 case nir_op_imin: return emit_binary_intin(ctx, alu, DXIL_INTR_IMIN, src[0], src[1]);
3001 case nir_op_umax: return emit_binary_intin(ctx, alu, DXIL_INTR_UMAX, src[0], src[1]);
3002 case nir_op_umin: return emit_binary_intin(ctx, alu, DXIL_INTR_UMIN, src[0], src[1]);
3003 case nir_op_frsq: return emit_unary_intin(ctx, alu, DXIL_INTR_RSQRT, src[0]);
3004 case nir_op_fsqrt: return emit_unary_intin(ctx, alu, DXIL_INTR_SQRT, src[0]);
3005 case nir_op_fmax: return emit_binary_intin(ctx, alu, DXIL_INTR_FMAX, src[0], src[1]);
3006 case nir_op_fmin: return emit_binary_intin(ctx, alu, DXIL_INTR_FMIN, src[0], src[1]);
3007 case nir_op_ffma:
3008 if (alu->def.bit_size == 64)
3009 ctx->mod.feats.dx11_1_double_extensions = 1;
3010 return emit_tertiary_intin(ctx, alu, DXIL_INTR_FMA, src[0], src[1], src[2]);
3011
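/* NIR's bitfield-extract sources are (value, offset, bits); the DXIL
 * intrinsic takes its operands in the opposite order, hence the reversed
 * src[2], src[1], src[0] below. */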
3012 case nir_op_ibfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_IBFE, src[2], src[1], src[0]);
3013 case nir_op_ubfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_UBFE, src[2], src[1], src[0]);
3014 case nir_op_bitfield_insert: return emit_bitfield_insert(ctx, alu, src[0], src[1], src[2], src[3]);
3015
3016 case nir_op_unpack_half_2x16_split_x: return emit_f16tof32(ctx, alu, src[0], false);
3017 case nir_op_unpack_half_2x16_split_y: return emit_f16tof32(ctx, alu, src[0], true);
3018 case nir_op_pack_half_2x16_split: return emit_f32tof16(ctx, alu, src[0], src[1]);
3019
3020 case nir_op_sdot_4x8_iadd: return emit_dot4add_packed(ctx, alu, DXIL_INTR_DOT4_ADD_I8_PACKED, src[0], src[1], src[2]);
3021 case nir_op_udot_4x8_uadd: return emit_dot4add_packed(ctx, alu, DXIL_INTR_DOT4_ADD_U8_PACKED, src[0], src[1], src[2]);
3022
3023 case nir_op_i2i1:
3024 case nir_op_u2u1:
3025 case nir_op_b2i16:
3026 case nir_op_i2i16:
3027 case nir_op_i2imp:
3028 case nir_op_f2i16:
3029 case nir_op_f2imp:
3030 case nir_op_f2u16:
3031 case nir_op_f2ump:
3032 case nir_op_u2u16:
3033 case nir_op_u2f16:
3034 case nir_op_u2fmp:
3035 case nir_op_i2f16:
3036 case nir_op_i2fmp:
3037 case nir_op_f2f16_rtz:
3038 case nir_op_f2f16:
3039 case nir_op_f2fmp:
3040 case nir_op_b2i32:
3041 case nir_op_f2f32:
3042 case nir_op_f2i32:
3043 case nir_op_f2u32:
3044 case nir_op_i2f32:
3045 case nir_op_i2i32:
3046 case nir_op_u2f32:
3047 case nir_op_u2u32:
3048 case nir_op_b2i64:
3049 case nir_op_f2f64:
3050 case nir_op_f2i64:
3051 case nir_op_f2u64:
3052 case nir_op_i2f64:
3053 case nir_op_i2i64:
3054 case nir_op_u2f64:
3055 case nir_op_u2u64:
3056 return emit_cast(ctx, alu, src[0]);
3057
3058 case nir_op_b2f16: return emit_b2f16(ctx, alu, src[0]);
3059 case nir_op_b2f32: return emit_b2f32(ctx, alu, src[0]);
3060 case nir_op_b2f64: return emit_b2f64(ctx, alu, src[0]);
3061 default:
3062 log_nir_instr_unsupported(ctx->logger, "Unimplemented ALU instruction",
3063 &alu->instr);
3064 return false;
3065 }
3066 }
3067
3068 static const struct dxil_value *
3069 load_ubo(struct ntd_context *ctx, const struct dxil_value *handle,
3070 const struct dxil_value *offset, enum overload_type overload)
3071 {
3072 assert(handle && offset);
3073
3074 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CBUFFER_LOAD_LEGACY);
3075 if (!opcode)
3076 return NULL;
3077
3078 const struct dxil_value *args[] = {
3079 opcode, handle, offset
3080 };
3081
3082 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cbufferLoadLegacy", overload);
3083 if (!func)
3084 return NULL;
3085 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3086 }
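/* Note: cbufferLoadLegacy reads an entire 16-byte constant-buffer row, so
 * the "offset" argument above is a row index rather than a byte offset. The
 * result is an aggregate of four components of the overload type; callers
 * pick out individual components with extractvalue (see emit_load_ubo_vec4
 * below). */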
3087
3088 static bool
3089 emit_barrier_impl(struct ntd_context *ctx, nir_variable_mode modes, mesa_scope execution_scope, mesa_scope mem_scope)
3090 {
3091 const struct dxil_value *opcode, *mode;
3092 const struct dxil_func *func;
3093 uint32_t flags = 0;
3094
3095 if (execution_scope == SCOPE_WORKGROUP)
3096 flags |= DXIL_BARRIER_MODE_SYNC_THREAD_GROUP;
3097
3098 bool is_compute = ctx->mod.shader_kind == DXIL_COMPUTE_SHADER;
3099
3100 if ((modes & (nir_var_mem_ssbo | nir_var_mem_global | nir_var_image)) &&
3101 (mem_scope > SCOPE_WORKGROUP || !is_compute)) {
3102 flags |= DXIL_BARRIER_MODE_UAV_FENCE_GLOBAL;
3103 } else {
3104 flags |= DXIL_BARRIER_MODE_UAV_FENCE_THREAD_GROUP;
3105 }
3106
3107 if ((modes & nir_var_mem_shared) && is_compute)
3108 flags |= DXIL_BARRIER_MODE_GROUPSHARED_MEM_FENCE;
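/* As a rough example of the mapping: a GLSL barrier() in a compute shader
 * reaches here with execution_scope == SCOPE_WORKGROUP and shared-memory
 * modes, yielding SYNC_THREAD_GROUP | UAV_FENCE_THREAD_GROUP |
 * GROUPSHARED_MEM_FENCE. */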
3109
3110 func = dxil_get_function(&ctx->mod, "dx.op.barrier", DXIL_NONE);
3111 if (!func)
3112 return false;
3113
3114 opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_BARRIER);
3115 if (!opcode)
3116 return false;
3117
3118 mode = dxil_module_get_int32_const(&ctx->mod, flags);
3119 if (!mode)
3120 return false;
3121
3122 const struct dxil_value *args[] = { opcode, mode };
3123
3124 return dxil_emit_call_void(&ctx->mod, func,
3125 args, ARRAY_SIZE(args));
3126 }
3127
3128 static bool
3129 emit_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3130 {
3131 return emit_barrier_impl(ctx,
3132 nir_intrinsic_memory_modes(intr),
3133 nir_intrinsic_execution_scope(intr),
3134 nir_intrinsic_memory_scope(intr));
3135 }
3136
3137 static bool
3138 emit_load_global_invocation_id(struct ntd_context *ctx,
3139 nir_intrinsic_instr *intr)
3140 {
3141 nir_component_mask_t comps = nir_def_components_read(&intr->def);
3142
3143 for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
3144 if (comps & (1 << i)) {
3145 const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
3146 if (!idx)
3147 return false;
3148 const struct dxil_value *globalid = emit_threadid_call(ctx, idx);
3149
3150 if (!globalid)
3151 return false;
3152
3153 store_def(ctx, &intr->def, i, globalid);
3154 }
3155 }
3156 return true;
3157 }
3158
3159 static bool
3160 emit_load_local_invocation_id(struct ntd_context *ctx,
3161 nir_intrinsic_instr *intr)
3162 {
3163 nir_component_mask_t comps = nir_def_components_read(&intr->def);
3164
3165 for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
3166 if (comps & (1 << i)) {
3167 const struct dxil_value
3168 *idx = dxil_module_get_int32_const(&ctx->mod, i);
3169 if (!idx)
3170 return false;
3171 const struct dxil_value
3172 *threadidingroup = emit_threadidingroup_call(ctx, idx);
3173 if (!threadidingroup)
3174 return false;
3175 store_def(ctx, &intr->def, i, threadidingroup);
3176 }
3177 }
3178 return true;
3179 }
3180
3181 static bool
3182 emit_load_local_invocation_index(struct ntd_context *ctx,
3183 nir_intrinsic_instr *intr)
3184 {
3185 const struct dxil_value
3186 *flattenedthreadidingroup = emit_flattenedthreadidingroup_call(ctx);
3187 if (!flattenedthreadidingroup)
3188 return false;
3189 store_def(ctx, &intr->def, 0, flattenedthreadidingroup);
3190
3191 return true;
3192 }
3193
3194 static bool
3195 emit_load_local_workgroup_id(struct ntd_context *ctx,
3196 nir_intrinsic_instr *intr)
3197 {
3198 nir_component_mask_t comps = nir_def_components_read(&intr->def);
3199
3200 for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
3201 if (comps & (1 << i)) {
3202 const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
3203 if (!idx)
3204 return false;
3205 const struct dxil_value *groupid = emit_groupid_call(ctx, idx);
3206 if (!groupid)
3207 return false;
3208 store_def(ctx, &intr->def, i, groupid);
3209 }
3210 }
3211 return true;
3212 }
3213
3214 static const struct dxil_value *
3215 call_unary_external_function(struct ntd_context *ctx,
3216 const char *name,
3217 int32_t dxil_intr,
3218 enum overload_type overload)
3219 {
3220 const struct dxil_func *func =
3221 dxil_get_function(&ctx->mod, name, overload);
3222 if (!func)
3223 return NULL;
3224
3225 const struct dxil_value *opcode =
3226 dxil_module_get_int32_const(&ctx->mod, dxil_intr);
3227 if (!opcode)
3228 return NULL;
3229
3230 const struct dxil_value *args[] = {opcode};
3231
3232 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3233 }
3234
3235 static bool
3236 emit_load_unary_external_function(struct ntd_context *ctx,
3237 nir_intrinsic_instr *intr, const char *name,
3238 int32_t dxil_intr,
3239 nir_alu_type type)
3240 {
3241 const struct dxil_value *value = call_unary_external_function(ctx, name, dxil_intr,
3242 get_overload(type, intr->def.bit_size));
3243 store_def(ctx, &intr->def, 0, value);
3244
3245 return true;
3246 }
3247
3248 static bool
3249 emit_load_sample_mask_in(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3250 {
3251 const struct dxil_value *value = call_unary_external_function(ctx,
3252 "dx.op.coverage", DXIL_INTR_COVERAGE, DXIL_I32);
3253
3254 /* Mask coverage with (1 << sample index). Note, done as an AND to handle extrapolation cases. */
3255 if (ctx->mod.info.has_per_sample_input) {
3256 value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_AND, value,
3257 dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL,
3258 dxil_module_get_int32_const(&ctx->mod, 1),
3259 call_unary_external_function(ctx, "dx.op.sampleIndex", DXIL_INTR_SAMPLE_INDEX, DXIL_I32), 0), 0);
3260 }
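/* e.g. when rendering at sample rate with sample index 2, the value stored
 * below is coverage & (1 << 2). */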
3261
3262 store_def(ctx, &intr->def, 0, value);
3263 return true;
3264 }
3265
3266 static bool
3267 emit_load_tess_coord(struct ntd_context *ctx,
3268 nir_intrinsic_instr *intr)
3269 {
3270 const struct dxil_func *func =
3271 dxil_get_function(&ctx->mod, "dx.op.domainLocation", DXIL_F32);
3272 if (!func)
3273 return false;
3274
3275 const struct dxil_value *opcode =
3276 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DOMAIN_LOCATION);
3277 if (!opcode)
3278 return false;
3279
3280 unsigned num_coords = ctx->shader->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES ? 3 : 2;
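/* Triangle domains use a full barycentric (u, v, w); quad and isoline
 * domains only use (u, v). Any extra destination components are
 * zero-filled below. */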
3281 for (unsigned i = 0; i < num_coords; ++i) {
3282 unsigned component_idx = i;
3283
3284 const struct dxil_value *component = dxil_module_get_int8_const(&ctx->mod, component_idx);
3285 if (!component)
3286 return false;
3287
3288 const struct dxil_value *args[] = { opcode, component };
3289
3290 const struct dxil_value *value =
3291 dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3292 store_def(ctx, &intr->def, i, value);
3293 }
3294
3295 for (unsigned i = num_coords; i < intr->def.num_components; ++i) {
3296 const struct dxil_value *value = dxil_module_get_float_const(&ctx->mod, 0.0f);
3297 store_def(ctx, &intr->def, i, value);
3298 }
3299
3300 return true;
3301 }
3302
3303 static const struct dxil_value *
3304 get_int32_undef(struct dxil_module *m)
3305 {
3306 const struct dxil_type *int32_type =
3307 dxil_module_get_int_type(m, 32);
3308 if (!int32_type)
3309 return NULL;
3310
3311 return dxil_module_get_undef(m, int32_type);
3312 }
3313
3314 static const struct dxil_value *
3315 get_resource_handle(struct ntd_context *ctx, nir_src *src, enum dxil_resource_class class,
3316 enum dxil_resource_kind kind)
3317 {
3318 /* This source might be one of:
3319 * 1. Constant resource index - just look it up in precomputed handle arrays
3320 * If it's null in that array, create a handle
3321 * 2. A handle from load_vulkan_descriptor - just get the stored SSA value
3322 * 3. Dynamic resource index - create a handle for it here
3323 */
3324 assert(src->ssa->num_components == 1 && src->ssa->bit_size == 32);
3325 nir_const_value *const_block_index = nir_src_as_const_value(*src);
3326 const struct dxil_value *handle_entry = NULL;
3327 if (const_block_index) {
3328 assert(ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN);
3329 switch (kind) {
3330 case DXIL_RESOURCE_KIND_CBUFFER:
3331 handle_entry = ctx->cbv_handles[const_block_index->u32];
3332 break;
3333 case DXIL_RESOURCE_KIND_RAW_BUFFER:
3334 if (class == DXIL_RESOURCE_CLASS_UAV)
3335 handle_entry = ctx->ssbo_handles[const_block_index->u32];
3336 else
3337 handle_entry = ctx->srv_handles[const_block_index->u32];
3338 break;
3339 case DXIL_RESOURCE_KIND_SAMPLER:
3340 handle_entry = ctx->sampler_handles[const_block_index->u32];
3341 break;
3342 default:
3343 if (class == DXIL_RESOURCE_CLASS_UAV)
3344 handle_entry = ctx->image_handles[const_block_index->u32];
3345 else
3346 handle_entry = ctx->srv_handles[const_block_index->u32];
3347 break;
3348 }
3349 }
3350
3351 if (handle_entry)
3352 return handle_entry;
3353
3354 if (nir_src_as_deref(*src) ||
3355 ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
3356 return get_src_ssa(ctx, src->ssa, 0);
3357 }
3358
3359 unsigned space = 0;
3360 if (ctx->opts->environment == DXIL_ENVIRONMENT_GL &&
3361 class == DXIL_RESOURCE_CLASS_UAV) {
3362 if (kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
3363 space = 2;
3364 else
3365 space = 1;
3366 }
3367
3368 /* The base binding here will almost always be zero. The only cases where we end
3369 * up in this type of dynamic indexing are:
3370 * 1. GL UBOs
3371 * 2. GL SSBOs
3372 * 3. CL SSBOs
3373 * In all cases except GL UBOs, the resources are a single zero-based array.
3374 * In that case, the base is 1, because uniforms use 0 and cannot be dynamically
3375 * indexed. All other cases should either fall into static indexing (first early return),
3376 * deref-based dynamic handle creation (images, or Vulkan textures/samplers), or
3377 * load_vulkan_descriptor handle creation.
3378 */
3379 unsigned base_binding = 0;
3380 if (ctx->shader->info.first_ubo_is_default_ubo &&
3381 class == DXIL_RESOURCE_CLASS_CBV)
3382 base_binding = 1;
3383
3384 const struct dxil_value *value = get_src(ctx, src, 0, nir_type_uint);
3385 const struct dxil_value *handle = emit_createhandle_call_dynamic(ctx, class,
3386 space, base_binding, value, !const_block_index);
3387
3388 return handle;
3389 }
3390
3391 static const struct dxil_value *
3392 create_image_handle(struct ntd_context *ctx, nir_intrinsic_instr *image_intr)
3393 {
3394 const struct dxil_value *unannotated_handle =
3395 emit_createhandle_heap(ctx, get_src(ctx, &image_intr->src[0], 0, nir_type_uint32), false, true /*TODO: divergence*/);
3396 const struct dxil_value *res_props =
3397 dxil_module_get_uav_res_props_const(&ctx->mod, image_intr);
3398
3399 if (!unannotated_handle || !res_props)
3400 return NULL;
3401
3402 return emit_annotate_handle(ctx, unannotated_handle, res_props);
3403 }
3404
3405 static const struct dxil_value *
3406 create_srv_handle(struct ntd_context *ctx, nir_tex_instr *tex, nir_src *src)
3407 {
3408 const struct dxil_value *unannotated_handle =
3409 emit_createhandle_heap(ctx, get_src(ctx, src, 0, nir_type_uint32), false, true /*TODO: divergence*/);
3410 const struct dxil_value *res_props =
3411 dxil_module_get_srv_res_props_const(&ctx->mod, tex);
3412
3413 if (!unannotated_handle || !res_props)
3414 return NULL;
3415
3416 return emit_annotate_handle(ctx, unannotated_handle, res_props);
3417 }
3418
3419 static const struct dxil_value *
3420 create_sampler_handle(struct ntd_context *ctx, bool is_shadow, nir_src *src)
3421 {
3422 const struct dxil_value *unannotated_handle =
3423 emit_createhandle_heap(ctx, get_src(ctx, src, 0, nir_type_uint32), true, true /*TODO: divergence*/);
3424 const struct dxil_value *res_props =
3425 dxil_module_get_sampler_res_props_const(&ctx->mod, is_shadow);
3426
3427 if (!unannotated_handle || !res_props)
3428 return NULL;
3429
3430 return emit_annotate_handle(ctx, unannotated_handle, res_props);
3431 }
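/* The three create_*_handle helpers above implement bindless access using
 * the SM 6.6 dynamic-resource pattern: the raw heap index is turned into an
 * unannotated handle, which is then annotated with resource properties so
 * the DXIL validator knows the resource type. */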
3432
3433 static bool
3434 emit_load_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3435 {
3436 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
3437
3438 enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV;
3439 if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
3440 nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
3441 if (var && var->data.access & ACCESS_NON_WRITEABLE)
3442 class = DXIL_RESOURCE_CLASS_SRV;
3443 }
3444
3445 const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER);
3446 const struct dxil_value *offset =
3447 get_src(ctx, &intr->src[1], 0, nir_type_uint);
3448 if (!int32_undef || !handle || !offset)
3449 return false;
3450
3451 assert(nir_src_bit_size(intr->src[0]) == 32);
3452 assert(nir_intrinsic_dest_components(intr) <= 4);
3453
3454 const struct dxil_value *coord[2] = {
3455 offset,
3456 int32_undef
3457 };
3458
3459 enum overload_type overload = get_ambiguous_overload_alu_type(ctx, intr, nir_type_uint);
3460 const struct dxil_value *load = ctx->mod.minor_version >= 2 ?
3461 emit_raw_bufferload_call(ctx, handle, coord,
3462 overload,
3463 nir_intrinsic_dest_components(intr),
3464 intr->def.bit_size / 8) :
3465 emit_bufferload_call(ctx, handle, coord, overload);
3466 if (!load)
3467 return false;
3468
3469 for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
3470 const struct dxil_value *val =
3471 dxil_emit_extractval(&ctx->mod, load, i);
3472 if (!val)
3473 return false;
3474 store_def(ctx, &intr->def, i, val);
3475 }
3476 if (intr->def.bit_size == 16)
3477 ctx->mod.feats.native_low_precision = true;
3478 return true;
3479 }
3480
3481 static bool
3482 emit_store_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3483 {
3484 const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[1], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
3485 const struct dxil_value *offset =
3486 get_src(ctx, &intr->src[2], 0, nir_type_uint);
3487 if (!handle || !offset)
3488 return false;
3489
3490 unsigned num_components = nir_src_num_components(intr->src[0]);
3491 assert(num_components <= 4);
3492 if (nir_src_bit_size(intr->src[0]) == 16)
3493 ctx->mod.feats.native_low_precision = true;
3494
3495 nir_alu_type type =
3496 dxil_type_to_nir_type(dxil_value_get_type(get_src_ssa(ctx, intr->src[0].ssa, 0)));
3497 const struct dxil_value *value[4] = { 0 };
3498 for (unsigned i = 0; i < num_components; ++i) {
3499 value[i] = get_src(ctx, &intr->src[0], i, type);
3500 if (!value[i])
3501 return false;
3502 }
3503
3504 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
3505 if (!int32_undef)
3506 return false;
3507
3508 const struct dxil_value *coord[2] = {
3509 offset,
3510 int32_undef
3511 };
3512
3513 enum overload_type overload = get_overload(type, intr->src[0].ssa->bit_size);
3514 if (num_components < 4) {
3515 const struct dxil_value *value_undef = dxil_module_get_undef(&ctx->mod, dxil_value_get_type(value[0]));
3516 if (!value_undef)
3517 return false;
3518
3519 for (int i = num_components; i < 4; ++i)
3520 value[i] = value_undef;
3521 }
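/* e.g. a two-component store is emitted as { v0, v1, undef, undef } with
 * write mask 0x3; the buffer-store intrinsics always take four value
 * operands regardless of the mask. */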
3522
3523 const struct dxil_value *write_mask =
3524 dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
3525 if (!write_mask)
3526 return false;
3527
3528 return ctx->mod.minor_version >= 2 ?
3529 emit_raw_bufferstore_call(ctx, handle, coord, value, write_mask, overload, intr->src[0].ssa->bit_size / 8) :
3530 emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload);
3531 }
3532
3533 static bool
3534 emit_load_ubo_vec4(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3535 {
3536 const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, DXIL_RESOURCE_KIND_CBUFFER);
3537 const struct dxil_value *offset =
3538 get_src(ctx, &intr->src[1], 0, nir_type_uint);
3539
3540 if (!handle || !offset)
3541 return false;
3542
3543 enum overload_type overload = get_ambiguous_overload_alu_type(ctx, intr, nir_type_uint);
3544 const struct dxil_value *agg = load_ubo(ctx, handle, offset, overload);
3545 if (!agg)
3546 return false;
3547
3548 unsigned first_component = nir_intrinsic_has_component(intr) ?
3549 nir_intrinsic_component(intr) : 0;
3550 for (unsigned i = 0; i < intr->def.num_components; i++)
3551 store_def(ctx, &intr->def, i,
3552 dxil_emit_extractval(&ctx->mod, agg, i + first_component));
3553
3554 if (intr->def.bit_size == 16)
3555 ctx->mod.feats.native_low_precision = true;
3556 return true;
3557 }
3558
3559 /* Need to add patch-ness as a matching parameter, since driver_location is *not* unique
3560 * between control points and patch variables in HS/DS
3561 */
3562 static nir_variable *
3563 find_patch_matching_variable_by_driver_location(nir_shader *s, nir_variable_mode mode, unsigned driver_location, bool patch)
3564 {
3565 nir_foreach_variable_with_modes(var, s, mode) {
3566 if (var->data.driver_location == driver_location &&
3567 var->data.patch == patch)
3568 return var;
3569 }
3570 return NULL;
3571 }
3572
3573 static bool
3574 emit_store_output_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3575 {
3576 assert(intr->intrinsic == nir_intrinsic_store_output ||
3577 ctx->mod.shader_kind == DXIL_HULL_SHADER);
3578 bool is_patch_constant = intr->intrinsic == nir_intrinsic_store_output &&
3579 ctx->mod.shader_kind == DXIL_HULL_SHADER;
3580 nir_alu_type out_type = nir_intrinsic_src_type(intr);
3581 enum overload_type overload = get_overload(out_type, intr->src[0].ssa->bit_size);
3582 const struct dxil_func *func = dxil_get_function(&ctx->mod, is_patch_constant ?
3583 "dx.op.storePatchConstant" : "dx.op.storeOutput",
3584 overload);
3585
3586 if (!func)
3587 return false;
3588
3589 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, is_patch_constant ?
3590 DXIL_INTR_STORE_PATCH_CONSTANT : DXIL_INTR_STORE_OUTPUT);
3591 uint8_t *io_mappings = is_patch_constant ? ctx->mod.patch_mappings : ctx->mod.output_mappings;
3592 uint8_t io_index = io_mappings[nir_intrinsic_base(intr)];
3593 const struct dxil_value *output_id = dxil_module_get_int32_const(&ctx->mod, io_index);
3594 unsigned row_index = intr->intrinsic == nir_intrinsic_store_output ? 1 : 2;
3595
3596 /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature
3597 * generation, so muck with them here too.
3598 */
3599 nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
3600 bool is_tess_level = is_patch_constant &&
3601 (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
3602 semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER);
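/* e.g. TESS_LEVEL_OUTER is one NIR row of up to four components, but the
 * DXIL signature declares it as up to four rows of one component, so for
 * tess levels the component index selects the row and the column is fixed
 * at 0. */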
3603
3604 const struct dxil_value *row = NULL;
3605 const struct dxil_value *col = NULL;
3606 if (is_tess_level)
3607 col = dxil_module_get_int8_const(&ctx->mod, 0);
3608 else
3609 row = get_src(ctx, &intr->src[row_index], 0, nir_type_int);
3610
3611 bool success = true;
3612 uint32_t writemask = nir_intrinsic_write_mask(intr);
3613
3614 nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_out, nir_intrinsic_base(intr), is_patch_constant);
3615 unsigned var_base_component = var->data.location_frac;
3616 unsigned base_component = nir_intrinsic_component(intr) - var_base_component;
3617
3618 if (ctx->mod.minor_validator >= 5) {
3619 struct dxil_signature_record *sig_rec = is_patch_constant ?
3620 &ctx->mod.patch_consts[io_index] :
3621 &ctx->mod.outputs[io_index];
3622 unsigned comp_size = intr->src[0].ssa->bit_size == 64 ? 2 : 1;
3623 unsigned comp_mask = 0;
3624 if (is_tess_level)
3625 comp_mask = 1;
3626 else if (comp_size == 1)
3627 comp_mask = writemask << var_base_component;
3628 else {
3629 for (unsigned i = 0; i < intr->num_components; ++i)
3630 if ((writemask & (1 << i)))
3631 comp_mask |= 3 << ((i + var_base_component) * comp_size);
3632 }
3633 for (unsigned r = 0; r < sig_rec->num_elements; ++r)
3634 sig_rec->elements[r].never_writes_mask &= ~comp_mask;
3635
3636 if (!nir_src_is_const(intr->src[row_index])) {
3637 struct dxil_psv_signature_element *psv_rec = is_patch_constant ?
3638 &ctx->mod.psv_patch_consts[io_index] :
3639 &ctx->mod.psv_outputs[io_index];
3640 psv_rec->dynamic_mask_and_stream |= comp_mask;
3641 }
3642 }
3643
3644 for (unsigned i = 0; i < intr->num_components && success; ++i) {
3645 if (writemask & (1 << i)) {
3646 if (is_tess_level)
3647 row = dxil_module_get_int32_const(&ctx->mod, i + base_component);
3648 else
3649 col = dxil_module_get_int8_const(&ctx->mod, i + base_component);
3650 const struct dxil_value *value = get_src(ctx, &intr->src[0], i, out_type);
3651 if (!col || !row || !value)
3652 return false;
3653
3654 const struct dxil_value *args[] = {
3655 opcode, output_id, row, col, value
3656 };
3657 success &= dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
3658 }
3659 }
3660
3661 return success;
3662 }
3663
3664 static bool
3665 emit_load_input_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3666 {
3667 bool attr_at_vertex = false;
3668 if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER &&
3669 ctx->opts->interpolate_at_vertex &&
3670 ctx->opts->provoking_vertex != 0 &&
3671 (nir_intrinsic_dest_type(intr) & nir_type_float)) {
3672 nir_variable *var = nir_find_variable_with_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr));
3673
3674 attr_at_vertex = var && var->data.interpolation == INTERP_MODE_FLAT;
3675 }
3676
3677 bool is_patch_constant = (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER &&
3678 intr->intrinsic == nir_intrinsic_load_input) ||
3679 (ctx->mod.shader_kind == DXIL_HULL_SHADER &&
3680 intr->intrinsic == nir_intrinsic_load_output);
3681 bool is_output_control_point = intr->intrinsic == nir_intrinsic_load_per_vertex_output;
3682
3683 unsigned opcode_val;
3684 const char *func_name;
3685 if (attr_at_vertex) {
3686 opcode_val = DXIL_INTR_ATTRIBUTE_AT_VERTEX;
3687 func_name = "dx.op.attributeAtVertex";
3688 if (ctx->mod.minor_validator >= 6)
3689 ctx->mod.feats.barycentrics = 1;
3690 } else if (is_patch_constant) {
3691 opcode_val = DXIL_INTR_LOAD_PATCH_CONSTANT;
3692 func_name = "dx.op.loadPatchConstant";
3693 } else if (is_output_control_point) {
3694 opcode_val = DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT;
3695 func_name = "dx.op.loadOutputControlPoint";
3696 } else {
3697 opcode_val = DXIL_INTR_LOAD_INPUT;
3698 func_name = "dx.op.loadInput";
3699 }
3700
3701 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, opcode_val);
3702 if (!opcode)
3703 return false;
3704
3705 uint8_t *io_mappings =
3706 is_patch_constant ? ctx->mod.patch_mappings :
3707 is_output_control_point ? ctx->mod.output_mappings :
3708 ctx->mod.input_mappings;
3709 uint8_t io_index = io_mappings[nir_intrinsic_base(intr)];
3710 const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, io_index);
3711 if (!input_id)
3712 return false;
3713
3714 bool is_per_vertex =
3715 intr->intrinsic == nir_intrinsic_load_per_vertex_input ||
3716 intr->intrinsic == nir_intrinsic_load_per_vertex_output;
3717 int row_index = is_per_vertex ? 1 : 0;
3718 const struct dxil_value *vertex_id = NULL;
3719 if (!is_patch_constant) {
3720 if (is_per_vertex) {
3721 vertex_id = get_src(ctx, &intr->src[0], 0, nir_type_int);
3722 } else if (attr_at_vertex) {
3723 vertex_id = dxil_module_get_int8_const(&ctx->mod, ctx->opts->provoking_vertex);
3724 } else {
3725 const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
3726 if (!int32_type)
3727 return false;
3728
3729 vertex_id = dxil_module_get_undef(&ctx->mod, int32_type);
3730 }
3731 if (!vertex_id)
3732 return false;
3733 }
3734
3735 /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature
3736 * generation, so muck with them here too.
3737 */
3738 nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
3739 bool is_tess_level = is_patch_constant &&
3740 (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
3741 semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER);
3742
3743 const struct dxil_value *row = NULL;
3744 const struct dxil_value *comp = NULL;
3745 if (is_tess_level)
3746 comp = dxil_module_get_int8_const(&ctx->mod, 0);
3747 else
3748 row = get_src(ctx, &intr->src[row_index], 0, nir_type_int);
3749
3750 nir_alu_type out_type = nir_intrinsic_dest_type(intr);
3751 enum overload_type overload = get_overload(out_type, intr->def.bit_size);
3752
3753 const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, overload);
3754
3755 if (!func)
3756 return false;
3757
3758 nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), is_patch_constant);
3759 unsigned var_base_component = var ? var->data.location_frac : 0;
3760 unsigned base_component = nir_intrinsic_component(intr) - var_base_component;
3761
3762 if (ctx->mod.minor_validator >= 5 &&
3763 !is_output_control_point &&
3764 intr->intrinsic != nir_intrinsic_load_output) {
3765 struct dxil_signature_record *sig_rec = is_patch_constant ?
3766 &ctx->mod.patch_consts[io_index] :
3767 &ctx->mod.inputs[io_index];
3768 unsigned comp_size = intr->def.bit_size == 64 ? 2 : 1;
3769 unsigned comp_mask = (1 << (intr->num_components * comp_size)) - 1;
3770 comp_mask <<= (var_base_component * comp_size);
3771 if (is_tess_level)
3772 comp_mask = 1;
3773 for (unsigned r = 0; r < sig_rec->num_elements; ++r)
3774 sig_rec->elements[r].always_reads_mask |= (comp_mask & sig_rec->elements[r].mask);
3775
3776 if (!nir_src_is_const(intr->src[row_index])) {
3777 struct dxil_psv_signature_element *psv_rec = is_patch_constant ?
3778 &ctx->mod.psv_patch_consts[io_index] :
3779 &ctx->mod.psv_inputs[io_index];
3780 psv_rec->dynamic_mask_and_stream |= comp_mask;
3781 }
3782 }
3783
3784 for (unsigned i = 0; i < intr->num_components; ++i) {
3785 if (is_tess_level)
3786 row = dxil_module_get_int32_const(&ctx->mod, i + base_component);
3787 else
3788 comp = dxil_module_get_int8_const(&ctx->mod, i + base_component);
3789
3790 if (!row || !comp)
3791 return false;
3792
3793 const struct dxil_value *args[] = {
3794 opcode, input_id, row, comp, vertex_id
3795 };
3796
3797 unsigned num_args = ARRAY_SIZE(args) - (is_patch_constant ? 1 : 0);
3798 const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args);
3799 if (!retval)
3800 return false;
3801 store_def(ctx, &intr->def, i, retval);
3802 }
3803 return true;
3804 }
3805
3806 static bool
3807 emit_load_interpolated_input(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3808 {
3809 nir_intrinsic_instr *barycentric = nir_src_as_intrinsic(intr->src[0]);
3810
3811 const struct dxil_value *args[6] = { 0 };
3812
3813 unsigned opcode_val;
3814 const char *func_name;
3815 unsigned num_args;
3816 switch (barycentric->intrinsic) {
3817 case nir_intrinsic_load_barycentric_at_offset:
3818 opcode_val = DXIL_INTR_EVAL_SNAPPED;
3819 func_name = "dx.op.evalSnapped";
3820 num_args = 6;
3821 for (unsigned i = 0; i < 2; ++i) {
3822 const struct dxil_value *float_offset = get_src(ctx, &barycentric->src[0], i, nir_type_float);
3823 /* GLSL offsets are in [-0.5f, 0.5f); DXIL's evalSnapped takes integer offsets in 1/16-pixel units in [-8, 7], so e.g. 0.25f maps to 4 */
3824 const struct dxil_value *offset_16 = dxil_emit_binop(&ctx->mod,
3825 DXIL_BINOP_MUL, float_offset, dxil_module_get_float_const(&ctx->mod, 16.0f), 0);
3826 args[i + 4] = dxil_emit_cast(&ctx->mod, DXIL_CAST_FPTOSI,
3827 dxil_module_get_int_type(&ctx->mod, 32), offset_16);
3828 }
3829 break;
3830 case nir_intrinsic_load_barycentric_pixel:
3831 opcode_val = DXIL_INTR_EVAL_SNAPPED;
3832 func_name = "dx.op.evalSnapped";
3833 num_args = 6;
3834 args[4] = args[5] = dxil_module_get_int32_const(&ctx->mod, 0);
3835 break;
3836 case nir_intrinsic_load_barycentric_at_sample:
3837 opcode_val = DXIL_INTR_EVAL_SAMPLE_INDEX;
3838 func_name = "dx.op.evalSampleIndex";
3839 num_args = 5;
3840 args[4] = get_src(ctx, &barycentric->src[0], 0, nir_type_int);
3841 break;
3842 case nir_intrinsic_load_barycentric_centroid:
3843 opcode_val = DXIL_INTR_EVAL_CENTROID;
3844 func_name = "dx.op.evalCentroid";
3845 num_args = 4;
3846 break;
3847 default:
3848 unreachable("Unsupported interpolation barycentric intrinsic");
3849 }
3850 uint8_t io_index = ctx->mod.input_mappings[nir_intrinsic_base(intr)];
3851 args[0] = dxil_module_get_int32_const(&ctx->mod, opcode_val);
3852 args[1] = dxil_module_get_int32_const(&ctx->mod, io_index);
3853 args[2] = get_src(ctx, &intr->src[1], 0, nir_type_int);
3854
3855 const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, DXIL_F32);
3856
3857 if (!func)
3858 return false;
3859
3860 nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), false);
3861 unsigned var_base_component = var ? var->data.location_frac : 0;
3862 unsigned base_component = nir_intrinsic_component(intr) - var_base_component;
3863
3864 if (ctx->mod.minor_validator >= 5) {
3865 struct dxil_signature_record *sig_rec = &ctx->mod.inputs[io_index];
3866 unsigned comp_size = intr->def.bit_size == 64 ? 2 : 1;
3867 unsigned comp_mask = (1 << (intr->num_components * comp_size)) - 1;
3868 comp_mask <<= (var_base_component * comp_size);
3869 for (unsigned r = 0; r < sig_rec->num_elements; ++r)
3870 sig_rec->elements[r].always_reads_mask |= (comp_mask & sig_rec->elements[r].mask);
3871
3872 if (!nir_src_is_const(intr->src[1])) {
3873 struct dxil_psv_signature_element *psv_rec = &ctx->mod.psv_inputs[io_index];
3874 psv_rec->dynamic_mask_and_stream |= comp_mask;
3875 }
3876 }
3877
3878 for (unsigned i = 0; i < intr->num_components; ++i) {
3879 args[3] = dxil_module_get_int8_const(&ctx->mod, i + base_component);
3880
3881 const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args);
3882 if (!retval)
3883 return false;
3884 store_def(ctx, &intr->def, i, retval);
3885 }
3886 return true;
3887 }
3888
3889 static const struct dxil_value *
3890 deref_to_gep(struct ntd_context *ctx, nir_deref_instr *deref)
3891 {
3892 nir_deref_path path;
3893 nir_deref_path_init(&path, deref, ctx->ralloc_ctx);
3894 assert(path.path[0]->deref_type == nir_deref_type_var);
3895 uint32_t count = 0;
3896 while (path.path[count])
3897 ++count;
3898
3899 const struct dxil_value **gep_indices = ralloc_array(ctx->ralloc_ctx,
3900 const struct dxil_value *,
3901 count + 1);
3902 nir_variable *var = path.path[0]->var;
3903 const struct dxil_value **var_array;
3904 switch (deref->modes) {
3905 case nir_var_mem_constant: var_array = ctx->consts; break;
3906 case nir_var_mem_shared: var_array = ctx->sharedvars; break;
3907 case nir_var_function_temp: var_array = ctx->scratchvars; break;
3908 default: unreachable("Invalid deref mode");
3909 }
3910 gep_indices[0] = var_array[var->data.driver_location];
3911
3912 for (uint32_t i = 0; i < count; ++i)
3913 gep_indices[i + 1] = get_src_ssa(ctx, &path.path[i]->def, 0);
3914
3915 return dxil_emit_gep_inbounds(&ctx->mod, gep_indices, count + 1);
3916 }
3917
3918 static bool
3919 emit_load_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3920 {
3921 const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0]));
3922 if (!ptr)
3923 return false;
3924
3925 const struct dxil_value *retval =
3926 dxil_emit_load(&ctx->mod, ptr, intr->def.bit_size / 8, false);
3927 if (!retval)
3928 return false;
3929
3930 store_def(ctx, &intr->def, 0, retval);
3931 return true;
3932 }
3933
3934 static bool
3935 emit_store_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3936 {
3937 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
3938 const struct dxil_value *ptr = deref_to_gep(ctx, deref);
3939 if (!ptr)
3940 return false;
3941
3942 const struct dxil_value *value = get_src(ctx, &intr->src[1], 0, nir_get_nir_type_for_glsl_type(deref->type));
3943 return dxil_emit_store(&ctx->mod, value, ptr, nir_src_bit_size(intr->src[1]) / 8, false);
3944 }
3945
3946 static bool
3947 emit_atomic_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3948 {
3949 const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0]));
3950 if (!ptr)
3951 return false;
3952
3953 const struct dxil_value *value = get_src(ctx, &intr->src[1], 0, nir_type_uint);
3954 if (!value)
3955 return false;
3956
3957 enum dxil_rmw_op dxil_op = nir_atomic_to_dxil_rmw(nir_intrinsic_atomic_op(intr));
3958 const struct dxil_value *retval = dxil_emit_atomicrmw(&ctx->mod, value, ptr, dxil_op, false,
3959 DXIL_ATOMIC_ORDERING_ACQREL,
3960 DXIL_SYNC_SCOPE_CROSSTHREAD);
3961 if (!retval)
3962 return false;
3963
3964 store_def(ctx, &intr->def, 0, retval);
3965 return true;
3966 }
3967
3968 static bool
3969 emit_atomic_deref_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3970 {
3971 const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0]));
3972 if (!ptr)
3973 return false;
3974
3975 const struct dxil_value *cmp = get_src(ctx, &intr->src[1], 0, nir_type_uint);
3976 const struct dxil_value *value = get_src(ctx, &intr->src[2], 0, nir_type_uint);
3977 if (!value)
3978 return false;
3979
3980 const struct dxil_value *retval = dxil_emit_cmpxchg(&ctx->mod, cmp, value, ptr, false,
3981 DXIL_ATOMIC_ORDERING_ACQREL,
3982 DXIL_SYNC_SCOPE_CROSSTHREAD);
3983 if (!retval)
3984 return false;
3985
3986 store_def(ctx, &intr->def, 0, retval);
3987 return true;
3988 }
3989
3990 static bool
3991 emit_discard_if_with_value(struct ntd_context *ctx, const struct dxil_value *value)
3992 {
3993 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DISCARD);
3994 if (!opcode)
3995 return false;
3996
3997 const struct dxil_value *args[] = {
3998 opcode,
3999 value
4000 };
4001
4002 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.discard", DXIL_NONE);
4003 if (!func)
4004 return false;
4005
4006 return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
4007 }
4008
4009 static bool
4010 emit_discard_if(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4011 {
4012 const struct dxil_value *value = get_src(ctx, &intr->src[0], 0, nir_type_bool);
4013 if (!value)
4014 return false;
4015
4016 return emit_discard_if_with_value(ctx, value);
4017 }
4018
4019 static bool
4020 emit_discard(struct ntd_context *ctx)
4021 {
4022 const struct dxil_value *value = dxil_module_get_int1_const(&ctx->mod, true);
4023 return emit_discard_if_with_value(ctx, value);
4024 }
4025
4026 static bool
4027 emit_emit_vertex(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4028 {
4029 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_EMIT_STREAM);
4030 const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
4031 if (!opcode || !stream_id)
4032 return false;
4033
4034 const struct dxil_value *args[] = {
4035 opcode,
4036 stream_id
4037 };
4038
4039 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.emitStream", DXIL_NONE);
4040 if (!func)
4041 return false;
4042
4043 return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
4044 }
4045
4046 static bool
4047 emit_end_primitive(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4048 {
4049 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CUT_STREAM);
4050 const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
4051 if (!opcode || !stream_id)
4052 return false;
4053
4054 const struct dxil_value *args[] = {
4055 opcode,
4056 stream_id
4057 };
4058
4059 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cutStream", DXIL_NONE);
4060 if (!func)
4061 return false;
4062
4063 return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
4064 }
4065
4066 static bool
4067 emit_image_store(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4068 {
4069 const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_store ?
4070 create_image_handle(ctx, intr) :
4071 get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
4072 if (!handle)
4073 return false;
4074
4075 bool is_array = false;
4076 if (intr->intrinsic == nir_intrinsic_image_deref_store)
4077 is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
4078 else
4079 is_array = nir_intrinsic_image_array(intr);
4080
4081 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4082 if (!int32_undef)
4083 return false;
4084
4085 const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
4086 enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_store ?
4087 glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
4088 nir_intrinsic_image_dim(intr);
4089 unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
4090 if (is_array)
4091 ++num_coords;
4092
4093 assert(num_coords <= nir_src_num_components(intr->src[1]));
4094 for (unsigned i = 0; i < num_coords; ++i) {
4095 coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
4096 if (!coord[i])
4097 return false;
4098 }
4099
4100 nir_alu_type in_type = nir_intrinsic_src_type(intr);
4101 enum overload_type overload = get_overload(in_type, 32);
4102
4103 assert(nir_src_bit_size(intr->src[3]) == 32);
4104 unsigned num_components = nir_src_num_components(intr->src[3]);
4105 assert(num_components <= 4);
4106 const struct dxil_value *value[4];
4107 for (unsigned i = 0; i < num_components; ++i) {
4108 value[i] = get_src(ctx, &intr->src[3], i, in_type);
4109 if (!value[i])
4110 return false;
4111 }
4112
4113 for (int i = num_components; i < 4; ++i)
4114 value[i] = dxil_module_get_undef(&ctx->mod, dxil_value_get_type(value[0]));
4115
4116 const struct dxil_value *write_mask =
4117 dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
4118 if (!write_mask)
4119 return false;
4120
4121 if (image_dim == GLSL_SAMPLER_DIM_BUF) {
4122 coord[1] = int32_undef;
4123 return emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload);
4124 } else
4125 return emit_texturestore_call(ctx, handle, coord, value, write_mask, overload);
4126 }
4127
4128 static bool
4129 emit_image_load(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4130 {
4131 const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_load ?
4132 create_image_handle(ctx, intr) :
4133 get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
4134 if (!handle)
4135 return false;
4136
4137 bool is_array = false;
4138 if (intr->intrinsic == nir_intrinsic_image_deref_load)
4139 is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
4140 else
4141 is_array = nir_intrinsic_image_array(intr);
4142
4143 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4144 if (!int32_undef)
4145 return false;
4146
4147 const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
4148 enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_load ?
4149 glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
4150 nir_intrinsic_image_dim(intr);
4151 unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
4152 if (is_array)
4153 ++num_coords;
4154
4155 assert(num_coords <= nir_src_num_components(intr->src[1]));
4156 for (unsigned i = 0; i < num_coords; ++i) {
4157 coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
4158 if (!coord[i])
4159 return false;
4160 }
4161
4162 nir_alu_type out_type = nir_intrinsic_dest_type(intr);
4163 enum overload_type overload = get_overload(out_type, 32);
4164
4165 const struct dxil_value *load_result;
4166 if (image_dim == GLSL_SAMPLER_DIM_BUF) {
4167 coord[1] = int32_undef;
4168 load_result = emit_bufferload_call(ctx, handle, coord, overload);
4169 } else
4170 load_result = emit_textureload_call(ctx, handle, coord, overload);
4171
4172 if (!load_result)
4173 return false;
4174
4175 assert(intr->def.bit_size == 32);
4176 unsigned num_components = intr->def.num_components;
4177 assert(num_components <= 4);
4178 for (unsigned i = 0; i < num_components; ++i) {
4179 const struct dxil_value *component = dxil_emit_extractval(&ctx->mod, load_result, i);
4180 if (!component)
4181 return false;
4182 store_def(ctx, &intr->def, i, component);
4183 }
4184
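/* Loading more than one component from a typed UAV requires the optional
 * "typed UAV load additional formats" capability, so flag it below. */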
4185 if (util_format_get_nr_components(nir_intrinsic_format(intr)) > 1)
4186 ctx->mod.feats.typed_uav_load_additional_formats = true;
4187
4188 return true;
4189 }
4190
4191 static bool
4192 emit_image_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4193 {
4194 const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_atomic ?
4195 create_image_handle(ctx, intr) :
4196 get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
4197 if (!handle)
4198 return false;
4199
4200 bool is_array = false;
4201 if (intr->intrinsic == nir_intrinsic_image_deref_atomic)
4202 is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
4203 else
4204 is_array = nir_intrinsic_image_array(intr);
4205
4206 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4207 if (!int32_undef)
4208 return false;
4209
4210 const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
4211 enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_atomic ?
4212 glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
4213 nir_intrinsic_image_dim(intr);
4214 unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
4215 if (is_array)
4216 ++num_coords;
4217
4218 assert(num_coords <= nir_src_num_components(intr->src[1]));
4219 for (unsigned i = 0; i < num_coords; ++i) {
4220 coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
4221 if (!coord[i])
4222 return false;
4223 }
4224
4225 nir_atomic_op nir_op = nir_intrinsic_atomic_op(intr);
4226 enum dxil_atomic_op dxil_op = nir_atomic_to_dxil_atomic(nir_op);
4227 nir_alu_type type = nir_atomic_op_type(nir_op);
4228 const struct dxil_value *value = get_src(ctx, &intr->src[3], 0, type);
4229 if (!value)
4230 return false;
4231
4232 const struct dxil_value *retval =
4233 emit_atomic_binop(ctx, handle, dxil_op, coord, value);
4234
4235 if (!retval)
4236 return false;
4237
4238 store_def(ctx, &intr->def, 0, retval);
4239 return true;
4240 }
4241
4242 static bool
4243 emit_image_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4244 {
4245 const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_atomic_swap ?
4246 create_image_handle(ctx, intr) :
4247 get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
4248 if (!handle)
4249 return false;
4250
4251 bool is_array = false;
4252 if (intr->intrinsic == nir_intrinsic_image_deref_atomic_swap)
4253 is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
4254 else
4255 is_array = nir_intrinsic_image_array(intr);
4256
4257 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4258 if (!int32_undef)
4259 return false;
4260
4261 const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
4262 enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ?
4263 glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
4264 nir_intrinsic_image_dim(intr);
4265 unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
4266 if (is_array)
4267 ++num_coords;
4268
4269 assert(num_coords <= nir_src_num_components(intr->src[1]));
4270 for (unsigned i = 0; i < num_coords; ++i) {
4271 coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
4272 if (!coord[i])
4273 return false;
4274 }
4275
4276 const struct dxil_value *cmpval = get_src(ctx, &intr->src[3], 0, nir_type_uint);
4277 const struct dxil_value *newval = get_src(ctx, &intr->src[4], 0, nir_type_uint);
4278 if (!cmpval || !newval)
4279 return false;
4280
4281 const struct dxil_value *retval =
4282 emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval);
4283
4284 if (!retval)
4285 return false;
4286
4287 store_def(ctx, &intr->def, 0, retval);
4288 return true;
4289 }
4290
4291 struct texop_parameters {
4292 const struct dxil_value *tex;
4293 const struct dxil_value *sampler;
4294 const struct dxil_value *bias, *lod_or_sample, *min_lod;
4295 const struct dxil_value *coord[4], *offset[3], *dx[3], *dy[3];
4296 const struct dxil_value *cmp;
4297 enum overload_type overload;
4298 };
4299
4300 static const struct dxil_value *
4301 emit_texture_size(struct ntd_context *ctx, struct texop_parameters *params)
4302 {
4303 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.getDimensions", DXIL_NONE);
4304 if (!func)
4305 return NULL;
4306
4307 const struct dxil_value *args[] = {
4308 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_SIZE),
4309 params->tex,
4310 params->lod_or_sample
4311 };
4312
4313 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4314 }
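/* dx.op.getDimensions returns an aggregate of four i32 values; callers
 * extract only the components they need (see emit_image_size and
 * emit_get_ssbo_size below). */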
4315
4316 static bool
4317 emit_image_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4318 {
4319 const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_size ?
4320 create_image_handle(ctx, intr) :
4321 get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
4322 if (!handle)
4323 return false;
4324
4325 enum glsl_sampler_dim sampler_dim = intr->intrinsic == nir_intrinsic_image_deref_size ?
4326 glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
4327 nir_intrinsic_image_dim(intr);
4328 const struct dxil_value *lod = sampler_dim == GLSL_SAMPLER_DIM_BUF ?
4329 dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32)) :
4330 get_src(ctx, &intr->src[1], 0, nir_type_uint);
4331 if (!lod)
4332 return false;
4333
4334 struct texop_parameters params = {
4335 .tex = handle,
4336 .lod_or_sample = lod
4337 };
4338 const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
4339 if (!dimensions)
4340 return false;
4341
4342 for (unsigned i = 0; i < intr->def.num_components; ++i) {
4343 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, i);
4344 store_def(ctx, &intr->def, i, retval);
4345 }
4346
4347 return true;
4348 }
4349
4350 static bool
4351 emit_get_ssbo_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4352 {
4353 enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV;
4354 if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
4355 nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
4356 if (var && var->data.access & ACCESS_NON_WRITEABLE)
4357 class = DXIL_RESOURCE_CLASS_SRV;
4358 }
4359
4360 const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER);
4361 if (!handle)
4362 return false;
4363
4364 struct texop_parameters params = {
4365 .tex = handle,
4366 .lod_or_sample = dxil_module_get_undef(
4367 &ctx->mod, dxil_module_get_int_type(&ctx->mod, 32))
4368 };
4369
4370 const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
4371 if (!dimensions)
4372 return false;
4373
4374 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, 0);
4375 store_def(ctx, &intr->def, 0, retval);
4376
4377 return true;
4378 }
4379
4380 static bool
4381 emit_ssbo_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4382 {
4383 nir_atomic_op nir_op = nir_intrinsic_atomic_op(intr);
4384 enum dxil_atomic_op dxil_op = nir_atomic_to_dxil_atomic(nir_op);
4385 nir_alu_type type = nir_atomic_op_type(nir_op);
4386 const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
4387 const struct dxil_value *offset =
4388 get_src(ctx, &intr->src[1], 0, nir_type_uint);
4389 const struct dxil_value *value =
4390 get_src(ctx, &intr->src[2], 0, type);
4391
4392 if (!value || !handle || !offset)
4393 return false;
4394
4395 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4396 if (!int32_undef)
4397 return false;
4398
4399 const struct dxil_value *coord[3] = {
4400 offset, int32_undef, int32_undef
4401 };
4402
4403 const struct dxil_value *retval =
4404 emit_atomic_binop(ctx, handle, dxil_op, coord, value);
4405
4406 if (!retval)
4407 return false;
4408
4409 store_def(ctx, &intr->def, 0, retval);
4410 return true;
4411 }
4412
4413 static bool
4414 emit_ssbo_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4415 {
4416 const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
4417 const struct dxil_value *offset =
4418 get_src(ctx, &intr->src[1], 0, nir_type_uint);
4419 const struct dxil_value *cmpval =
4420 get_src(ctx, &intr->src[2], 0, nir_type_int);
4421 const struct dxil_value *newval =
4422 get_src(ctx, &intr->src[3], 0, nir_type_int);
4423
4424 if (!cmpval || !newval || !handle || !offset)
4425 return false;
4426
4427 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4428 if (!int32_undef)
4429 return false;
4430
4431 const struct dxil_value *coord[3] = {
4432 offset, int32_undef, int32_undef
4433 };
4434
4435 const struct dxil_value *retval =
4436 emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval);
4437
4438 if (!retval)
4439 return false;
4440
4441 store_def(ctx, &intr->def, 0, retval);
4442 return true;
4443 }
4444
4445 static bool
4446 emit_vulkan_resource_index(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4447 {
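/* Only a flat (binding + dynamic offset, 0) pair is produced here; the
 * descriptor set is recovered later by load_vulkan_descriptor from the
 * intrinsic itself.
 */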
4448 unsigned int binding = nir_intrinsic_binding(intr);
4449
4450 bool const_index = nir_src_is_const(intr->src[0]);
4451 if (const_index) {
4452 binding += nir_src_as_const_value(intr->src[0])->u32;
4453 }
4454
4455 const struct dxil_value *index_value = dxil_module_get_int32_const(&ctx->mod, binding);
4456 if (!index_value)
4457 return false;
4458
4459 if (!const_index) {
4460 const struct dxil_value *offset = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
4461 if (!offset)
4462 return false;
4463
4464 index_value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, index_value, offset, 0);
4465 if (!index_value)
4466 return false;
4467 }
4468
4469 store_def(ctx, &intr->def, 0, index_value);
4470 store_def(ctx, &intr->def, 1, dxil_module_get_int32_const(&ctx->mod, 0));
4471 return true;
4472 }
4473
4474 static bool
4475 emit_load_vulkan_descriptor(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4476 {
4477 nir_intrinsic_instr* index = nir_src_as_intrinsic(intr->src[0]);
4478 const struct dxil_value *handle = NULL;
4479
4480 enum dxil_resource_class resource_class;
4481 enum dxil_resource_kind resource_kind;
4482 switch (nir_intrinsic_desc_type(intr)) {
4483 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
4484 resource_class = DXIL_RESOURCE_CLASS_CBV;
4485 resource_kind = DXIL_RESOURCE_KIND_CBUFFER;
4486 break;
4487 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
4488 resource_class = DXIL_RESOURCE_CLASS_UAV;
4489 resource_kind = DXIL_RESOURCE_KIND_RAW_BUFFER;
4490 break;
4491 default:
4492 unreachable("unknown descriptor type");
4493 return false;
4494 }
4495
4496 if (index && index->intrinsic == nir_intrinsic_vulkan_resource_index) {
4497 unsigned binding = nir_intrinsic_binding(index);
4498 unsigned space = nir_intrinsic_desc_set(index);
4499
4500 /* The descriptor_set field for variables is only 5 bits. We shouldn't have intrinsics trying to go beyond that. */
4501 assert(space < 32);
4502
4503 nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
4504 if (resource_class == DXIL_RESOURCE_CLASS_UAV &&
4505 (var->data.access & ACCESS_NON_WRITEABLE))
4506 resource_class = DXIL_RESOURCE_CLASS_SRV;
4507
4508 const struct dxil_value *index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
4509 if (!index_value)
4510 return false;
4511
4512 handle = emit_createhandle_call_dynamic(ctx, resource_class, space, binding, index_value, false);
4513 } else {
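/* Bindless path: src[0] is an index into the descriptor heap. The raw heap
 * handle carries no type information, so it is annotated with the buffer's
 * resource properties before use.
 */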
4514 const struct dxil_value *heap_index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
4515 if (!heap_index_value)
4516 return false;
4517 const struct dxil_value *unannotated_handle = emit_createhandle_heap(ctx, heap_index_value, false, true);
4518 const struct dxil_value *res_props = dxil_module_get_buffer_res_props_const(&ctx->mod, resource_class, resource_kind);
4519 if (!unannotated_handle || !res_props)
4520 return false;
4521 handle = emit_annotate_handle(ctx, unannotated_handle, res_props);
4522 }
4523
4524 store_ssa_def(ctx, &intr->def, 0, handle);
4525 store_def(ctx, &intr->def, 1, get_src(ctx, &intr->src[0], 1, nir_type_uint32));
4526
4527 return true;
4528 }
4529
4530 static bool
4531 emit_load_sample_pos_from_id(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4532 {
4533 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.renderTargetGetSamplePosition", DXIL_NONE);
4534 if (!func)
4535 return false;
4536
4537 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION);
4538 if (!opcode)
4539 return false;
4540
4541 const struct dxil_value *args[] = {
4542 opcode,
4543 get_src(ctx, &intr->src[0], 0, nir_type_uint32),
4544 };
4545 if (!args[1])
4546 return false;
4547
4548 const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4549 if (!v)
4550 return false;
4551
4552 for (unsigned i = 0; i < 2; ++i) {
4553 /* GL coords go from 0 -> 1, D3D from -0.5 -> 0.5 */
4554 const struct dxil_value *coord = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
4555 dxil_emit_extractval(&ctx->mod, v, i),
4556 dxil_module_get_float_const(&ctx->mod, 0.5f), 0);
4557 store_def(ctx, &intr->def, i, coord);
4558 }
4559 return true;
4560 }
4561
4562 static bool
4563 emit_load_sample_id(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4564 {
4565 assert(ctx->mod.info.has_per_sample_input ||
4566 intr->intrinsic == nir_intrinsic_load_sample_id_no_per_sample);
4567
4568 if (ctx->mod.info.has_per_sample_input)
4569 return emit_load_unary_external_function(ctx, intr, "dx.op.sampleIndex",
4570 DXIL_INTR_SAMPLE_INDEX, nir_type_int);
4571
4572 store_def(ctx, &intr->def, 0, dxil_module_get_int32_const(&ctx->mod, 0));
4573 return true;
4574 }
4575
4576 static bool
4577 emit_read_first_invocation(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4578 {
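/* Like all the wave intrinsics below, this marks feats.wave_ops so the
 * module's feature info advertises wave operations.
 */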
4579 ctx->mod.feats.wave_ops = 1;
4580 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveReadLaneFirst",
4581 get_overload(nir_type_uint, intr->def.bit_size));
4582 const struct dxil_value *args[] = {
4583 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_READ_LANE_FIRST),
4584 get_src(ctx, intr->src, 0, nir_type_uint),
4585 };
4586 if (!func || !args[0] || !args[1])
4587 return false;
4588
4589 const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4590 if (!ret)
4591 return false;
4592 store_def(ctx, &intr->def, 0, ret);
4593 return true;
4594 }
4595
4596 static bool
4597 emit_read_invocation(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4598 {
4599 ctx->mod.feats.wave_ops = 1;
4600 bool quad = intr->intrinsic == nir_intrinsic_quad_broadcast;
4601 const struct dxil_func *func = dxil_get_function(&ctx->mod, quad ? "dx.op.quadReadLaneAt" : "dx.op.waveReadLaneAt",
4602 get_overload(nir_type_uint, intr->def.bit_size));
4603 const struct dxil_value *args[] = {
4604 dxil_module_get_int32_const(&ctx->mod, quad ? DXIL_INTR_QUAD_READ_LANE_AT : DXIL_INTR_WAVE_READ_LANE_AT),
4605 get_src(ctx, &intr->src[0], 0, nir_type_uint),
4606 get_src(ctx, &intr->src[1], 0, nir_type_uint),
4607 };
4608 if (!func || !args[0] || !args[1] || !args[2])
4609 return false;
4610
4611 const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4612 if (!ret)
4613 return false;
4614 store_def(ctx, &intr->def, 0, ret);
4615 return true;
4616 }
4617
4618 static bool
4619 emit_vote_eq(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4620 {
4621 ctx->mod.feats.wave_ops = 1;
4622 nir_alu_type alu_type = intr->intrinsic == nir_intrinsic_vote_ieq ? nir_type_int : nir_type_float;
4623 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveAllEqual",
4624 get_overload(alu_type, intr->src[0].ssa->bit_size));
4625 const struct dxil_value *args[] = {
4626 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_ALL_EQUAL),
4627 get_src(ctx, intr->src, 0, alu_type),
4628 };
4629 if (!func || !args[0] || !args[1])
4630 return false;
4631
4632 const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4633 if (!ret)
4634 return false;
4635 store_def(ctx, &intr->def, 0, ret);
4636 return true;
4637 }
4638
4639 static bool
4640 emit_vote(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4641 {
4642 ctx->mod.feats.wave_ops = 1;
4643 bool any = intr->intrinsic == nir_intrinsic_vote_any;
4644 const struct dxil_func *func = dxil_get_function(&ctx->mod,
4645 any ? "dx.op.waveAnyTrue" : "dx.op.waveAllTrue",
4646 DXIL_NONE);
4647 const struct dxil_value *args[] = {
4648 dxil_module_get_int32_const(&ctx->mod, any ? DXIL_INTR_WAVE_ANY_TRUE : DXIL_INTR_WAVE_ALL_TRUE),
4649 get_src(ctx, intr->src, 0, nir_type_bool),
4650 };
4651 if (!func || !args[0] || !args[1])
4652 return false;
4653
4654 const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4655 if (!ret)
4656 return false;
4657 store_def(ctx, &intr->def, 0, ret);
4658 return true;
4659 }
4660
4661 static bool
4662 emit_ballot(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4663 {
4664 ctx->mod.feats.wave_ops = 1;
4665 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveBallot", DXIL_NONE);
4666 const struct dxil_value *args[] = {
4667 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_BALLOT),
4668 get_src(ctx, intr->src, 0, nir_type_bool),
4669 };
4670 if (!func || !args[0] || !args[1])
4671 return false;
4672
4673 const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4674 if (!ret)
4675 return false;
4676 for (uint32_t i = 0; i < 4; ++i)
4677 store_def(ctx, &intr->def, i, dxil_emit_extractval(&ctx->mod, ret, i));
4678 return true;
4679 }
4680
4681 static bool
4682 emit_quad_op(struct ntd_context *ctx, nir_intrinsic_instr *intr, enum dxil_quad_op_kind op)
4683 {
4684 ctx->mod.feats.wave_ops = 1;
4685 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.quadOp",
4686 get_overload(nir_type_uint, intr->def.bit_size));
4687 const struct dxil_value *args[] = {
4688 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_QUAD_OP),
4689 get_src(ctx, intr->src, 0, nir_type_uint),
4690 dxil_module_get_int8_const(&ctx->mod, op),
4691 };
4692 if (!func || !args[0] || !args[1] || !args[2])
4693 return false;
4694
4695 const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4696 if (!ret)
4697 return false;
4698 store_def(ctx, &intr->def, 0, ret);
4699 return true;
4700 }
4701
4702 static enum dxil_wave_bit_op_kind
4703 get_reduce_bit_op(nir_op op)
4704 {
4705 switch (op) {
4706 case nir_op_ior: return DXIL_WAVE_BIT_OP_OR;
4707 case nir_op_ixor: return DXIL_WAVE_BIT_OP_XOR;
4708 case nir_op_iand: return DXIL_WAVE_BIT_OP_AND;
4709 default:
4710 unreachable("Invalid bit op");
4711 }
4712 }
4713
4714 static bool
4715 emit_reduce_bitwise(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4716 {
4717 enum dxil_wave_bit_op_kind wave_bit_op = get_reduce_bit_op(nir_intrinsic_reduction_op(intr));
4718 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveBit",
4719 get_overload(nir_type_uint, intr->def.bit_size));
4720 const struct dxil_value *args[] = {
4721 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_BIT),
4722 get_src(ctx, intr->src, 0, nir_type_uint),
4723 dxil_module_get_int8_const(&ctx->mod, wave_bit_op),
4724 };
4725 if (!func || !args[0] || !args[1] || !args[2])
4726 return false;
4727
4728 const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4729 if (!ret)
4730 return false;
4731 store_def(ctx, &intr->def, 0, ret);
4732 return true;
4733 }
4734
4735 static enum dxil_wave_op_kind
4736 get_reduce_op(nir_op op)
4737 {
4738 switch (op) {
4739 case nir_op_iadd:
4740 case nir_op_fadd:
4741 return DXIL_WAVE_OP_SUM;
4742 case nir_op_imul:
4743 case nir_op_fmul:
4744 return DXIL_WAVE_OP_PRODUCT;
4745 case nir_op_imax:
4746 case nir_op_umax:
4747 case nir_op_fmax:
4748 return DXIL_WAVE_OP_MAX;
4749 case nir_op_imin:
4750 case nir_op_umin:
4751 case nir_op_fmin:
4752 return DXIL_WAVE_OP_MIN;
4753 default:
4754 unreachable("Unexpected reduction op");
4755 }
4756 }
4757
4758 static bool
4759 emit_reduce(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4760 {
4761 ctx->mod.feats.wave_ops = 1;
4762 bool is_prefix = intr->intrinsic == nir_intrinsic_exclusive_scan;
4763 nir_op reduction_op = (nir_op)nir_intrinsic_reduction_op(intr);
4764 switch (reduction_op) {
4765 case nir_op_ior:
4766 case nir_op_ixor:
4767 case nir_op_iand:
4768 assert(!is_prefix);
4769 return emit_reduce_bitwise(ctx, intr);
4770 default:
4771 break;
4772 }
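/* Non-bitwise reductions map onto waveActiveOp/wavePrefixOp, parameterized
 * by an op kind and a signedness flag.
 */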
4773 nir_alu_type alu_type = nir_op_infos[reduction_op].input_types[0];
4774 enum dxil_wave_op_kind wave_op = get_reduce_op(reduction_op);
4775 const struct dxil_func *func = dxil_get_function(&ctx->mod, is_prefix ? "dx.op.wavePrefixOp" : "dx.op.waveActiveOp",
4776 get_overload(alu_type, intr->def.bit_size));
4777 bool is_unsigned = alu_type == nir_type_uint;
4778 const struct dxil_value *args[] = {
4779 dxil_module_get_int32_const(&ctx->mod, is_prefix ? DXIL_INTR_WAVE_PREFIX_OP : DXIL_INTR_WAVE_ACTIVE_OP),
4780 get_src(ctx, intr->src, 0, alu_type),
4781 dxil_module_get_int8_const(&ctx->mod, wave_op),
4782 dxil_module_get_int8_const(&ctx->mod, is_unsigned),
4783 };
4784 if (!func || !args[0] || !args[1] || !args[2] || !args[3])
4785 return false;
4786
4787 const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4788 if (!ret)
4789 return false;
4790 store_def(ctx, &intr->def, 0, ret);
4791 return true;
4792 }
4793
4794 static bool
4795 emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4796 {
4797 switch (intr->intrinsic) {
4798 case nir_intrinsic_load_global_invocation_id:
4799 return emit_load_global_invocation_id(ctx, intr);
4800 case nir_intrinsic_load_local_invocation_id:
4801 return emit_load_local_invocation_id(ctx, intr);
4802 case nir_intrinsic_load_local_invocation_index:
4803 return emit_load_local_invocation_index(ctx, intr);
4804 case nir_intrinsic_load_workgroup_id:
4805 return emit_load_local_workgroup_id(ctx, intr);
4806 case nir_intrinsic_load_ssbo:
4807 return emit_load_ssbo(ctx, intr);
4808 case nir_intrinsic_store_ssbo:
4809 return emit_store_ssbo(ctx, intr);
4810 case nir_intrinsic_load_deref:
4811 return emit_load_deref(ctx, intr);
4812 case nir_intrinsic_store_deref:
4813 return emit_store_deref(ctx, intr);
4814 case nir_intrinsic_deref_atomic:
4815 return emit_atomic_deref(ctx, intr);
4816 case nir_intrinsic_deref_atomic_swap:
4817 return emit_atomic_deref_swap(ctx, intr);
4818 case nir_intrinsic_load_ubo_vec4:
4819 return emit_load_ubo_vec4(ctx, intr);
4820 case nir_intrinsic_load_primitive_id:
4821 return emit_load_unary_external_function(ctx, intr, "dx.op.primitiveID",
4822 DXIL_INTR_PRIMITIVE_ID, nir_type_int);
4823 case nir_intrinsic_load_sample_id:
4824 case nir_intrinsic_load_sample_id_no_per_sample:
4825 return emit_load_sample_id(ctx, intr);
4826 case nir_intrinsic_load_invocation_id:
4827 switch (ctx->mod.shader_kind) {
4828 case DXIL_HULL_SHADER:
4829 return emit_load_unary_external_function(ctx, intr, "dx.op.outputControlPointID",
4830 DXIL_INTR_OUTPUT_CONTROL_POINT_ID, nir_type_int);
4831 case DXIL_GEOMETRY_SHADER:
4832 return emit_load_unary_external_function(ctx, intr, "dx.op.gsInstanceID",
4833 DXIL_INTR_GS_INSTANCE_ID, nir_type_int);
4834 default:
4835 unreachable("Unexpected shader kind for invocation ID");
4836 }
4837 case nir_intrinsic_load_view_index:
4838 ctx->mod.feats.view_id = true;
4839 return emit_load_unary_external_function(ctx, intr, "dx.op.viewID",
4840 DXIL_INTR_VIEW_ID, nir_type_int);
4841 case nir_intrinsic_load_sample_mask_in:
4842 return emit_load_sample_mask_in(ctx, intr);
4843 case nir_intrinsic_load_tess_coord:
4844 return emit_load_tess_coord(ctx, intr);
4845 case nir_intrinsic_terminate_if:
4846 case nir_intrinsic_demote_if:
4847 return emit_discard_if(ctx, intr);
4848 case nir_intrinsic_terminate:
4849 case nir_intrinsic_demote:
4850 return emit_discard(ctx);
4851 case nir_intrinsic_emit_vertex:
4852 return emit_emit_vertex(ctx, intr);
4853 case nir_intrinsic_end_primitive:
4854 return emit_end_primitive(ctx, intr);
4855 case nir_intrinsic_barrier:
4856 return emit_barrier(ctx, intr);
4857 case nir_intrinsic_ssbo_atomic:
4858 return emit_ssbo_atomic(ctx, intr);
4859 case nir_intrinsic_ssbo_atomic_swap:
4860 return emit_ssbo_atomic_comp_swap(ctx, intr);
4861 case nir_intrinsic_image_deref_atomic:
4862 case nir_intrinsic_image_atomic:
4863 case nir_intrinsic_bindless_image_atomic:
4864 return emit_image_atomic(ctx, intr);
4865 case nir_intrinsic_image_deref_atomic_swap:
4866 case nir_intrinsic_image_atomic_swap:
4867 case nir_intrinsic_bindless_image_atomic_swap:
4868 return emit_image_atomic_comp_swap(ctx, intr);
4869 case nir_intrinsic_image_store:
4870 case nir_intrinsic_image_deref_store:
4871 case nir_intrinsic_bindless_image_store:
4872 return emit_image_store(ctx, intr);
4873 case nir_intrinsic_image_load:
4874 case nir_intrinsic_image_deref_load:
4875 case nir_intrinsic_bindless_image_load:
4876 return emit_image_load(ctx, intr);
4877 case nir_intrinsic_image_size:
4878 case nir_intrinsic_image_deref_size:
4879 case nir_intrinsic_bindless_image_size:
4880 return emit_image_size(ctx, intr);
4881 case nir_intrinsic_get_ssbo_size:
4882 return emit_get_ssbo_size(ctx, intr);
4883 case nir_intrinsic_load_input:
4884 case nir_intrinsic_load_per_vertex_input:
4885 case nir_intrinsic_load_output:
4886 case nir_intrinsic_load_per_vertex_output:
4887 return emit_load_input_via_intrinsic(ctx, intr);
4888 case nir_intrinsic_store_output:
4889 case nir_intrinsic_store_per_vertex_output:
4890 return emit_store_output_via_intrinsic(ctx, intr);
4891
4892 case nir_intrinsic_load_barycentric_at_offset:
4893 case nir_intrinsic_load_barycentric_at_sample:
4894 case nir_intrinsic_load_barycentric_centroid:
4895 case nir_intrinsic_load_barycentric_pixel:
4896 /* Emit nothing, we only support these as inputs to load_interpolated_input */
4897 return true;
4898 case nir_intrinsic_load_interpolated_input:
4899 return emit_load_interpolated_input(ctx, intr);
4901
4902 case nir_intrinsic_vulkan_resource_index:
4903 return emit_vulkan_resource_index(ctx, intr);
4904 case nir_intrinsic_load_vulkan_descriptor:
4905 return emit_load_vulkan_descriptor(ctx, intr);
4906
4907 case nir_intrinsic_load_sample_pos_from_id:
4908 return emit_load_sample_pos_from_id(ctx, intr);
4909
4910 case nir_intrinsic_is_helper_invocation:
4911 return emit_load_unary_external_function(
4912 ctx, intr, "dx.op.isHelperLane", DXIL_INTR_IS_HELPER_LANE, nir_type_int);
4913 case nir_intrinsic_elect:
4914 ctx->mod.feats.wave_ops = 1;
4915 return emit_load_unary_external_function(
4916 ctx, intr, "dx.op.waveIsFirstLane", DXIL_INTR_WAVE_IS_FIRST_LANE, nir_type_invalid);
4917 case nir_intrinsic_load_subgroup_size:
4918 ctx->mod.feats.wave_ops = 1;
4919 return emit_load_unary_external_function(
4920 ctx, intr, "dx.op.waveGetLaneCount", DXIL_INTR_WAVE_GET_LANE_COUNT, nir_type_invalid);
4921 case nir_intrinsic_load_subgroup_invocation:
4922 ctx->mod.feats.wave_ops = 1;
4923 return emit_load_unary_external_function(
4924 ctx, intr, "dx.op.waveGetLaneIndex", DXIL_INTR_WAVE_GET_LANE_INDEX, nir_type_invalid);
4925
4926 case nir_intrinsic_vote_feq:
4927 case nir_intrinsic_vote_ieq:
4928 return emit_vote_eq(ctx, intr);
4929 case nir_intrinsic_vote_any:
4930 case nir_intrinsic_vote_all:
4931 return emit_vote(ctx, intr);
4932
4933 case nir_intrinsic_ballot:
4934 return emit_ballot(ctx, intr);
4935
4936 case nir_intrinsic_read_first_invocation:
4937 return emit_read_first_invocation(ctx, intr);
4938 case nir_intrinsic_read_invocation:
4939 case nir_intrinsic_shuffle:
4940 case nir_intrinsic_quad_broadcast:
4941 return emit_read_invocation(ctx, intr);
4942
4943 case nir_intrinsic_quad_swap_horizontal:
4944 return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_X);
4945 case nir_intrinsic_quad_swap_vertical:
4946 return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_Y);
4947 case nir_intrinsic_quad_swap_diagonal:
4948 return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_DIAGONAL);
4949
4950 case nir_intrinsic_reduce:
4951 case nir_intrinsic_exclusive_scan:
4952 return emit_reduce(ctx, intr);
4953
4954 case nir_intrinsic_ddx:
4955 case nir_intrinsic_ddx_coarse: return emit_derivative(ctx, intr, DXIL_INTR_DDX_COARSE);
4956 case nir_intrinsic_ddx_fine: return emit_derivative(ctx, intr, DXIL_INTR_DDX_FINE);
4957 case nir_intrinsic_ddy:
4958 case nir_intrinsic_ddy_coarse: return emit_derivative(ctx, intr, DXIL_INTR_DDY_COARSE);
4959 case nir_intrinsic_ddy_fine: return emit_derivative(ctx, intr, DXIL_INTR_DDY_FINE);
4960
4961 case nir_intrinsic_load_first_vertex:
4962 ctx->mod.feats.extended_command_info = true;
4963 return emit_load_unary_external_function(ctx, intr, "dx.op.startVertexLocation",
4964 DXIL_INTR_START_VERTEX_LOCATION, nir_type_int);
4965 case nir_intrinsic_load_base_instance:
4966 ctx->mod.feats.extended_command_info = true;
4967 return emit_load_unary_external_function(ctx, intr, "dx.op.startInstanceLocation",
4968 DXIL_INTR_START_INSTANCE_LOCATION, nir_type_int);
4969
4970 case nir_intrinsic_load_num_workgroups:
4971 case nir_intrinsic_load_workgroup_size:
4972 default:
4973 log_nir_instr_unsupported(
4974 ctx->logger, "Unimplemented intrinsic instruction", &intr->instr);
4975 return false;
4976 }
4977 }
4978
4979 static const struct dxil_type *
4980 dxil_type_for_const(struct ntd_context *ctx, nir_def *def)
4981 {
4982 if (BITSET_TEST(ctx->int_types, def->index) ||
4983 !BITSET_TEST(ctx->float_types, def->index))
4984 return dxil_module_get_int_type(&ctx->mod, def->bit_size);
4985 return dxil_module_get_float_type(&ctx->mod, def->bit_size);
4986 }
4987
4988 static bool
4989 emit_load_const(struct ntd_context *ctx, nir_load_const_instr *load_const)
4990 {
4991 for (uint32_t i = 0; i < load_const->def.num_components; ++i) {
4992 const struct dxil_type *type = dxil_type_for_const(ctx, &load_const->def);
4993 store_ssa_def(ctx, &load_const->def, i, get_value_for_const(&ctx->mod, &load_const->value[i], type));
4994 }
4995 return true;
4996 }
4997
4998 static bool
4999 emit_deref(struct ntd_context *ctx, nir_deref_instr *instr)
5000 {
5001 /* There are two possible reasons we might be walking through derefs:
5002 * 1. Computing an index to be used for a texture/sampler/image binding, which
5003 * can only do array indexing and should compute the indices along the way with
5004 * array-of-array sizes.
5005 * 2. Storing an index to be used in a GEP for access to a variable.
5006 */
5007 nir_variable *var = nir_deref_instr_get_variable(instr);
5008 assert(var);
5009
5010 bool is_aoa_size =
5011 glsl_type_is_sampler(glsl_without_array(var->type)) ||
5012 glsl_type_is_image(glsl_without_array(var->type)) ||
5013 glsl_type_is_texture(glsl_without_array(var->type));
5014
5015 if (!is_aoa_size) {
5016 /* Just store the values, we'll use these to build a GEP in the load or store */
5017 switch (instr->deref_type) {
5018 case nir_deref_type_var:
5019 store_def(ctx, &instr->def, 0, dxil_module_get_int_const(&ctx->mod, 0, instr->def.bit_size));
5020 return true;
5021 case nir_deref_type_array:
5022 store_def(ctx, &instr->def, 0, get_src(ctx, &instr->arr.index, 0, nir_type_int));
5023 return true;
5024 case nir_deref_type_struct:
5025 store_def(ctx, &instr->def, 0, dxil_module_get_int_const(&ctx->mod, instr->strct.index, 32));
5026 return true;
5027 default:
5028 unreachable("Other deref types not supported");
5029 }
5030 }
5031
5032 /* In the CL environment there's nothing to emit here: the instructions that
5033 * consume the deref emit whatever logic is needed for scratch/shared GEP
5034 * addressing.
5035 */
5035 if (ctx->opts->environment == DXIL_ENVIRONMENT_CL)
5036 return true;
5037
5038 const struct glsl_type *type = instr->type;
5039 const struct dxil_value *binding;
5040 unsigned binding_val = ctx->opts->environment == DXIL_ENVIRONMENT_GL ?
5041 var->data.driver_location : var->data.binding;
5042
5043 if (instr->deref_type == nir_deref_type_var) {
5044 binding = dxil_module_get_int32_const(&ctx->mod, binding_val);
5045 } else {
5046 const struct dxil_value *base = get_src(ctx, &instr->parent, 0, nir_type_uint32);
5047 const struct dxil_value *offset = get_src(ctx, &instr->arr.index, 0, nir_type_uint32);
5048 if (!base || !offset)
5049 return false;
5050
5051 if (glsl_type_is_array(instr->type)) {
5052 offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_MUL, offset,
5053 dxil_module_get_int32_const(&ctx->mod, glsl_get_aoa_size(instr->type)), 0);
5054 if (!offset)
5055 return false;
5056 }
5057 binding = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, base, offset, 0);
5058 }
5059
5060 if (!binding)
5061 return false;
5062
5063 /* Haven't finished chasing the deref chain yet, just store the value */
5064 if (glsl_type_is_array(type)) {
5065 store_def(ctx, &instr->def, 0, binding);
5066 return true;
5067 }
5068
5069 assert(glsl_type_is_sampler(type) || glsl_type_is_image(type) || glsl_type_is_texture(type));
5070 enum dxil_resource_class res_class;
5071 if (glsl_type_is_image(type))
5072 res_class = DXIL_RESOURCE_CLASS_UAV;
5073 else if (glsl_type_is_sampler(type))
5074 res_class = DXIL_RESOURCE_CLASS_SAMPLER;
5075 else
5076 res_class = DXIL_RESOURCE_CLASS_SRV;
5077
5078 unsigned descriptor_set = ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN ?
5079 var->data.descriptor_set : (glsl_type_is_image(type) ? 1 : 0);
5080 const struct dxil_value *handle = emit_createhandle_call_dynamic(ctx, res_class,
5081 descriptor_set, binding_val, binding, false);
5082 if (!handle)
5083 return false;
5084
5085 store_ssa_def(ctx, &instr->def, 0, handle);
5086 return true;
5087 }
5088
5089 static bool
5090 emit_cond_branch(struct ntd_context *ctx, const struct dxil_value *cond,
5091 int true_block, int false_block)
5092 {
5093 assert(cond);
5094 assert(true_block >= 0);
5095 assert(false_block >= 0);
5096 return dxil_emit_branch(&ctx->mod, cond, true_block, false_block);
5097 }
5098
5099 static bool
5100 emit_branch(struct ntd_context *ctx, int block)
5101 {
5102 assert(block >= 0);
5103 return dxil_emit_branch(&ctx->mod, NULL, block, -1);
5104 }
5105
5106 static bool
5107 emit_jump(struct ntd_context *ctx, nir_jump_instr *instr)
5108 {
5109 switch (instr->type) {
5110 case nir_jump_break:
5111 case nir_jump_continue:
5112 assert(instr->instr.block->successors[0]);
5113 assert(!instr->instr.block->successors[1]);
5114 return emit_branch(ctx, instr->instr.block->successors[0]->index);
5115
5116 default:
5117 unreachable("Unsupported jump type\n");
5118 }
5119 }
5120
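/* NIR phis are vectors while DXIL phis are scalar, so one DXIL phi is emitted
 * per component; incoming edges are filled in by fixup_phi() after all blocks
 * have been emitted.
 */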
5121 struct phi_block {
5122 unsigned num_components;
5123 struct dxil_instr *comp[NIR_MAX_VEC_COMPONENTS];
5124 };
5125
5126 static bool
5127 emit_phi(struct ntd_context *ctx, nir_phi_instr *instr)
5128 {
5129 const struct dxil_type *type = NULL;
5130 nir_foreach_phi_src(src, instr) {
5131 /* All sources have the same type, just use the first one */
5132 type = dxil_value_get_type(ctx->defs[src->src.ssa->index].chans[0]);
5133 break;
5134 }
5135
5136 struct phi_block *vphi = ralloc(ctx->phis, struct phi_block);
5137 vphi->num_components = instr->def.num_components;
5138
5139 for (unsigned i = 0; i < vphi->num_components; ++i) {
5140 struct dxil_instr *phi = vphi->comp[i] = dxil_emit_phi(&ctx->mod, type);
5141 if (!phi)
5142 return false;
5143 store_ssa_def(ctx, &instr->def, i, dxil_instr_get_return_value(phi));
5144 }
5145 _mesa_hash_table_insert(ctx->phis, instr, vphi);
5146 return true;
5147 }
5148
5149 static bool
5150 fixup_phi(struct ntd_context *ctx, nir_phi_instr *instr,
5151 struct phi_block *vphi)
5152 {
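/* Incoming (value, predecessor-block) pairs are flushed in batches of up to
 * 16, the capacity of the scratch arrays below.
 */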
5153 const struct dxil_value *values[16];
5154 unsigned blocks[16];
5155 for (unsigned i = 0; i < vphi->num_components; ++i) {
5156 size_t num_incoming = 0;
5157 nir_foreach_phi_src(src, instr) {
5158 const struct dxil_value *val = get_src_ssa(ctx, src->src.ssa, i);
5159 values[num_incoming] = val;
5160 blocks[num_incoming] = src->pred->index;
5161 ++num_incoming;
5162 if (num_incoming == ARRAY_SIZE(values)) {
5163 if (!dxil_phi_add_incoming(vphi->comp[i], values, blocks,
5164 num_incoming))
5165 return false;
5166 num_incoming = 0;
5167 }
5168 }
5169 if (num_incoming > 0 && !dxil_phi_add_incoming(vphi->comp[i], values,
5170 blocks, num_incoming))
5171 return false;
5172 }
5173 return true;
5174 }
5175
5176 static unsigned
5177 get_n_src(struct ntd_context *ctx, const struct dxil_value **values,
5178 unsigned max_components, nir_tex_src *src, nir_alu_type type)
5179 {
5180 unsigned num_components = nir_src_num_components(src->src);
5181 unsigned i = 0;
5182
5183 assert(num_components <= max_components);
5184
5185 for (i = 0; i < num_components; ++i) {
5186 values[i] = get_src(ctx, &src->src, i, type);
5187 if (!values[i])
5188 return 0;
5189 }
5190
5191 return num_components;
5192 }
5193
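/* Pads the unused tail of a source array with undef, since the DXIL sample
 * and load intrinsics always take a fixed argument count.
 */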
5194 #define PAD_SRC(ctx, array, components, undef) \
5195 for (unsigned i = components; i < ARRAY_SIZE(array); ++i) { \
5196 array[i] = undef; \
5197 }
5198
5199 static const struct dxil_value *
5200 emit_sample(struct ntd_context *ctx, struct texop_parameters *params)
5201 {
5202 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sample", params->overload);
5203 if (!func)
5204 return NULL;
5205
5206 const struct dxil_value *args[11] = {
5207 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE),
5208 params->tex, params->sampler,
5209 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5210 params->offset[0], params->offset[1], params->offset[2],
5211 params->min_lod
5212 };
5213
5214 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5215 }
5216
5217 static const struct dxil_value *
5218 emit_sample_bias(struct ntd_context *ctx, struct texop_parameters *params)
5219 {
5220 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleBias", params->overload);
5221 if (!func)
5222 return NULL;
5223
5224 assert(params->bias != NULL);
5225
5226 const struct dxil_value *args[12] = {
5227 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_BIAS),
5228 params->tex, params->sampler,
5229 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5230 params->offset[0], params->offset[1], params->offset[2],
5231 params->bias, params->min_lod
5232 };
5233
5234 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5235 }
5236
5237 static const struct dxil_value *
5238 emit_sample_level(struct ntd_context *ctx, struct texop_parameters *params)
5239 {
5240 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleLevel", params->overload);
5241 if (!func)
5242 return NULL;
5243
5244 assert(params->lod_or_sample != NULL);
5245
5246 const struct dxil_value *args[11] = {
5247 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_LEVEL),
5248 params->tex, params->sampler,
5249 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5250 params->offset[0], params->offset[1], params->offset[2],
5251 params->lod_or_sample
5252 };
5253
5254 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5255 }
5256
5257 static const struct dxil_value *
5258 emit_sample_cmp(struct ntd_context *ctx, struct texop_parameters *params)
5259 {
5260 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmp", DXIL_F32);
5261 enum dxil_intr opcode = DXIL_INTR_SAMPLE_CMP;
5265
5266 if (!func)
5267 return NULL;
5268
5269 const struct dxil_value *args[12] = {
5270 dxil_module_get_int32_const(&ctx->mod, opcode),
5271 params->tex, params->sampler,
5272 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5273 params->offset[0], params->offset[1], params->offset[2],
5274 params->cmp, params->min_lod
5275 };
5276
5277 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5278 }
5279
5280 static const struct dxil_value *
5281 emit_sample_cmp_level_zero(struct ntd_context *ctx, struct texop_parameters *params)
5282 {
5283 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevelZero", DXIL_F32);
5284 enum dxil_intr opcode = DXIL_INTR_SAMPLE_CMP_LVL_ZERO;
5288
5289 if (!func)
5290 return NULL;
5291
5292 const struct dxil_value *args[11] = {
5293 dxil_module_get_int32_const(&ctx->mod, opcode),
5294 params->tex, params->sampler,
5295 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5296 params->offset[0], params->offset[1], params->offset[2],
5297 params->cmp
5298 };
5299
5300 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5301 }
5302
5303 static const struct dxil_value *
5304 emit_sample_cmp_level(struct ntd_context *ctx, struct texop_parameters *params)
5305 {
5306 ctx->mod.feats.advanced_texture_ops = true;
5307 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevel", params->overload);
5308 if (!func)
5309 return NULL;
5310
5311 assert(params->lod_or_sample != NULL);
5312
5313 const struct dxil_value *args[12] = {
5314 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_LEVEL),
5315 params->tex, params->sampler,
5316 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5317 params->offset[0], params->offset[1], params->offset[2],
5318 params->cmp, params->lod_or_sample
5319 };
5320
5321 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5322 }
5323
5324 static const struct dxil_value *
5325 emit_sample_cmp_bias(struct ntd_context *ctx, struct texop_parameters *params)
5326 {
5327 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpBias", params->overload);
5328 if (!func)
5329 return NULL;
5330
5331 assert(params->bias != NULL);
5332 ctx->mod.feats.sample_cmp_bias_gradient = 1;
5333
5334 const struct dxil_value *args[13] = {
5335 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_BIAS),
5336 params->tex, params->sampler,
5337 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5338 params->offset[0], params->offset[1], params->offset[2],
5339 params->cmp, params->bias, params->min_lod
5340 };
5341
5342 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5343 }
5344
5345 static const struct dxil_value *
5346 emit_sample_grad(struct ntd_context *ctx, struct texop_parameters *params)
5347 {
5348 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleGrad", params->overload);
5349 if (!func)
5350 return NULL;
5351
5352 const struct dxil_value *args[17] = {
5353 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_GRAD),
5354 params->tex, params->sampler,
5355 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5356 params->offset[0], params->offset[1], params->offset[2],
5357 params->dx[0], params->dx[1], params->dx[2],
5358 params->dy[0], params->dy[1], params->dy[2],
5359 params->min_lod
5360 };
5361
5362 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5363 }
5364
5365 static const struct dxil_value *
5366 emit_sample_cmp_grad(struct ntd_context *ctx, struct texop_parameters *params)
5367 {
5368 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpGrad", params->overload);
5369 if (!func)
5370 return NULL;
5371
5372 ctx->mod.feats.sample_cmp_bias_gradient = 1;
5373
5374 const struct dxil_value *args[18] = {
5375 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_GRAD),
5376 params->tex, params->sampler,
5377 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5378 params->offset[0], params->offset[1], params->offset[2],
5379 params->cmp,
5380 params->dx[0], params->dx[1], params->dx[2],
5381 params->dy[0], params->dy[1], params->dy[2],
5382 params->min_lod
5383 };
5384
5385 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5386 }
5387
5388 static const struct dxil_value *
5389 emit_texel_fetch(struct ntd_context *ctx, struct texop_parameters *params)
5390 {
5391 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", params->overload);
5392 if (!func)
5393 return NULL;
5394
5395 if (!params->lod_or_sample)
5396 params->lod_or_sample = dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32));
5397
5398 const struct dxil_value *args[] = {
5399 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOAD),
5400 params->tex,
5401 params->lod_or_sample, params->coord[0], params->coord[1], params->coord[2],
5402 params->offset[0], params->offset[1], params->offset[2]
5403 };
5404
5405 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5406 }
5407
5408 static const struct dxil_value *
5409 emit_texture_lod(struct ntd_context *ctx, struct texop_parameters *params, bool clamped)
5410 {
5411 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.calculateLOD", DXIL_F32);
5412 if (!func)
5413 return NULL;
5414
5415 const struct dxil_value *args[] = {
5416 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOD),
5417 params->tex,
5418 params->sampler,
5419 params->coord[0],
5420 params->coord[1],
5421 params->coord[2],
5422 dxil_module_get_int1_const(&ctx->mod, clamped ? 1 : 0)
5423 };
5424
5425 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5426 }
5427
5428 static const struct dxil_value *
5429 emit_texture_gather(struct ntd_context *ctx, struct texop_parameters *params, unsigned component)
5430 {
5431 const struct dxil_func *func = dxil_get_function(&ctx->mod,
5432 params->cmp ? "dx.op.textureGatherCmp" : "dx.op.textureGather", params->overload);
5433 if (!func)
5434 return NULL;
5435
5436 const struct dxil_value *args[] = {
5437 dxil_module_get_int32_const(&ctx->mod, params->cmp ?
5438 DXIL_INTR_TEXTURE_GATHER_CMP : DXIL_INTR_TEXTURE_GATHER),
5439 params->tex,
5440 params->sampler,
5441 params->coord[0],
5442 params->coord[1],
5443 params->coord[2],
5444 params->coord[3],
5445 params->offset[0],
5446 params->offset[1],
5447 dxil_module_get_int32_const(&ctx->mod, component),
5448 params->cmp
5449 };
5450
5451 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args) - (params->cmp ? 0 : 1));
5452 }
5453
5454 static bool
5455 emit_tex(struct ntd_context *ctx, nir_tex_instr *instr)
5456 {
5457 struct texop_parameters params;
5458 memset(&params, 0, sizeof(struct texop_parameters));
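/* Outside of Vulkan, texture and sampler handles were created up front and
 * are simply looked up by binding index; Vulkan provides them via deref or
 * handle sources handled in the loop below.
 */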
5459 if (ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN) {
5460 params.tex = ctx->srv_handles[instr->texture_index];
5461 params.sampler = ctx->sampler_handles[instr->sampler_index];
5462 }
5463
5464 const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
5465 const struct dxil_type *float_type = dxil_module_get_float_type(&ctx->mod, 32);
5466 const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);
5467 const struct dxil_value *float_undef = dxil_module_get_undef(&ctx->mod, float_type);
5468
5469 unsigned coord_components = 0, offset_components = 0, dx_components = 0, dy_components = 0;
5470 params.overload = get_overload(instr->dest_type, 32);
5471
5472 bool lod_is_zero = false;
5473 for (unsigned i = 0; i < instr->num_srcs; i++) {
5474 nir_alu_type type = nir_tex_instr_src_type(instr, i);
5475
5476 switch (instr->src[i].src_type) {
5477 case nir_tex_src_coord:
5478 coord_components = get_n_src(ctx, params.coord, ARRAY_SIZE(params.coord),
5479 &instr->src[i], type);
5480 if (!coord_components)
5481 return false;
5482 break;
5483
5484 case nir_tex_src_offset:
5485 offset_components = get_n_src(ctx, params.offset, ARRAY_SIZE(params.offset),
5486 &instr->src[i], nir_type_int);
5487 if (!offset_components)
5488 return false;
5489
5490 /* Dynamic offsets were only allowed with gather until "advanced texture ops" in SM 6.7 */
5491 if (!nir_src_is_const(instr->src[i].src) && instr->op != nir_texop_tg4)
5492 ctx->mod.feats.advanced_texture_ops = true;
5493 break;
5494
5495 case nir_tex_src_bias:
5496 assert(instr->op == nir_texop_txb);
5497 assert(nir_src_num_components(instr->src[i].src) == 1);
5498 params.bias = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
5499 if (!params.bias)
5500 return false;
5501 break;
5502
5503 case nir_tex_src_lod:
5504 assert(nir_src_num_components(instr->src[i].src) == 1);
5505 if (instr->op == nir_texop_txf_ms) {
5506 assert(nir_src_as_int(instr->src[i].src) == 0);
5507 break;
5508 }
5509
5510 /* Buffers don't have a LOD */
5511 if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF)
5512 params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, type);
5513 else
5514 params.lod_or_sample = int_undef;
5515 if (!params.lod_or_sample)
5516 return false;
5517
5518 if (nir_src_is_const(instr->src[i].src) && nir_src_as_float(instr->src[i].src) == 0.0f)
5519 lod_is_zero = true;
5520 break;
5521
5522 case nir_tex_src_min_lod:
5523 assert(nir_src_num_components(instr->src[i].src) == 1);
5524 params.min_lod = get_src(ctx, &instr->src[i].src, 0, type);
5525 if (!params.min_lod)
5526 return false;
5527 break;
5528
5529 case nir_tex_src_comparator:
5530 assert(nir_src_num_components(instr->src[i].src) == 1);
5531 params.cmp = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
5532 if (!params.cmp)
5533 return false;
5534 break;
5535
5536 case nir_tex_src_ddx:
5537 dx_components = get_n_src(ctx, params.dx, ARRAY_SIZE(params.dx),
5538 &instr->src[i], nir_type_float);
5539 if (!dx_components)
5540 return false;
5541 break;
5542
5543 case nir_tex_src_ddy:
5544 dy_components = get_n_src(ctx, params.dy, ARRAY_SIZE(params.dy),
5545 &instr->src[i], nir_type_float);
5546 if (!dy_components)
5547 return false;
5548 break;
5549
5550 case nir_tex_src_ms_index:
5551 params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, nir_type_int);
5552 if (!params.lod_or_sample)
5553 return false;
5554 break;
5555
5556 case nir_tex_src_texture_deref:
5557 assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN);
5558 params.tex = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
5559 break;
5560
5561 case nir_tex_src_sampler_deref:
5562 assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN);
5563 params.sampler = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
5564 break;
5565
5566 case nir_tex_src_texture_offset:
5567 params.tex = emit_createhandle_call_dynamic(ctx, DXIL_RESOURCE_CLASS_SRV,
5568 0, instr->texture_index,
5569 dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
5570 get_src(ctx, &instr->src[i].src, 0, nir_type_uint),
5571 dxil_module_get_int32_const(&ctx->mod, instr->texture_index), 0),
5572 instr->texture_non_uniform);
5573 break;
5574
5575 case nir_tex_src_sampler_offset:
5576 if (nir_tex_instr_need_sampler(instr)) {
5577 params.sampler = emit_createhandle_call_dynamic(ctx, DXIL_RESOURCE_CLASS_SAMPLER,
5578 0, instr->sampler_index,
5579 dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
5580 get_src(ctx, &instr->src[i].src, 0, nir_type_uint),
5581 dxil_module_get_int32_const(&ctx->mod, instr->sampler_index), 0),
5582 instr->sampler_non_uniform);
5583 }
5584 break;
5585
5586 case nir_tex_src_texture_handle:
5587 params.tex = create_srv_handle(ctx, instr, &instr->src[i].src);
5588 break;
5589
5590 case nir_tex_src_sampler_handle:
5591 if (nir_tex_instr_need_sampler(instr))
5592 params.sampler = create_sampler_handle(ctx, instr->is_shadow, &instr->src[i].src);
5593 break;
5594
5595 case nir_tex_src_projector:
5596 unreachable("Texture projector should have been lowered");
5597
5598 default:
5599 fprintf(stderr, "texture source: %d\n", instr->src[i].src_type);
5600 unreachable("unknown texture source");
5601 }
5602 }
5603
5604 assert(params.tex != NULL);
5605 assert(instr->op == nir_texop_txf ||
5606 instr->op == nir_texop_txf_ms ||
5607 nir_tex_instr_is_query(instr) ||
5608 params.sampler != NULL);
5609
5610 PAD_SRC(ctx, params.coord, coord_components, float_undef);
5611 PAD_SRC(ctx, params.offset, offset_components, int_undef);
5612 if (!params.min_lod) params.min_lod = float_undef;
5613
5614 const struct dxil_value *sample = NULL;
5615 switch (instr->op) {
5616 case nir_texop_txb:
5617 if (params.cmp != NULL && ctx->mod.minor_version >= 8)
5618 sample = emit_sample_cmp_bias(ctx, &params);
5619 else
5620 sample = emit_sample_bias(ctx, &params);
5621 break;
5622
5623 case nir_texop_tex:
5624 if (params.cmp != NULL) {
5625 sample = emit_sample_cmp(ctx, &params);
5626 break;
5627 } else if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) {
5628 sample = emit_sample(ctx, &params);
5629 break;
5630 }
5631 params.lod_or_sample = dxil_module_get_float_const(&ctx->mod, 0);
5632 lod_is_zero = true;
5633 FALLTHROUGH;
5634 case nir_texop_txl:
5635 if (lod_is_zero && params.cmp != NULL && ctx->mod.minor_version < 7) {
5636 /* Prior to SM 6.7, if the level is a constant 0.0, drop the LOD argument
5637 * and use the level-less dx.op.sampleCmpLevelZero, since
5638 * dx.op.sampleCmpLevel is not available before SM 6.7.
5639 */
5640 sample = emit_sample_cmp_level_zero(ctx, &params);
5641 } else {
5642 if (params.cmp != NULL)
5643 sample = emit_sample_cmp_level(ctx, &params);
5644 else
5645 sample = emit_sample_level(ctx, &params);
5646 }
5647 break;
5648
5649 case nir_texop_txd:
5650 PAD_SRC(ctx, params.dx, dx_components, float_undef);
5651 PAD_SRC(ctx, params.dy, dy_components, float_undef);
5652 if (params.cmp != NULL && ctx->mod.minor_version >= 8)
5653 sample = emit_sample_cmp_grad(ctx, &params);
5654 else
5655 sample = emit_sample_grad(ctx, &params);
5656 break;
5657
5658 case nir_texop_txf:
5659 case nir_texop_txf_ms:
5660 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
5661 params.coord[1] = int_undef;
5662 sample = emit_bufferload_call(ctx, params.tex, params.coord, params.overload);
5663 } else {
5664 PAD_SRC(ctx, params.coord, coord_components, int_undef);
5665 sample = emit_texel_fetch(ctx, &params);
5666 }
5667 break;
5668
5669 case nir_texop_txs:
5670 sample = emit_texture_size(ctx, &params);
5671 break;
5672
5673 case nir_texop_tg4:
5674 sample = emit_texture_gather(ctx, &params, instr->component);
5675 break;
5676
5677 case nir_texop_lod:
5678 sample = emit_texture_lod(ctx, &params, true);
5679 store_def(ctx, &instr->def, 0, sample);
5680 sample = emit_texture_lod(ctx, &params, false);
5681 store_def(ctx, &instr->def, 1, sample);
5682 return true;
5683
5684 case nir_texop_query_levels: {
5685 params.lod_or_sample = dxil_module_get_int_const(&ctx->mod, 0, 32);
5686 sample = emit_texture_size(ctx, &params);
5687 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3);
5688 store_def(ctx, &instr->def, 0, retval);
5689 return true;
5690 }
5691
5692 case nir_texop_texture_samples: {
5693 params.lod_or_sample = int_undef;
5694 sample = emit_texture_size(ctx, &params);
5695 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3);
5696 store_def(ctx, &instr->def, 0, retval);
5697 return true;
5698 }
5699
5700 default:
5701 fprintf(stderr, "texture op: %d\n", instr->op);
5702 unreachable("unknown texture op");
5703 }
5704
5705 if (!sample)
5706 return false;
5707
5708 for (unsigned i = 0; i < instr->def.num_components; ++i) {
5709 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, i);
5710 store_def(ctx, &instr->def, i, retval);
5711 }
5712
5713 return true;
5714 }
5715
5716 static bool
5717 emit_undefined(struct ntd_context *ctx, nir_undef_instr *undef)
5718 {
5719 for (unsigned i = 0; i < undef->def.num_components; ++i)
5720 store_ssa_def(ctx, &undef->def, i, dxil_module_get_int32_const(&ctx->mod, 0));
5721 return true;
5722 }
5723
5724 static bool emit_instr(struct ntd_context *ctx, struct nir_instr *instr)
5725 {
5726 switch (instr->type) {
5727 case nir_instr_type_alu:
5728 return emit_alu(ctx, nir_instr_as_alu(instr));
5729 case nir_instr_type_intrinsic:
5730 return emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
5731 case nir_instr_type_load_const:
5732 return emit_load_const(ctx, nir_instr_as_load_const(instr));
5733 case nir_instr_type_deref:
5734 return emit_deref(ctx, nir_instr_as_deref(instr));
5735 case nir_instr_type_jump:
5736 return emit_jump(ctx, nir_instr_as_jump(instr));
5737 case nir_instr_type_phi:
5738 return emit_phi(ctx, nir_instr_as_phi(instr));
5739 case nir_instr_type_tex:
5740 return emit_tex(ctx, nir_instr_as_tex(instr));
5741 case nir_instr_type_undef:
5742 return emit_undefined(ctx, nir_instr_as_undef(instr));
5743 default:
5744 log_nir_instr_unsupported(ctx->logger, "Unimplemented instruction type",
5745 instr);
5746 return false;
5747 }
5748 }
5749
5750
5751 static bool
5752 emit_block(struct ntd_context *ctx, struct nir_block *block)
5753 {
5754 assert(block->index < ctx->mod.cur_emitting_func->num_basic_block_ids);
5755 ctx->mod.cur_emitting_func->basic_block_ids[block->index] = ctx->mod.cur_emitting_func->curr_block;
5756
5757 nir_foreach_instr(instr, block) {
5758 TRACE_CONVERSION(instr);
5759
5760 if (!emit_instr(ctx, instr)) {
5761 return false;
5762 }
5763 }
5764 return true;
5765 }
5766
5767 static bool
5768 emit_cf_list(struct ntd_context *ctx, struct exec_list *list);
5769
5770 static bool
5771 emit_if(struct ntd_context *ctx, struct nir_if *if_stmt)
5772 {
5773 assert(nir_src_num_components(if_stmt->condition) == 1);
5774 const struct dxil_value *cond = get_src(ctx, &if_stmt->condition, 0,
5775 nir_type_bool);
5776 if (!cond)
5777 return false;
5778
5779 /* prepare blocks */
5780 nir_block *then_block = nir_if_first_then_block(if_stmt);
5781 assert(nir_if_last_then_block(if_stmt)->successors[0]);
5782 assert(!nir_if_last_then_block(if_stmt)->successors[1]);
5783 int then_succ = nir_if_last_then_block(if_stmt)->successors[0]->index;
5784
5785 nir_block *else_block = NULL;
5786 int else_succ = -1;
5787 if (!exec_list_is_empty(&if_stmt->else_list)) {
5788 else_block = nir_if_first_else_block(if_stmt);
5789 assert(nir_if_last_else_block(if_stmt)->successors[0]);
5790 assert(!nir_if_last_else_block(if_stmt)->successors[1]);
5791 else_succ = nir_if_last_else_block(if_stmt)->successors[0]->index;
5792 }
5793
5794 if (!emit_cond_branch(ctx, cond, then_block->index,
5795 else_block ? else_block->index : then_succ))
5796 return false;
5797
5798 /* handle then-block */
5799 if (!emit_cf_list(ctx, &if_stmt->then_list) ||
5800 (!nir_block_ends_in_jump(nir_if_last_then_block(if_stmt)) &&
5801 !emit_branch(ctx, then_succ)))
5802 return false;
5803
5804 if (else_block) {
5805 /* handle else-block */
5806 if (!emit_cf_list(ctx, &if_stmt->else_list) ||
5807 (!nir_block_ends_in_jump(nir_if_last_else_block(if_stmt)) &&
5808 !emit_branch(ctx, else_succ)))
5809 return false;
5810 }
5811
5812 return true;
5813 }
5814
5815 static bool
5816 emit_loop(struct ntd_context *ctx, nir_loop *loop)
5817 {
5818 assert(!nir_loop_has_continue_construct(loop));
5819 nir_block *first_block = nir_loop_first_block(loop);
5820 nir_block *last_block = nir_loop_last_block(loop);
5821
5822 assert(last_block->successors[0]);
5823 assert(!last_block->successors[1]);
5824
5825 if (!emit_branch(ctx, first_block->index))
5826 return false;
5827
5828 if (!emit_cf_list(ctx, &loop->body))
5829 return false;
5830
5831 /* If the loop's last block doesn't explicitly jump somewhere, then there's
5832 * an implicit continue that should take it back to the first loop block
5833 */
5834 nir_instr *last_instr = nir_block_last_instr(last_block);
5835 if ((!last_instr || last_instr->type != nir_instr_type_jump) &&
5836 !emit_branch(ctx, first_block->index))
5837 return false;
5838
5839 return true;
5840 }
5841
5842 static bool
5843 emit_cf_list(struct ntd_context *ctx, struct exec_list *list)
5844 {
5845 foreach_list_typed(nir_cf_node, node, node, list) {
5846 switch (node->type) {
5847 case nir_cf_node_block:
5848 if (!emit_block(ctx, nir_cf_node_as_block(node)))
5849 return false;
5850 break;
5851
5852 case nir_cf_node_if:
5853 if (!emit_if(ctx, nir_cf_node_as_if(node)))
5854 return false;
5855 break;
5856
5857 case nir_cf_node_loop:
5858 if (!emit_loop(ctx, nir_cf_node_as_loop(node)))
5859 return false;
5860 break;
5861
5862 default:
5863 unreachable("unsupported cf-list node");
5864 break;
5865 }
5866 }
5867 return true;
5868 }
5869
5870 static void
5871 insert_sorted_by_binding(struct exec_list *var_list, nir_variable *new_var)
5872 {
5873 nir_foreach_variable_in_list(var, var_list) {
5874 if (var->data.binding > new_var->data.binding) {
5875 exec_node_insert_node_before(&var->node, &new_var->node);
5876 return;
5877 }
5878 }
5879 exec_list_push_tail(var_list, &new_var->node);
5880 }
5881
5882
5883 static void
5884 sort_uniforms_by_binding_and_remove_structs(nir_shader *s)
5885 {
5886 struct exec_list new_list;
5887 exec_list_make_empty(&new_list);
5888
5889 nir_foreach_variable_with_modes_safe(var, s, nir_var_uniform) {
5890 exec_node_remove(&var->node);
5891 const struct glsl_type *type = glsl_without_array(var->type);
5892 if (!glsl_type_is_struct(type))
5893 insert_sorted_by_binding(&new_list, var);
5894 }
5895 exec_list_append(&s->variables, &new_list);
5896 }
5897
5898 static bool
5899 emit_cbvs(struct ntd_context *ctx)
5900 {
5901 if (ctx->opts->environment != DXIL_ENVIRONMENT_GL) {
5902 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ubo) {
5903 if (!emit_ubo_var(ctx, var))
5904 return false;
5905 }
5906 } else {
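/* The GL path declares CBVs by convention rather than from variables: an
 * optional cbuffer 0 holding the default uniforms, one arrayed CBV spanning
 * the user UBOs, and an optional trailing CBV for driver-internal state vars.
 */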
5907 if (ctx->shader->info.num_ubos) {
5908 const unsigned ubo_size = 16384 /*4096 vec4's*/;
5909 uint array_base = ctx->shader->info.first_ubo_is_default_ubo ? 1 : 0;
5910 bool has_ubo0 = ctx->shader->num_uniforms > 0 && ctx->shader->info.first_ubo_is_default_ubo;
5911 bool has_state_vars = ctx->opts->last_ubo_is_not_arrayed;
5912 unsigned ubo1_array_size = ctx->shader->info.num_ubos - array_base -
5913 (has_state_vars ? 1 : 0);
5914
5915 if (has_ubo0 &&
5916 !emit_cbv(ctx, 0, 0, ubo_size, 1, "__ubo_uniforms"))
5917 return false;
5918 if (ubo1_array_size &&
5919 !emit_cbv(ctx, array_base, 0, ubo_size, ubo1_array_size, "__ubos"))
5920 return false;
5921 if (has_state_vars &&
5922 !emit_cbv(ctx, ctx->shader->info.num_ubos - 1, 0, ubo_size, 1, "__ubo_state_vars"))
5923 return false;
5924 }
5925 }
5926
5927 return true;
5928 }
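
/* Worked example (illustrative): num_ubos = 4, first_ubo_is_default_ubo,
 * num_uniforms > 0 and last_ubo_is_not_arrayed give array_base = 1 and
 * ubo1_array_size = 4 - 1 - 1 = 2, so the GL path above binds:
 *
 *    emit_cbv(ctx, 0, 0, 16384, 1, "__ubo_uniforms");   // default-uniform UBO
 *    emit_cbv(ctx, 1, 0, 16384, 2, "__ubos");           // bindings 1..2 as one array
 *    emit_cbv(ctx, 3, 0, 16384, 1, "__ubo_state_vars"); // last UBO kept un-arrayed
 */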

static bool
emit_scratch(struct ntd_context *ctx, nir_function_impl *impl)
{
   uint32_t index = 0;
   nir_foreach_function_temp_variable(var, impl)
      var->data.driver_location = index++;

   if (ctx->scratchvars)
      ralloc_free((void *)ctx->scratchvars);

   ctx->scratchvars = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index);

   nir_foreach_function_temp_variable(var, impl) {
      const struct dxil_type *type = get_type_for_glsl_type(&ctx->mod, var->type);
      const struct dxil_value *length = dxil_module_get_int32_const(&ctx->mod, 1);
      const struct dxil_value *ptr = dxil_emit_alloca(&ctx->mod, type, length, 16);
      if (!ptr)
         return false;

      ctx->scratchvars[var->data.driver_location] = ptr;
   }

   return true;
}
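
/* Because driver_location is assigned densely above (0..index-1), later code
 * can map a function_temp variable straight to its alloca. A minimal sketch
 * of such a (hypothetical) lookup site:
 *
 *    const struct dxil_value *base =
 *       ctx->scratchvars[var->data.driver_location];
 */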

static bool
emit_function(struct ntd_context *ctx, nir_function *func, nir_function_impl *impl)
{
   assert(func->num_params == 0);
   nir_metadata_require(impl, nir_metadata_block_index);

   const char *attr_keys[2] = { NULL };
   const char *attr_values[2] = { NULL };
   if (ctx->shader->info.float_controls_execution_mode &
       (FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32 | FLOAT_CONTROLS_DENORM_PRESERVE_FP32))
      attr_keys[0] = "fp32-denorm-mode";
   if (ctx->shader->info.float_controls_execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32)
      attr_values[0] = "ftz";
   else if (ctx->shader->info.float_controls_execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP32)
      attr_values[0] = "preserve";

   const struct dxil_type *void_type = dxil_module_get_void_type(&ctx->mod);
   const struct dxil_type *func_type = dxil_module_add_function_type(&ctx->mod, void_type, NULL, 0);
   struct dxil_func_def *func_def = dxil_add_function_def(&ctx->mod, func->name, func_type, impl->num_blocks, attr_keys, attr_values);
   if (!func_def)
      return false;

   if (func->is_entrypoint)
      ctx->main_func_def = func_def;
   else if (func == ctx->tess_ctrl_patch_constant_func)
      ctx->tess_ctrl_patch_constant_func_def = func_def;

   ctx->defs = rzalloc_array(ctx->ralloc_ctx, struct dxil_def, impl->ssa_alloc);
   ctx->float_types = rzalloc_array(ctx->ralloc_ctx, BITSET_WORD, BITSET_WORDS(impl->ssa_alloc));
   ctx->int_types = rzalloc_array(ctx->ralloc_ctx, BITSET_WORD, BITSET_WORDS(impl->ssa_alloc));
   if (!ctx->defs || !ctx->float_types || !ctx->int_types)
      return false;
   ctx->num_defs = impl->ssa_alloc;

   ctx->phis = _mesa_pointer_hash_table_create(ctx->ralloc_ctx);
   if (!ctx->phis)
      return false;

   nir_gather_types(impl, ctx->float_types, ctx->int_types);

   if (!emit_scratch(ctx, impl))
      return false;

   if (!emit_static_indexing_handles(ctx))
      return false;

   if (!emit_cf_list(ctx, &impl->body))
      return false;

   hash_table_foreach(ctx->phis, entry) {
      if (!fixup_phi(ctx, (nir_phi_instr *)entry->key,
                     (struct phi_block *)entry->data))
         return false;
   }

   if (!dxil_emit_ret_void(&ctx->mod))
      return false;

   ralloc_free(ctx->defs);
   ctx->defs = NULL;
   _mesa_hash_table_destroy(ctx->phis, NULL);
   return true;
}

static bool
emit_module(struct ntd_context *ctx, const struct nir_to_dxil_options *opts)
{
   /* The validator forces us to emit resources in a specific order:
    * CBVs, Samplers, SRVs, UAVs. While we're at it, also remove stale
    * struct uniforms; they have been lowered but might not have been removed.
    */
   sort_uniforms_by_binding_and_remove_structs(ctx->shader);

   /* CBVs */
   if (!emit_cbvs(ctx))
      return false;

   /* Samplers */
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
      unsigned count = glsl_type_get_sampler_count(var->type);
      assert(count == 0 || glsl_type_is_bare_sampler(glsl_without_array(var->type)));
      if (count > 0 && !emit_sampler(ctx, var, count))
         return false;
   }

   /* SRVs */
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
      unsigned count = glsl_type_get_texture_count(var->type);
      assert(count == 0 || glsl_type_is_texture(glsl_without_array(var->type)));
      if (count > 0 && !emit_srv(ctx, var, count))
         return false;
   }

   /* Handle read-only SSBOs as SRVs */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
         if ((var->data.access & ACCESS_NON_WRITEABLE) != 0) {
            unsigned count = 1;
            if (glsl_type_is_array(var->type))
               count = glsl_get_length(var->type);
            if (!emit_srv(ctx, var, count))
               return false;
         }
      }
   }

   if (!emit_shared_vars(ctx))
      return false;
   if (!emit_global_consts(ctx))
      return false;

   /* UAVs */
   if (ctx->shader->info.stage == MESA_SHADER_KERNEL) {
      if (!emit_globals(ctx, opts->num_kernel_globals))
         return false;

   } else if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      /* Handle read/write SSBOs as UAVs */
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
         if ((var->data.access & ACCESS_NON_WRITEABLE) == 0) {
            unsigned count = 1;
            if (glsl_type_is_array(var->type))
               count = glsl_get_length(var->type);
            if (!emit_uav(ctx, var->data.binding, var->data.descriptor_set,
                          count, DXIL_COMP_TYPE_INVALID, 1,
                          DXIL_RESOURCE_KIND_RAW_BUFFER, var->data.access, var->name))
               return false;
         }
      }
   } else {
      for (unsigned i = 0; i < ctx->shader->info.num_ssbos; ++i) {
         char name[64];
         snprintf(name, sizeof(name), "__ssbo%d", i);
         if (!emit_uav(ctx, i, 0, 1, DXIL_COMP_TYPE_INVALID, 1,
                       DXIL_RESOURCE_KIND_RAW_BUFFER, 0, name))
            return false;
      }
      /* To work around a WARP bug, bind these descriptors a second time in descriptor
       * space 2. Space 0 will be used for static indexing, while space 2 will be used
       * for dynamic indexing. Space 0 will be individual SSBOs in the DXIL shader, while
       * space 2 will be a single array.
       */
      if (ctx->shader->info.num_ssbos &&
          !emit_uav(ctx, 0, 2, ctx->shader->info.num_ssbos, DXIL_COMP_TYPE_INVALID, 1,
                    DXIL_RESOURCE_KIND_RAW_BUFFER, 0, "__ssbo_dynamic"))
         return false;
   }
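
   /* Resulting register layout for, e.g., num_ssbos = 3 (illustrative):
    *
    *    space 0: u0 "__ssbo0", u1 "__ssbo1", u2 "__ssbo2"   (static indexing)
    *    space 2: u0 "__ssbo_dynamic", an array of 3 buffers (dynamic indexing)
    */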

   nir_foreach_image_variable(var, ctx->shader) {
      if (!emit_uav_var(ctx, var, glsl_type_get_image_count(var->type)))
         return false;
   }

   ctx->mod.info.has_per_sample_input =
      BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
      ctx->shader->info.fs.uses_sample_shading ||
      ctx->shader->info.fs.uses_sample_qualifier;
   if (!ctx->mod.info.has_per_sample_input && ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in | nir_var_system_value) {
         if (var->data.sample) {
            ctx->mod.info.has_per_sample_input = true;
            break;
         }
      }
   }

   /* From the Vulkan spec 1.3.238, section 15.8:
    *    "When sample shading is enabled, the x and y components of FragCoord
    *    reflect the location of one of the samples corresponding to the
    *    shader invocation."
    *
    * In other words, if the fragment shader is executing per-sample, then the
    * position variable should always be per-sample.
    *
    * Also:
    *    "The Centroid interpolation decoration is ignored, but allowed, on
    *    FragCoord."
    */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      nir_variable *pos_var = nir_find_variable_with_location(ctx->shader, nir_var_shader_in, VARYING_SLOT_POS);
      if (pos_var) {
         if (ctx->mod.info.has_per_sample_input)
            pos_var->data.sample = true;
         pos_var->data.centroid = false;
      }
   }

   unsigned input_clip_size = ctx->mod.shader_kind == DXIL_PIXEL_SHADER ?
      ctx->shader->info.clip_distance_array_size : ctx->opts->input_clip_size;
   preprocess_signatures(&ctx->mod, ctx->shader, input_clip_size);

   nir_foreach_function_with_impl(func, impl, ctx->shader) {
      if (!emit_function(ctx, func, impl))
         return false;
   }

   if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_out) {
         if (var->data.location == FRAG_RESULT_STENCIL) {
            ctx->mod.feats.stencil_ref = true;
         }
      }
   } else if (ctx->shader->info.stage == MESA_SHADER_VERTEX ||
              ctx->shader->info.stage == MESA_SHADER_TESS_EVAL) {
      if (ctx->shader->info.outputs_written &
          (VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER))
         ctx->mod.feats.array_layer_from_vs_or_ds = true;
   } else if (ctx->shader->info.stage == MESA_SHADER_GEOMETRY ||
              ctx->shader->info.stage == MESA_SHADER_TESS_CTRL) {
      if (ctx->shader->info.inputs_read &
          (VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER))
         ctx->mod.feats.array_layer_from_vs_or_ds = true;
   }

   if (ctx->mod.feats.native_low_precision && ctx->mod.minor_version < 2) {
      ctx->logger->log(ctx->logger->priv,
                       "Shader uses 16-bit types, which require shader model 6.2, but 6.2 is unsupported\n");
      return false;
   }

   return emit_metadata(ctx) &&
          dxil_emit_module(&ctx->mod);
}

static unsigned int
get_dxil_shader_kind(struct nir_shader *s)
{
   switch (s->info.stage) {
   case MESA_SHADER_VERTEX:
      return DXIL_VERTEX_SHADER;
   case MESA_SHADER_TESS_CTRL:
      return DXIL_HULL_SHADER;
   case MESA_SHADER_TESS_EVAL:
      return DXIL_DOMAIN_SHADER;
   case MESA_SHADER_GEOMETRY:
      return DXIL_GEOMETRY_SHADER;
   case MESA_SHADER_FRAGMENT:
      return DXIL_PIXEL_SHADER;
   case MESA_SHADER_KERNEL:
   case MESA_SHADER_COMPUTE:
      return DXIL_COMPUTE_SHADER;
   default:
      unreachable("unknown shader stage in nir_to_dxil");
      return DXIL_COMPUTE_SHADER;
   }
}

static unsigned
lower_bit_size_callback(const nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_alu)
      return 0;
   nir_alu_instr *alu = nir_instr_as_alu(instr);

   if (nir_op_infos[alu->op].is_conversion)
      return 0;

   if (nir_op_is_vec_or_mov(alu->op))
      return 0;

   unsigned num_inputs = nir_op_infos[alu->op].num_inputs;
   const struct nir_to_dxil_options *opts = (const struct nir_to_dxil_options *)data;
   unsigned min_bit_size = opts->lower_int16 ? 32 : 16;

   unsigned ret = 0;
   for (unsigned i = 0; i < num_inputs; i++) {
      unsigned bit_size = nir_src_bit_size(alu->src[i].src);
      if (bit_size != 1 && bit_size < min_bit_size)
         ret = min_bit_size;
   }

   return ret;
}
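
/* Worked example (illustrative): for "iand %a, %b" on two 8-bit sources with
 * opts->lower_int16 == false, min_bit_size is 16 and both sources are below
 * it, so the callback returns 16 and nir_lower_bit_size rewrites the op to
 * execute at 16 bits, inserting the matching up/down conversions. Booleans
 * (bit_size == 1), conversions, and vec/mov ops are left untouched.
 */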

static bool
vectorize_filter(unsigned align_mul,
                 unsigned align_offset,
                 unsigned bit_size,
                 unsigned num_components,
                 int64_t hole_size,
                 nir_intrinsic_instr *low, nir_intrinsic_instr *high,
                 void *data)
{
   return hole_size <= 0 && util_is_power_of_two_nonzero(num_components);
}

struct lower_mem_bit_sizes_data {
   const nir_shader_compiler_options *nir_options;
   const struct nir_to_dxil_options *dxil_options;
};

static nir_mem_access_size_align
lower_mem_access_bit_sizes_cb(nir_intrinsic_op intrin,
                              uint8_t bytes,
                              uint8_t bit_size_in,
                              uint32_t align_mul,
                              uint32_t align_offset,
                              bool offset_is_const,
                              enum gl_access_qualifier access,
                              const void *cb_data)
{
   const struct lower_mem_bit_sizes_data *data = cb_data;
   unsigned max_bit_size = 32;
   unsigned min_bit_size = data->dxil_options->lower_int16 ? 32 : 16;
   unsigned closest_bit_size = MAX2(min_bit_size, MIN2(max_bit_size, bit_size_in));
   if (intrin == nir_intrinsic_load_ubo) {
      /* UBO loads can be done at whatever (supported) bit size, but require 16-byte
       * alignment and can load up to 16 bytes per instruction. However, this pass
       * requires loading 16 bytes of data to get 16-byte alignment. We're going to
       * run lower_ubo_vec4, which can deal with unaligned vec4s, so for this pass
       * let's just deal with bit size and total size restrictions.
       */
      return (nir_mem_access_size_align) {
         .align = closest_bit_size / 8,
         .bit_size = closest_bit_size,
         .num_components = DIV_ROUND_UP(MIN2(bytes, 16) * 8, closest_bit_size),
         .shift = nir_mem_access_shift_method_scalar,
      };
   }

   assert(intrin == nir_intrinsic_load_ssbo || intrin == nir_intrinsic_store_ssbo);
   uint32_t align = nir_combined_align(align_mul, align_offset);
   if (align < min_bit_size / 8) {
      /* Unaligned load/store: use the minimum bit size, up to 4 components */
      unsigned ideal_num_components = intrin == nir_intrinsic_load_ssbo ?
         DIV_ROUND_UP(bytes * 8, min_bit_size) :
         (32 / min_bit_size);
      return (nir_mem_access_size_align) {
         .align = min_bit_size / 8,
         .bit_size = min_bit_size,
         .num_components = MIN2(4, ideal_num_components),
         .shift = nir_mem_access_shift_method_scalar,
      };
   }

   /* Increase/decrease the bit size to try to get closer to the requested byte size/alignment */
   unsigned bit_size = closest_bit_size;
   unsigned target = MIN2(bytes, align);
   while (target < bit_size / 8 && bit_size > min_bit_size)
      bit_size /= 2;
   while (target > bit_size / 8 * 4 && bit_size < max_bit_size)
      bit_size *= 2;

   /* This is the best we can do */
   unsigned num_components = intrin == nir_intrinsic_load_ssbo ?
      DIV_ROUND_UP(bytes * 8, bit_size) :
      MAX2(1, (bytes * 8 / bit_size));
   return (nir_mem_access_size_align) {
      .align = bit_size / 8,
      .bit_size = bit_size,
      .num_components = MIN2(4, num_components),
      .shift = nir_mem_access_shift_method_scalar,
   };
}
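
/* Worked example (illustrative): a 12-byte SSBO load with align_mul = 4,
 * align_offset = 0 and lower_int16 == false gives min_bit_size = 16 and a
 * combined align of 4, so the unaligned path is skipped. closest_bit_size is
 * 32, target = MIN2(12, 4) = 4, and neither while-loop fires, so the access
 * becomes one 3-component 32-bit load: num_components = DIV_ROUND_UP(96, 32).
 */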

static void
optimize_nir(struct nir_shader *s, const struct nir_to_dxil_options *opts)
{
   bool progress;
   do {
      progress = false;
      NIR_PASS_V(s, nir_lower_vars_to_ssa);
      NIR_PASS(progress, s, nir_lower_indirect_derefs, nir_var_function_temp, 4);
      NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, nir_opt_copy_prop_vars);
      NIR_PASS(progress, s, nir_lower_bit_size, lower_bit_size_callback, (void *)opts);
      NIR_PASS(progress, s, dxil_nir_lower_8bit_conv);
      if (opts->lower_int16)
         NIR_PASS(progress, s, dxil_nir_lower_16bit_conv);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_if,
               nir_opt_if_optimize_phi_true_false | nir_opt_if_avoid_64bit_phis);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, dxil_nir_algebraic);
      if (s->options->lower_int64_options)
         NIR_PASS(progress, s, nir_lower_int64);
      NIR_PASS(progress, s, nir_lower_alu);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_undef);
      NIR_PASS(progress, s, nir_opt_deref);
      NIR_PASS(progress, s, dxil_nir_lower_upcast_phis, opts->lower_int16 ? 32 : 16);
      NIR_PASS(progress, s, nir_lower_64bit_phis);
      NIR_PASS(progress, s, nir_lower_phis_to_scalar, true);
      NIR_PASS(progress, s, nir_opt_loop_unroll);
      NIR_PASS(progress, s, nir_lower_pack);
      NIR_PASS(progress, s, dxil_nir_remove_oob_array_accesses);
      NIR_PASS_V(s, nir_lower_system_values);
   } while (progress);

   do {
      progress = false;
      NIR_PASS(progress, s, nir_opt_algebraic_late);
   } while (progress);

   NIR_PASS_V(s, nir_lower_undef_to_zero);
}
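
/* NIR_PASS(progress, s, pass, ...) ORs each pass's "made progress" result
 * into `progress`, so the loop above re-runs the whole pipeline until one
 * full iteration changes nothing. A minimal sketch of the pattern:
 *
 *    bool progress;
 *    do {
 *       progress = false;
 *       NIR_PASS(progress, s, nir_opt_dce);
 *    } while (progress);
 */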

static void
dxil_fill_validation_state(struct ntd_context *ctx,
                           struct dxil_validation_state *state)
{
   unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
      sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
   state->num_resources = ctx->resources.size / resource_element_size;
   state->resources.v0 = (struct dxil_resource_v0 *)ctx->resources.data;
   if (ctx->shader->info.subgroup_size >= SUBGROUP_SIZE_REQUIRE_4) {
      state->state.psv1.psv0.max_expected_wave_lane_count = ctx->shader->info.subgroup_size;
      state->state.psv1.psv0.min_expected_wave_lane_count = ctx->shader->info.subgroup_size;
   } else {
      state->state.psv1.psv0.max_expected_wave_lane_count = UINT_MAX;
   }
   state->state.psv1.shader_stage = (uint8_t)ctx->mod.shader_kind;
   state->state.psv1.uses_view_id = (uint8_t)ctx->mod.feats.view_id;
   state->state.psv1.sig_input_elements = (uint8_t)ctx->mod.num_sig_inputs;
   state->state.psv1.sig_output_elements = (uint8_t)ctx->mod.num_sig_outputs;
   state->state.psv1.sig_patch_const_or_prim_elements = (uint8_t)ctx->mod.num_sig_patch_consts;

   switch (ctx->mod.shader_kind) {
   case DXIL_VERTEX_SHADER:
      state->state.psv1.psv0.vs.output_position_present = ctx->mod.info.has_out_position;
      break;
   case DXIL_PIXEL_SHADER:
      /* TODO: handle depth outputs */
      state->state.psv1.psv0.ps.depth_output = ctx->mod.info.has_out_depth;
      state->state.psv1.psv0.ps.sample_frequency =
         ctx->mod.info.has_per_sample_input;
      break;
   case DXIL_COMPUTE_SHADER:
      state->state.num_threads_x = MAX2(ctx->shader->info.workgroup_size[0], 1);
      state->state.num_threads_y = MAX2(ctx->shader->info.workgroup_size[1], 1);
      state->state.num_threads_z = MAX2(ctx->shader->info.workgroup_size[2], 1);
      break;
   case DXIL_GEOMETRY_SHADER:
      state->state.psv1.max_vertex_count = ctx->shader->info.gs.vertices_out;
      state->state.psv1.psv0.gs.input_primitive = dxil_get_input_primitive(ctx->shader->info.gs.input_primitive);
      state->state.psv1.psv0.gs.output_toplology = dxil_get_primitive_topology(ctx->shader->info.gs.output_primitive);
      state->state.psv1.psv0.gs.output_stream_mask = MAX2(ctx->shader->info.gs.active_stream_mask, 1);
      state->state.psv1.psv0.gs.output_position_present = ctx->mod.info.has_out_position;
      break;
   case DXIL_HULL_SHADER:
      state->state.psv1.psv0.hs.input_control_point_count = ctx->tess_input_control_point_count;
      state->state.psv1.psv0.hs.output_control_point_count = ctx->shader->info.tess.tcs_vertices_out;
      state->state.psv1.psv0.hs.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode);
      state->state.psv1.psv0.hs.tessellator_output_primitive = get_tessellator_output_primitive(&ctx->shader->info);
      state->state.psv1.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts;
      break;
   case DXIL_DOMAIN_SHADER:
      state->state.psv1.psv0.ds.input_control_point_count = ctx->shader->info.tess.tcs_vertices_out;
      state->state.psv1.psv0.ds.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode);
      state->state.psv1.psv0.ds.output_position_present = ctx->mod.info.has_out_position;
      state->state.psv1.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts;
      break;
   default:
      assert(0 && "Shader type not (yet) supported");
   }
}

static nir_variable *
add_sysvalue(struct ntd_context *ctx,
             uint8_t value, char *name,
             int driver_location)
{
   nir_variable *var = rzalloc(ctx->shader, nir_variable);
   if (!var)
      return NULL;
   var->data.driver_location = driver_location;
   var->data.location = value;
   var->type = glsl_uint_type();
   var->name = name;
   var->data.mode = nir_var_system_value;
   var->data.interpolation = INTERP_MODE_FLAT;
   return var;
}

static bool
append_input_or_sysvalue(struct ntd_context *ctx,
                         int input_loc, int sv_slot,
                         char *name, int driver_location)
{
   if (input_loc >= 0) {
      /* Check whether an input variable is available that corresponds
       * to the sysvalue */
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
         if (var->data.location == input_loc) {
            ctx->system_value[sv_slot] = var;
            return true;
         }
      }
   }

   ctx->system_value[sv_slot] = add_sysvalue(ctx, sv_slot, name, driver_location);
   if (!ctx->system_value[sv_slot])
      return false;

   nir_shader_add_variable(ctx->shader, ctx->system_value[sv_slot]);
   return true;
}

struct sysvalue_name {
   gl_system_value value;
   int slot;
   char *name;
   gl_shader_stage only_in_shader;
} possible_sysvalues[] = {
   {SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, -1, "SV_VertexID", MESA_SHADER_NONE},
   {SYSTEM_VALUE_INSTANCE_ID, -1, "SV_InstanceID", MESA_SHADER_NONE},
   {SYSTEM_VALUE_FRONT_FACE, VARYING_SLOT_FACE, "SV_IsFrontFace", MESA_SHADER_NONE},
   {SYSTEM_VALUE_PRIMITIVE_ID, VARYING_SLOT_PRIMITIVE_ID, "SV_PrimitiveID", MESA_SHADER_GEOMETRY},
   {SYSTEM_VALUE_SAMPLE_ID, -1, "SV_SampleIndex", MESA_SHADER_NONE},
};

static bool
allocate_sysvalues(struct ntd_context *ctx)
{
   unsigned driver_location = 0;
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in)
      driver_location = MAX2(driver_location, var->data.driver_location + 1);
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_system_value)
      driver_location = MAX2(driver_location, var->data.driver_location + 1);

   if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT &&
       !BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID)) {
      bool need_sample_id = ctx->shader->info.fs.uses_sample_shading;

      /* "var->data.sample = true" sometimes just means "I want per-sample
       * shading", which explains why we can end up with vars having flat
       * interpolation with the per-sample bit set. If there are only such
       * variables, we need to tell DXIL that we read SV_SampleIndex to make
       * DXIL validation happy.
       */
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
         bool var_can_be_sample_rate = !var->data.centroid && var->data.interpolation != INTERP_MODE_FLAT;
         /* If there's an input that will actually force sample-rate shading, then we don't
          * need SV_SampleIndex. */
         if (var->data.sample && var_can_be_sample_rate) {
            need_sample_id = false;
            break;
         }
         /* If there's an input that wants to be sample-rate, but can't be, then we might
          * need SV_SampleIndex. */
         if (var->data.sample && !var_can_be_sample_rate)
            need_sample_id = true;
      }

      if (need_sample_id)
         BITSET_SET(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
   }

   for (unsigned i = 0; i < ARRAY_SIZE(possible_sysvalues); ++i) {
      struct sysvalue_name *info = &possible_sysvalues[i];
      if (info->only_in_shader != MESA_SHADER_NONE &&
          info->only_in_shader != ctx->shader->info.stage)
         continue;
      if (BITSET_TEST(ctx->shader->info.system_values_read, info->value)) {
         if (!append_input_or_sysvalue(ctx, info->slot,
                                       info->value, info->name,
                                       driver_location++))
            return false;
      }
   }
   return true;
}
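
/* Decision summary for the SV_SampleIndex fallback above (illustrative):
 *
 *    input variable                      forces SV_SampleIndex?
 *    ----------------------------------  ------------------------------
 *    sample bit, smooth/noperspective    no (real sample-rate input)
 *    sample bit, flat or centroid        yes (can't be sample-rate)
 *    no sample bit                       leaves the decision unchanged
 *
 * A single genuinely sample-rate input wins and clears the fallback.
 */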

static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
   return glsl_count_attribute_slots(type, false);
}

static const unsigned dxil_validator_min_capable_version = DXIL_VALIDATOR_1_4;
static const unsigned dxil_validator_max_capable_version = DXIL_VALIDATOR_1_8;
static const unsigned dxil_min_shader_model = SHADER_MODEL_6_0;
static const unsigned dxil_max_shader_model = SHADER_MODEL_6_8;
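
/* Shader-model and validator versions are packed as (major << 16) | minor,
 * which the diagnostics below unpack with ">> 16" and "& 0xffff". A minimal
 * sketch (pack_version is a hypothetical helper, not part of this file):
 *
 *    static inline unsigned pack_version(unsigned major, unsigned minor)
 *    {
 *       return (major << 16) | minor;
 *    }
 *    // pack_version(6, 0) corresponds to SHADER_MODEL_6_0
 */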

bool
nir_to_dxil(struct nir_shader *s, const struct nir_to_dxil_options *opts,
            const struct dxil_logger *logger, struct blob *blob)
{
   assert(opts);
   bool retval = true;
   debug_dxil = (int)debug_get_option_debug_dxil();
   blob_init(blob);

   if (opts->shader_model_max < dxil_min_shader_model) {
      debug_printf("D3D12: cannot support emitting shader models lower than %d.%d\n",
                   dxil_min_shader_model >> 16,
                   dxil_min_shader_model & 0xffff);
      return false;
   }

   if (opts->shader_model_max > dxil_max_shader_model) {
      debug_printf("D3D12: cannot support emitting shader models higher than %d.%d\n",
                   dxil_max_shader_model >> 16,
                   dxil_max_shader_model & 0xffff);
      return false;
   }

   if (opts->validator_version_max != NO_DXIL_VALIDATION &&
       opts->validator_version_max < dxil_validator_min_capable_version) {
      debug_printf("D3D12: invalid validator version %d.%d, must be 1.4 or greater\n",
                   opts->validator_version_max >> 16,
                   opts->validator_version_max & 0xffff);
      return false;
   }

   /* If no validation, write a blob as if it was going to be validated by the newest
    * understood validator. Same if the validator is newer than we know how to write for.
    */
   uint32_t validator_version =
      opts->validator_version_max == NO_DXIL_VALIDATION ||
      opts->validator_version_max > dxil_validator_max_capable_version ?
      dxil_validator_max_capable_version : opts->validator_version_max;

   struct ntd_context *ctx = calloc(1, sizeof(*ctx));
   if (!ctx)
      return false;

   ctx->opts = opts;
   ctx->shader = s;
   ctx->logger = logger ? logger : &default_logger;

   ctx->ralloc_ctx = ralloc_context(NULL);
   if (!ctx->ralloc_ctx) {
      retval = false;
      goto out;
   }

   util_dynarray_init(&ctx->srv_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->uav_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->cbv_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->sampler_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->resources, ctx->ralloc_ctx);
   dxil_module_init(&ctx->mod, ctx->ralloc_ctx);
   ctx->mod.shader_kind = get_dxil_shader_kind(s);
   ctx->mod.major_version = 6;
   /* Use the highest shader model that's supported and can be validated */
   ctx->mod.minor_version =
      MIN2(opts->shader_model_max & 0xffff, validator_version & 0xffff);
   ctx->mod.major_validator = validator_version >> 16;
   ctx->mod.minor_validator = validator_version & 0xffff;

   if (s->info.stage <= MESA_SHADER_FRAGMENT) {
      uint64_t in_mask =
         s->info.stage == MESA_SHADER_VERTEX ?
         0 : (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER);
      uint64_t out_mask =
         s->info.stage == MESA_SHADER_FRAGMENT ?
         ((1ull << FRAG_RESULT_STENCIL) | (1ull << FRAG_RESULT_SAMPLE_MASK)) :
         (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER);

      NIR_PASS_V(s, dxil_nir_fix_io_uint_type, in_mask, out_mask);
   }

   NIR_PASS_V(s, dxil_nir_lower_fquantize2f16);
   NIR_PASS_V(s, nir_lower_frexp);
   NIR_PASS_V(s, nir_lower_flrp, 16 | 32 | 64, true);
   NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4, nir_lower_io_lower_64bit_to_32);
   NIR_PASS_V(s, dxil_nir_ensure_position_writes);
   NIR_PASS_V(s, dxil_nir_lower_system_values);
   NIR_PASS_V(s, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_system_value | nir_var_shader_out, NULL, NULL);

   /* Do a round of optimization to try to vectorize loads/stores. Otherwise the addresses
    * used for loads might be too opaque for the pass to see that they're next to each other. */
   optimize_nir(s, opts);

   /* Vectorize UBO/SSBO accesses aggressively. This can help increase alignment to enable
    * us to do better chunking of loads and stores after lowering bit sizes. Ignore
    * load/store size limitations here; we'll address them with lower_mem_access_bit_sizes. */
   nir_load_store_vectorize_options vectorize_opts = {
      .callback = vectorize_filter,
      .modes = nir_var_mem_ubo | nir_var_mem_ssbo,
   };
   NIR_PASS_V(s, nir_opt_load_store_vectorize, &vectorize_opts);

   /* Now that they're bloated to the max, address bit size restrictions and overall size
    * limitations for a single load/store op. */
   struct lower_mem_bit_sizes_data mem_size_data = { s->options, opts };
   nir_lower_mem_access_bit_sizes_options mem_size_options = {
      .modes = nir_var_mem_ubo | nir_var_mem_ssbo,
      .callback = lower_mem_access_bit_sizes_cb,
      .may_lower_unaligned_stores_to_atomics = true,
      .cb_data = &mem_size_data
   };
   NIR_PASS_V(s, nir_lower_mem_access_bit_sizes, &mem_size_options);

   /* Lastly, convert byte-addressed UBO loads to vec4-addressed ones. This pass can also
    * deal with selecting sub-components from the load and with vec4-straddling loads. */
   NIR_PASS_V(s, nir_lower_ubo_vec4);

   if (opts->shader_model_max < SHADER_MODEL_6_6) {
      /* In a later pass, load_helper_invocation will be lowered to a sample-mask-based
       * fallback, so both load_ and is_helper_invocation will be emulated eventually.
       */
      NIR_PASS_V(s, nir_lower_is_helper_invocation);
   }

   if (ctx->mod.shader_kind == DXIL_HULL_SHADER)
      NIR_PASS_V(s, dxil_nir_split_tess_ctrl, &ctx->tess_ctrl_patch_constant_func);

   if (ctx->mod.shader_kind == DXIL_HULL_SHADER ||
       ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) {
      /* Make sure any derefs are gone after lower_io before updating tess level vars */
      NIR_PASS_V(s, nir_opt_dce);
      NIR_PASS_V(s, dxil_nir_fixup_tess_level_for_domain);
   }

   optimize_nir(s, opts);

   NIR_PASS_V(s, nir_remove_dead_variables,
              nir_var_function_temp | nir_var_mem_constant | nir_var_mem_shared, NULL);

   if (!allocate_sysvalues(ctx)) {
      /* Jump to the cleanup path instead of returning directly, so ctx isn't leaked */
      retval = false;
      goto out;
   }

   NIR_PASS_V(s, dxil_nir_lower_sysval_to_load_input, ctx->system_value);
   NIR_PASS_V(s, nir_opt_dce);

   /* This needs to be after any copy prop is done to prevent these movs from being erased */
   NIR_PASS_V(s, dxil_nir_move_consts);
   NIR_PASS_V(s, nir_opt_dce);

   NIR_PASS_V(s, dxil_nir_guess_image_formats);

   if (debug_dxil & DXIL_DEBUG_VERBOSE)
      nir_print_shader(s, stderr);

   if (!emit_module(ctx, opts)) {
      debug_printf("D3D12: emit_module failed\n");
      retval = false;
      goto out;
   }

   if (debug_dxil & DXIL_DEBUG_DUMP_MODULE) {
      struct dxil_dumper *dumper = dxil_dump_create();
      dxil_dump_module(dumper, &ctx->mod);
      fprintf(stderr, "\n");
      dxil_dump_buf_to_file(dumper, stderr);
      fprintf(stderr, "\n\n");
      dxil_dump_free(dumper);
   }

   struct dxil_container container;
   dxil_container_init(&container);
   /* Native low precision disables min-precision */
   if (ctx->mod.feats.native_low_precision)
      ctx->mod.feats.min_precision = false;
   if (!dxil_container_add_features(&container, &ctx->mod.feats)) {
      debug_printf("D3D12: dxil_container_add_features failed\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_add_io_signature(&container,
                                        DXIL_ISG1,
                                        ctx->mod.num_sig_inputs,
                                        ctx->mod.inputs,
                                        ctx->mod.minor_validator >= 7)) {
      debug_printf("D3D12: failed to write input signature\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_add_io_signature(&container,
                                        DXIL_OSG1,
                                        ctx->mod.num_sig_outputs,
                                        ctx->mod.outputs,
                                        ctx->mod.minor_validator >= 7)) {
      debug_printf("D3D12: failed to write output signature\n");
      retval = false;
      goto out;
   }

   if ((ctx->mod.shader_kind == DXIL_HULL_SHADER ||
        ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) &&
       !dxil_container_add_io_signature(&container,
                                        DXIL_PSG1,
                                        ctx->mod.num_sig_patch_consts,
                                        ctx->mod.patch_consts,
                                        ctx->mod.minor_validator >= 7)) {
      debug_printf("D3D12: failed to write patch constant signature\n");
      retval = false;
      goto out;
   }

   struct dxil_validation_state validation_state;
   memset(&validation_state, 0, sizeof(validation_state));
   dxil_fill_validation_state(ctx, &validation_state);

   if (!dxil_container_add_state_validation(&container, &ctx->mod,
                                            &validation_state)) {
      debug_printf("D3D12: failed to write state-validation\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_add_module(&container, &ctx->mod)) {
      debug_printf("D3D12: failed to write module\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_write(&container, blob)) {
      debug_printf("D3D12: dxil_container_write failed\n");
      retval = false;
      goto out;
   }
   dxil_container_finish(&container);

   if (debug_dxil & DXIL_DEBUG_DUMP_BLOB) {
      static int shader_id = 0;
      char buffer[64];
      snprintf(buffer, sizeof(buffer), "shader_%s_%d.blob",
               get_shader_kind_str(ctx->mod.shader_kind), shader_id++);
      debug_printf("Trying to write blob to %s\n", buffer);
      FILE *f = fopen(buffer, "wb");
      if (f) {
         fwrite(blob->data, 1, blob->size, f);
         fclose(f);
      }
   }

out:
   dxil_module_release(&ctx->mod);
   ralloc_free(ctx->ralloc_ctx);
   free(ctx);
   return retval;
}
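
/* Typical call site (a minimal sketch; the option values are illustrative,
 * using constants referenced elsewhere in this file):
 *
 *    struct nir_to_dxil_options opts = {
 *       .environment = DXIL_ENVIRONMENT_VULKAN,
 *       .shader_model_max = SHADER_MODEL_6_8,
 *       .validator_version_max = DXIL_VALIDATOR_1_8,
 *    };
 *    struct blob blob;
 *    if (nir_to_dxil(s, &opts, NULL, &blob)) {
 *       // blob.data / blob.size now hold the finished DXIL container
 *    }
 *    blob_finish(&blob);
 *
 * nir_to_dxil() initializes the blob itself and falls back to the stderr
 * logger when none is provided.
 */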