/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_to_dxil.h"

#include "dxil_container.h"
#include "dxil_dump.h"
#include "dxil_enums.h"
#include "dxil_function.h"
#include "dxil_module.h"
#include "dxil_nir.h"
#include "dxil_signature.h"

#include "nir/nir_builder.h"
#include "nir_deref.h"
#include "util/ralloc.h"
#include "util/u_debug.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"

#include "git_sha1.h"

#include "vulkan/vulkan_core.h"

#include <stdint.h>

int debug_dxil = 0;

static const struct debug_named_value
dxil_debug_options[] = {
   { "verbose", DXIL_DEBUG_VERBOSE, NULL },
   { "dump_blob", DXIL_DEBUG_DUMP_BLOB, "Write shader blobs" },
   { "trace", DXIL_DEBUG_TRACE, "Trace instruction conversion" },
   { "dump_module", DXIL_DEBUG_DUMP_MODULE, "Dump module tree to stderr" },
   DEBUG_NAMED_VALUE_END
};

DEBUG_GET_ONCE_FLAGS_OPTION(debug_dxil, "DXIL_DEBUG", dxil_debug_options, 0)
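/* DEBUG_GET_ONCE_FLAGS_OPTION generates debug_get_option_debug_dxil(), which
 * parses the DXIL_DEBUG environment variable once; a usage sketch
 * (command name is hypothetical):
 *
 *    DXIL_DEBUG=trace,dump_module ./myapp
 *
 * enables instruction tracing and the module dump together. */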

static void
log_nir_instr_unsupported(const struct dxil_logger *logger,
                          const char *message_prefix, const nir_instr *instr)
{
   char *msg = NULL;
   char *instr_str = nir_instr_as_str(instr, NULL);
   asprintf(&msg, "%s: %s\n", message_prefix, instr_str);
   ralloc_free(instr_str);
   assert(msg);
   logger->log(logger->priv, msg);
   free(msg);
}

static void
default_logger_func(void *priv, const char *msg)
{
   fprintf(stderr, "%s", msg);
   unreachable("Unhandled error");
}

static const struct dxil_logger default_logger = { .priv = NULL, .log = default_logger_func };

#define TRACE_CONVERSION(instr) \
   do { \
      if (debug_dxil & DXIL_DEBUG_TRACE) { \
         fprintf(stderr, "Convert '"); \
         nir_print_instr(instr, stderr); \
         fprintf(stderr, "'\n"); \
      } \
   } while (0)
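
/* A sketch of the intended use, at the top of a per-instruction conversion
 * handler:
 *
 *    TRACE_CONVERSION(instr);   // instr is a nir_instr *
 */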

static const nir_shader_compiler_options
nir_options = {
   .lower_ineg = true,
   .lower_fneg = true,
   .lower_ffma16 = true,
   .lower_ffma32 = true,
   .lower_isign = true,
   .lower_fsign = true,
   .lower_iabs = true,
   .lower_fmod = true,
   .lower_fpow = true,
   .lower_scmp = true,
   .lower_ldexp = true,
   .lower_flrp16 = true,
   .lower_flrp32 = true,
   .lower_flrp64 = true,
   .lower_bitfield_extract = true,
   .lower_ifind_msb = true,
   .lower_ufind_msb = true,
   .lower_extract_word = true,
   .lower_extract_byte = true,
   .lower_insert_word = true,
   .lower_insert_byte = true,
   .lower_all_io_to_elements = true,
   .lower_hadd = true,
   .lower_uadd_sat = true,
   .lower_usub_sat = true,
   .lower_iadd_sat = true,
   .lower_uadd_carry = true,
   .lower_usub_borrow = true,
   .lower_mul_high = true,
   .lower_pack_half_2x16 = true,
   .lower_pack_unorm_4x8 = true,
   .lower_pack_snorm_4x8 = true,
   .lower_pack_snorm_2x16 = true,
   .lower_pack_unorm_2x16 = true,
   .lower_pack_64_2x32_split = true,
   .lower_pack_32_2x16_split = true,
   .lower_pack_64_4x16 = true,
   .lower_unpack_64_2x32_split = true,
   .lower_unpack_32_2x16_split = true,
   .lower_unpack_half_2x16 = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_interpolate_at = true,
   .has_fsub = true,
   .has_isub = true,
   .has_bfe = true,
   .has_find_msb_rev = true,
   .vertex_id_zero_based = true,
   .lower_base_vertex = true,
   .lower_helper_invocation = true,
   .has_cs_global_id = true,
   .lower_mul_2x32_64 = true,
   .lower_doubles_options =
      nir_lower_drcp |
      nir_lower_dsqrt |
      nir_lower_drsq |
      nir_lower_dfract |
      nir_lower_dtrunc |
      nir_lower_dfloor |
      nir_lower_dceil |
      nir_lower_dround_even,
   .max_unroll_iterations = 32, /* arbitrary */
   .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out),
   .lower_device_index_to_zero = true,
   .linker_ignore_precision = true,
   .support_16bit_alu = true,
   .preserve_mediump = true,
};

const nir_shader_compiler_options *
dxil_get_base_nir_compiler_options(void)
{
   return &nir_options;
}

void
dxil_get_nir_compiler_options(nir_shader_compiler_options *options,
                              enum dxil_shader_model shader_model_max,
                              unsigned supported_int_sizes,
                              unsigned supported_float_sizes)
{
   *options = nir_options;
   if (!(supported_int_sizes & 64)) {
      options->lower_pack_64_2x32_split = false;
      options->lower_unpack_64_2x32_split = false;
      options->lower_int64_options = ~0;
   }
   if (!(supported_float_sizes & 64))
      options->lower_doubles_options = ~0;
   if (shader_model_max >= SHADER_MODEL_6_4) {
      options->has_sdot_4x8 = true;
      options->has_udot_4x8 = true;
   }
}
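
/* A hypothetical caller targeting SM 6.2 with 16- and 32-bit integers and
 * 32-bit floats only (the size arguments are bitmasks of bit widths, as the
 * "& 64" checks above imply):
 *
 *    nir_shader_compiler_options options;
 *    dxil_get_nir_compiler_options(&options, SHADER_MODEL_6_2, 16 | 32, 32);
 */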

static bool
emit_llvm_ident(struct dxil_module *m)
{
   const struct dxil_mdnode *compiler = dxil_get_metadata_string(m, "Mesa version " PACKAGE_VERSION MESA_GIT_SHA1);
   if (!compiler)
      return false;

   const struct dxil_mdnode *llvm_ident = dxil_get_metadata_node(m, &compiler, 1);
   return llvm_ident &&
          dxil_add_metadata_named_node(m, "llvm.ident", &llvm_ident, 1);
}

static bool
emit_named_version(struct dxil_module *m, const char *name,
                   int major, int minor)
{
   const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, major);
   const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, minor);
   const struct dxil_mdnode *version_nodes[] = { major_node, minor_node };
   const struct dxil_mdnode *version = dxil_get_metadata_node(m, version_nodes,
                                                              ARRAY_SIZE(version_nodes));
   return dxil_add_metadata_named_node(m, name, &version, 1);
}

static const char *
get_shader_kind_str(enum dxil_shader_kind kind)
{
   switch (kind) {
   case DXIL_PIXEL_SHADER:
      return "ps";
   case DXIL_VERTEX_SHADER:
      return "vs";
   case DXIL_GEOMETRY_SHADER:
      return "gs";
   case DXIL_HULL_SHADER:
      return "hs";
   case DXIL_DOMAIN_SHADER:
      return "ds";
   case DXIL_COMPUTE_SHADER:
      return "cs";
   default:
      unreachable("invalid shader kind");
   }
}

static bool
emit_dx_shader_model(struct dxil_module *m)
{
   const struct dxil_mdnode *type_node = dxil_get_metadata_string(m, get_shader_kind_str(m->shader_kind));
   const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, m->major_version);
   const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, m->minor_version);
   const struct dxil_mdnode *shader_model[] = { type_node, major_node,
                                                minor_node };
   const struct dxil_mdnode *dx_shader_model = dxil_get_metadata_node(m, shader_model, ARRAY_SIZE(shader_model));

   return dxil_add_metadata_named_node(m, "dx.shaderModel",
                                       &dx_shader_model, 1);
}
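
/* Extended-property tags attached to buffer resource metadata; the values
 * follow the DXIL resource metadata encoding (DxilConstants.h in DXC). */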
enum {
   DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG = 0,
   DXIL_STRUCTURED_BUFFER_ELEMENT_STRIDE_TAG = 1
};

enum dxil_intr {
   DXIL_INTR_LOAD_INPUT = 4,
   DXIL_INTR_STORE_OUTPUT = 5,
   DXIL_INTR_FABS = 6,
   DXIL_INTR_SATURATE = 7,

   DXIL_INTR_ISFINITE = 10,
   DXIL_INTR_ISNORMAL = 11,

   DXIL_INTR_FCOS = 12,
   DXIL_INTR_FSIN = 13,

   DXIL_INTR_FEXP2 = 21,
   DXIL_INTR_FRC = 22,
   DXIL_INTR_FLOG2 = 23,

   DXIL_INTR_SQRT = 24,
   DXIL_INTR_RSQRT = 25,
   DXIL_INTR_ROUND_NE = 26,
   DXIL_INTR_ROUND_NI = 27,
   DXIL_INTR_ROUND_PI = 28,
   DXIL_INTR_ROUND_Z = 29,

   DXIL_INTR_BFREV = 30,
   DXIL_INTR_COUNTBITS = 31,
   DXIL_INTR_FIRSTBIT_LO = 32,
   DXIL_INTR_FIRSTBIT_HI = 33,
   DXIL_INTR_FIRSTBIT_SHI = 34,

   DXIL_INTR_FMAX = 35,
   DXIL_INTR_FMIN = 36,
   DXIL_INTR_IMAX = 37,
   DXIL_INTR_IMIN = 38,
   DXIL_INTR_UMAX = 39,
   DXIL_INTR_UMIN = 40,

   DXIL_INTR_FMA = 47,

   DXIL_INTR_IBFE = 51,
   DXIL_INTR_UBFE = 52,
   DXIL_INTR_BFI = 53,

   DXIL_INTR_CREATE_HANDLE = 57,
   DXIL_INTR_CBUFFER_LOAD_LEGACY = 59,

   DXIL_INTR_SAMPLE = 60,
   DXIL_INTR_SAMPLE_BIAS = 61,
   DXIL_INTR_SAMPLE_LEVEL = 62,
   DXIL_INTR_SAMPLE_GRAD = 63,
   DXIL_INTR_SAMPLE_CMP = 64,
   DXIL_INTR_SAMPLE_CMP_LVL_ZERO = 65,

   DXIL_INTR_TEXTURE_LOAD = 66,
   DXIL_INTR_TEXTURE_STORE = 67,

   DXIL_INTR_BUFFER_LOAD = 68,
   DXIL_INTR_BUFFER_STORE = 69,

   DXIL_INTR_TEXTURE_SIZE = 72,
   DXIL_INTR_TEXTURE_GATHER = 73,
   DXIL_INTR_TEXTURE_GATHER_CMP = 74,

   DXIL_INTR_TEXTURE2DMS_GET_SAMPLE_POSITION = 75,
   DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION = 76,
   DXIL_INTR_RENDER_TARGET_GET_SAMPLE_COUNT = 77,

   DXIL_INTR_ATOMIC_BINOP = 78,
   DXIL_INTR_ATOMIC_CMPXCHG = 79,
   DXIL_INTR_BARRIER = 80,
   DXIL_INTR_TEXTURE_LOD = 81,

   DXIL_INTR_DISCARD = 82,
   DXIL_INTR_DDX_COARSE = 83,
   DXIL_INTR_DDY_COARSE = 84,
   DXIL_INTR_DDX_FINE = 85,
   DXIL_INTR_DDY_FINE = 86,

   DXIL_INTR_EVAL_SNAPPED = 87,
   DXIL_INTR_EVAL_SAMPLE_INDEX = 88,
   DXIL_INTR_EVAL_CENTROID = 89,

   DXIL_INTR_SAMPLE_INDEX = 90,
   DXIL_INTR_COVERAGE = 91,

   DXIL_INTR_THREAD_ID = 93,
   DXIL_INTR_GROUP_ID = 94,
   DXIL_INTR_THREAD_ID_IN_GROUP = 95,
   DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP = 96,

   DXIL_INTR_EMIT_STREAM = 97,
   DXIL_INTR_CUT_STREAM = 98,

   DXIL_INTR_GS_INSTANCE_ID = 100,

   DXIL_INTR_MAKE_DOUBLE = 101,
   DXIL_INTR_SPLIT_DOUBLE = 102,

   DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT = 103,
   DXIL_INTR_LOAD_PATCH_CONSTANT = 104,
   DXIL_INTR_DOMAIN_LOCATION = 105,
   DXIL_INTR_STORE_PATCH_CONSTANT = 106,
   DXIL_INTR_OUTPUT_CONTROL_POINT_ID = 107,
   DXIL_INTR_PRIMITIVE_ID = 108,

   DXIL_INTR_WAVE_IS_FIRST_LANE = 110,
   DXIL_INTR_WAVE_GET_LANE_INDEX = 111,
   DXIL_INTR_WAVE_GET_LANE_COUNT = 112,
   DXIL_INTR_WAVE_ANY_TRUE = 113,
   DXIL_INTR_WAVE_ALL_TRUE = 114,
   DXIL_INTR_WAVE_ACTIVE_ALL_EQUAL = 115,
   DXIL_INTR_WAVE_ACTIVE_BALLOT = 116,
   DXIL_INTR_WAVE_READ_LANE_AT = 117,
   DXIL_INTR_WAVE_READ_LANE_FIRST = 118,
   DXIL_INTR_WAVE_ACTIVE_OP = 119,
   DXIL_INTR_WAVE_ACTIVE_BIT = 120,
   DXIL_INTR_WAVE_PREFIX_OP = 121,
   DXIL_INTR_QUAD_READ_LANE_AT = 122,
   DXIL_INTR_QUAD_OP = 123,

   DXIL_INTR_LEGACY_F32TOF16 = 130,
   DXIL_INTR_LEGACY_F16TOF32 = 131,

   DXIL_INTR_ATTRIBUTE_AT_VERTEX = 137,
   DXIL_INTR_VIEW_ID = 138,

   DXIL_INTR_RAW_BUFFER_LOAD = 139,
   DXIL_INTR_RAW_BUFFER_STORE = 140,

   DXIL_INTR_DOT4_ADD_I8_PACKED = 163,
   DXIL_INTR_DOT4_ADD_U8_PACKED = 164,

   DXIL_INTR_ANNOTATE_HANDLE = 216,
   DXIL_INTR_CREATE_HANDLE_FROM_BINDING = 217,
   DXIL_INTR_CREATE_HANDLE_FROM_HEAP = 218,

   DXIL_INTR_IS_HELPER_LANE = 221,
   DXIL_INTR_SAMPLE_CMP_LEVEL = 224,
   DXIL_INTR_SAMPLE_CMP_GRAD = 254,
   DXIL_INTR_SAMPLE_CMP_BIAS = 255,
};
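
/* Values mirror DXIL's AtomicBinOpCode (DxilConstants.h in DXC). */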
enum dxil_atomic_op {
   DXIL_ATOMIC_ADD = 0,
   DXIL_ATOMIC_AND = 1,
   DXIL_ATOMIC_OR = 2,
   DXIL_ATOMIC_XOR = 3,
   DXIL_ATOMIC_IMIN = 4,
   DXIL_ATOMIC_IMAX = 5,
   DXIL_ATOMIC_UMIN = 6,
   DXIL_ATOMIC_UMAX = 7,
   DXIL_ATOMIC_EXCHANGE = 8,
};

static enum dxil_atomic_op
nir_atomic_to_dxil_atomic(nir_atomic_op op)
{
   switch (op) {
   case nir_atomic_op_iadd: return DXIL_ATOMIC_ADD;
   case nir_atomic_op_iand: return DXIL_ATOMIC_AND;
   case nir_atomic_op_ior: return DXIL_ATOMIC_OR;
   case nir_atomic_op_ixor: return DXIL_ATOMIC_XOR;
   case nir_atomic_op_imin: return DXIL_ATOMIC_IMIN;
   case nir_atomic_op_imax: return DXIL_ATOMIC_IMAX;
   case nir_atomic_op_umin: return DXIL_ATOMIC_UMIN;
   case nir_atomic_op_umax: return DXIL_ATOMIC_UMAX;
   case nir_atomic_op_xchg: return DXIL_ATOMIC_EXCHANGE;
   default: unreachable("Unsupported atomic op");
   }
}

static enum dxil_rmw_op
nir_atomic_to_dxil_rmw(nir_atomic_op op)
{
   switch (op) {
   case nir_atomic_op_iadd: return DXIL_RMWOP_ADD;
   case nir_atomic_op_iand: return DXIL_RMWOP_AND;
   case nir_atomic_op_ior: return DXIL_RMWOP_OR;
   case nir_atomic_op_ixor: return DXIL_RMWOP_XOR;
   case nir_atomic_op_imin: return DXIL_RMWOP_MIN;
   case nir_atomic_op_imax: return DXIL_RMWOP_MAX;
   case nir_atomic_op_umin: return DXIL_RMWOP_UMIN;
   case nir_atomic_op_umax: return DXIL_RMWOP_UMAX;
   case nir_atomic_op_xchg: return DXIL_RMWOP_XCHG;
   default: unreachable("Unsupported atomic op");
   }
}
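
/* One contiguous binding range of a resource array:
 *   id      - record index within the per-class metadata list
 *   binding - lower bound of the register range
 *   size    - number of registers in the range (0 means unbounded)
 *   space   - register space
 */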
typedef struct {
   unsigned id;
   unsigned binding;
   unsigned size;
   unsigned space;
} resource_array_layout;

static void
fill_resource_metadata(struct dxil_module *m, const struct dxil_mdnode **fields,
                       const struct dxil_type *struct_type,
                       const char *name, const resource_array_layout *layout)
{
   const struct dxil_type *pointer_type = dxil_module_get_pointer_type(m, struct_type);
   const struct dxil_value *pointer_undef = dxil_module_get_undef(m, pointer_type);

   fields[0] = dxil_get_metadata_int32(m, layout->id); // resource ID
   fields[1] = dxil_get_metadata_value(m, pointer_type, pointer_undef); // global constant symbol
   fields[2] = dxil_get_metadata_string(m, name ? name : ""); // name
   fields[3] = dxil_get_metadata_int32(m, layout->space); // space ID
   fields[4] = dxil_get_metadata_int32(m, layout->binding); // lower bound
   fields[5] = dxil_get_metadata_int32(m, layout->size); // range size
}

static const struct dxil_mdnode *
emit_srv_metadata(struct dxil_module *m, const struct dxil_type *elem_type,
                  const char *name, const resource_array_layout *layout,
                  enum dxil_component_type comp_type,
                  enum dxil_resource_kind res_kind)
{
   const struct dxil_mdnode *fields[9];

   const struct dxil_mdnode *metadata_tag_nodes[2];

   fill_resource_metadata(m, fields, elem_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
   fields[7] = dxil_get_metadata_int1(m, 0); // sample count
   if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
       res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
      metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
      metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
      fields[8] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
   } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      fields[8] = NULL;
   else
      unreachable("Structured buffers not supported yet");

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_uav_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                  const char *name, const resource_array_layout *layout,
                  enum dxil_component_type comp_type,
                  enum dxil_resource_kind res_kind,
                  enum gl_access_qualifier access)
{
   const struct dxil_mdnode *fields[11];

   const struct dxil_mdnode *metadata_tag_nodes[2];

   fill_resource_metadata(m, fields, struct_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
   fields[7] = dxil_get_metadata_int1(m, (access & ACCESS_COHERENT) != 0); // globally-coherent
   fields[8] = dxil_get_metadata_int1(m, false); // has counter
   fields[9] = dxil_get_metadata_int1(m, false); // is ROV
   if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
       res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
      metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
      metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
      fields[10] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
   } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      fields[10] = NULL;
   else
      unreachable("Structured buffers not supported yet");

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_cbv_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                  const char *name, const resource_array_layout *layout,
                  unsigned size)
{
   const struct dxil_mdnode *fields[8];

   fill_resource_metadata(m, fields, struct_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, size); // constant buffer size
   fields[7] = NULL; // metadata

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_sampler_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                      nir_variable *var, const resource_array_layout *layout)
{
   const struct dxil_mdnode *fields[8];
   const struct glsl_type *type = glsl_without_array(var->type);

   fill_resource_metadata(m, fields, struct_type, var->name, layout);
   enum dxil_sampler_kind sampler_kind = glsl_sampler_type_is_shadow(type) ?
      DXIL_SAMPLER_KIND_COMPARISON : DXIL_SAMPLER_KIND_DEFAULT;
   fields[6] = dxil_get_metadata_int32(m, sampler_kind); // sampler kind
   fields[7] = NULL; // metadata

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}


#define MAX_SRVS 128
#define MAX_UAVS 64
#define MAX_CBVS 64 // ??
#define MAX_SAMPLERS 64 // ??

struct dxil_def {
   const struct dxil_value *chans[NIR_MAX_VEC_COMPONENTS];
};

struct ntd_context {
   void *ralloc_ctx;
   const struct nir_to_dxil_options *opts;
   struct nir_shader *shader;

   struct dxil_module mod;

   struct util_dynarray srv_metadata_nodes;
   const struct dxil_value *srv_handles[MAX_SRVS];

   struct util_dynarray uav_metadata_nodes;
   const struct dxil_value *ssbo_handles[MAX_UAVS];
   const struct dxil_value *image_handles[MAX_UAVS];
   uint32_t num_uavs;

   struct util_dynarray cbv_metadata_nodes;
   const struct dxil_value *cbv_handles[MAX_CBVS];

   struct util_dynarray sampler_metadata_nodes;
   const struct dxil_value *sampler_handles[MAX_SAMPLERS];

   struct util_dynarray resources;

   const struct dxil_mdnode *shader_property_nodes[6];
   size_t num_shader_property_nodes;

   struct dxil_def *defs;
   unsigned num_defs;
   struct hash_table *phis;

   const struct dxil_value **sharedvars;
   const struct dxil_value **scratchvars;
   const struct dxil_value **consts;

   nir_variable *ps_front_face;
   nir_variable *system_value[SYSTEM_VALUE_MAX];

   nir_function *tess_ctrl_patch_constant_func;
   unsigned tess_input_control_point_count;

   struct dxil_func_def *main_func_def;
   struct dxil_func_def *tess_ctrl_patch_constant_func_def;
   unsigned unnamed_ubo_count;

   BITSET_WORD *float_types;
   BITSET_WORD *int_types;

   const struct dxil_logger *logger;
};

static const char *
unary_func_name(enum dxil_intr intr)
{
   switch (intr) {
   case DXIL_INTR_COUNTBITS:
   case DXIL_INTR_FIRSTBIT_HI:
   case DXIL_INTR_FIRSTBIT_SHI:
   case DXIL_INTR_FIRSTBIT_LO:
      return "dx.op.unaryBits";
   case DXIL_INTR_ISFINITE:
   case DXIL_INTR_ISNORMAL:
      return "dx.op.isSpecialFloat";
   default:
      return "dx.op.unary";
   }
}
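
/* All of the dx.op.* emit helpers below share one calling convention: the
 * first argument is an i32 constant holding the DXIL opcode, followed by the
 * operands themselves. */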
static const struct dxil_value *
emit_unary_call(struct ntd_context *ctx, enum overload_type overload,
                enum dxil_intr intr,
                const struct dxil_value *op0)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod,
                                                    unary_func_name(intr),
                                                    overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_binary_call(struct ntd_context *ctx, enum overload_type overload,
                 enum dxil_intr intr,
                 const struct dxil_value *op0, const struct dxil_value *op1)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.binary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0,
      op1
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_tertiary_call(struct ntd_context *ctx, enum overload_type overload,
                   enum dxil_intr intr,
                   const struct dxil_value *op0,
                   const struct dxil_value *op1,
                   const struct dxil_value *op2)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.tertiary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0,
      op1,
      op2
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_quaternary_call(struct ntd_context *ctx, enum overload_type overload,
                     enum dxil_intr intr,
                     const struct dxil_value *op0,
                     const struct dxil_value *op1,
                     const struct dxil_value *op2,
                     const struct dxil_value *op3)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.quaternary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0,
      op1,
      op2,
      op3
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_threadid_call(struct ntd_context *ctx, const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadId", DXIL_I32);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_THREAD_ID);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_threadidingroup_call(struct ntd_context *ctx,
                          const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadIdInGroup", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_THREAD_ID_IN_GROUP);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_flattenedthreadidingroup_call(struct ntd_context *ctx)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.flattenedThreadIdInGroup", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_groupid_call(struct ntd_context *ctx, const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.groupId", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_GROUP_ID);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_raw_bufferload_call(struct ntd_context *ctx,
                         const struct dxil_value *handle,
                         const struct dxil_value *coord[2],
                         enum overload_type overload,
                         unsigned component_count,
                         unsigned alignment)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.rawBufferLoad", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_RAW_BUFFER_LOAD);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1],
      dxil_module_get_int8_const(&ctx->mod, (1 << component_count) - 1),
      dxil_module_get_int32_const(&ctx->mod, alignment),
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_bufferload_call(struct ntd_context *ctx,
                     const struct dxil_value *handle,
                     const struct dxil_value *coord[2],
                     enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferLoad", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_BUFFER_LOAD);
   const struct dxil_value *args[] = { opcode, handle, coord[0], coord[1] };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_raw_bufferstore_call(struct ntd_context *ctx,
                          const struct dxil_value *handle,
                          const struct dxil_value *coord[2],
                          const struct dxil_value *value[4],
                          const struct dxil_value *write_mask,
                          enum overload_type overload,
                          unsigned alignment)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.rawBufferStore", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_RAW_BUFFER_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1],
      value[0], value[1], value[2], value[3],
      write_mask,
      dxil_module_get_int32_const(&ctx->mod, alignment),
   };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static bool
emit_bufferstore_call(struct ntd_context *ctx,
                      const struct dxil_value *handle,
                      const struct dxil_value *coord[2],
                      const struct dxil_value *value[4],
                      const struct dxil_value *write_mask,
                      enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferStore", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_BUFFER_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1],
      value[0], value[1], value[2], value[3],
      write_mask
   };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_textureload_call(struct ntd_context *ctx,
                      const struct dxil_value *handle,
                      const struct dxil_value *coord[3],
                      enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", overload);
   if (!func)
      return NULL;
   const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
   const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_TEXTURE_LOAD);
   const struct dxil_value *args[] = { opcode, handle,
      /*lod_or_sample*/ int_undef,
      coord[0], coord[1], coord[2],
      /* offsets */ int_undef, int_undef, int_undef};

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_texturestore_call(struct ntd_context *ctx,
                       const struct dxil_value *handle,
                       const struct dxil_value *coord[3],
                       const struct dxil_value *value[4],
                       const struct dxil_value *write_mask,
                       enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureStore", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_TEXTURE_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1], coord[2],
      value[0], value[1], value[2], value[3],
      write_mask
   };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}
static const struct dxil_value *
emit_atomic_binop(struct ntd_context *ctx,
                  const struct dxil_value *handle,
                  enum dxil_atomic_op atomic_op,
                  const struct dxil_value *coord[3],
                  const struct dxil_value *value)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.atomicBinOp", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_BINOP);
   const struct dxil_value *atomic_op_value =
      dxil_module_get_int32_const(&ctx->mod, atomic_op);
   const struct dxil_value *args[] = {
      opcode, handle, atomic_op_value,
      coord[0], coord[1], coord[2], value
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_atomic_cmpxchg(struct ntd_context *ctx,
                    const struct dxil_value *handle,
                    const struct dxil_value *coord[3],
                    const struct dxil_value *cmpval,
                    const struct dxil_value *newval)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.atomicCompareExchange", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_CMPXCHG);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1], coord[2], cmpval, newval
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_createhandle_call_pre_6_6(struct ntd_context *ctx,
                               enum dxil_resource_class resource_class,
                               unsigned lower_bound,
                               unsigned upper_bound,
                               unsigned space,
                               unsigned resource_range_id,
                               const struct dxil_value *resource_range_index,
                               bool non_uniform_resource_index)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE);
   const struct dxil_value *resource_class_value = dxil_module_get_int8_const(&ctx->mod, resource_class);
   const struct dxil_value *resource_range_id_value = dxil_module_get_int32_const(&ctx->mod, resource_range_id);
   const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
   if (!opcode || !resource_class_value || !resource_range_id_value ||
       !non_uniform_resource_index_value)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      resource_class_value,
      resource_range_id_value,
      resource_range_index,
      non_uniform_resource_index_value
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.createHandle", DXIL_NONE);

   if (!func)
      return NULL;

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_annotate_handle(struct ntd_context *ctx,
                     const struct dxil_value *unannotated_handle,
                     const struct dxil_value *res_props)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ANNOTATE_HANDLE);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      unannotated_handle,
      res_props
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.annotateHandle", DXIL_NONE);

   if (!func)
      return NULL;

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_annotate_handle_from_metadata(struct ntd_context *ctx,
                                   enum dxil_resource_class resource_class,
                                   unsigned resource_range_id,
                                   const struct dxil_value *unannotated_handle)
{
   const struct util_dynarray *mdnodes;
   switch (resource_class) {
   case DXIL_RESOURCE_CLASS_SRV:
      mdnodes = &ctx->srv_metadata_nodes;
      break;
   case DXIL_RESOURCE_CLASS_UAV:
      mdnodes = &ctx->uav_metadata_nodes;
      break;
   case DXIL_RESOURCE_CLASS_CBV:
      mdnodes = &ctx->cbv_metadata_nodes;
      break;
   case DXIL_RESOURCE_CLASS_SAMPLER:
      mdnodes = &ctx->sampler_metadata_nodes;
      break;
   default:
      unreachable("Invalid resource class");
   }

   const struct dxil_mdnode *mdnode = *util_dynarray_element(mdnodes, const struct dxil_mdnode *, resource_range_id);
   const struct dxil_value *res_props = dxil_module_get_res_props_const(&ctx->mod, resource_class, mdnode);
   if (!res_props)
      return NULL;

   return emit_annotate_handle(ctx, unannotated_handle, res_props);
}

static const struct dxil_value *
emit_createhandle_and_annotate(struct ntd_context *ctx,
                               enum dxil_resource_class resource_class,
                               unsigned lower_bound,
                               unsigned upper_bound,
                               unsigned space,
                               unsigned resource_range_id,
                               const struct dxil_value *resource_range_index,
                               bool non_uniform_resource_index)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE_FROM_BINDING);
   const struct dxil_value *res_bind = dxil_module_get_res_bind_const(&ctx->mod, lower_bound, upper_bound, space, resource_class);
   const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
   if (!opcode || !res_bind || !non_uniform_resource_index_value)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      res_bind,
      resource_range_index,
      non_uniform_resource_index_value
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.createHandleFromBinding", DXIL_NONE);

   if (!func)
      return NULL;

   const struct dxil_value *unannotated_handle = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!unannotated_handle)
      return NULL;

   return emit_annotate_handle_from_metadata(ctx, resource_class, resource_range_id, unannotated_handle);
}
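
/* Shader model 6.6 replaced dx.op.createHandle with createHandleFromBinding
 * plus an annotateHandle carrying the resource properties; dispatch on the
 * module's minor version accordingly. */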
static const struct dxil_value *
emit_createhandle_call(struct ntd_context *ctx,
                       enum dxil_resource_class resource_class,
                       unsigned lower_bound,
                       unsigned upper_bound,
                       unsigned space,
                       unsigned resource_range_id,
                       const struct dxil_value *resource_range_index,
                       bool non_uniform_resource_index)
{
   if (ctx->mod.minor_version < 6)
      return emit_createhandle_call_pre_6_6(ctx, resource_class, lower_bound, upper_bound, space, resource_range_id, resource_range_index, non_uniform_resource_index);
   else
      return emit_createhandle_and_annotate(ctx, resource_class, lower_bound, upper_bound, space, resource_range_id, resource_range_index, non_uniform_resource_index);
}

static const struct dxil_value *
emit_createhandle_call_const_index(struct ntd_context *ctx,
                                   enum dxil_resource_class resource_class,
                                   unsigned lower_bound,
                                   unsigned upper_bound,
                                   unsigned space,
                                   unsigned resource_range_id,
                                   unsigned resource_range_index,
                                   bool non_uniform_resource_index)
{
   const struct dxil_value *resource_range_index_value = dxil_module_get_int32_const(&ctx->mod, resource_range_index);
   if (!resource_range_index_value)
      return NULL;

   return emit_createhandle_call(ctx, resource_class, lower_bound, upper_bound, space,
                                 resource_range_id, resource_range_index_value,
                                 non_uniform_resource_index);
}

static const struct dxil_value *
emit_createhandle_heap(struct ntd_context *ctx,
                       const struct dxil_value *resource_range_index,
                       bool is_sampler,
                       bool non_uniform_resource_index)
{
   if (is_sampler)
      ctx->mod.feats.sampler_descriptor_heap_indexing = true;
   else
      ctx->mod.feats.resource_descriptor_heap_indexing = true;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE_FROM_HEAP);
   const struct dxil_value *sampler = dxil_module_get_int1_const(&ctx->mod, is_sampler);
   const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
   if (!opcode || !sampler || !non_uniform_resource_index_value)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      resource_range_index,
      sampler,
      non_uniform_resource_index_value
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.createHandleFromHeap", DXIL_NONE);

   if (!func)
      return NULL;

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static void
add_resource(struct ntd_context *ctx, enum dxil_resource_type type,
             enum dxil_resource_kind kind,
             const resource_array_layout *layout)
{
   struct dxil_resource_v0 *resource_v0 = NULL;
   struct dxil_resource_v1 *resource_v1 = NULL;
   if (ctx->mod.minor_validator >= 6) {
      resource_v1 = util_dynarray_grow(&ctx->resources, struct dxil_resource_v1, 1);
      resource_v0 = &resource_v1->v0;
   } else {
      resource_v0 = util_dynarray_grow(&ctx->resources, struct dxil_resource_v0, 1);
   }
   resource_v0->resource_type = type;
   resource_v0->space = layout->space;
   resource_v0->lower_bound = layout->binding;
   if (layout->size == 0 || (uint64_t)layout->size + layout->binding >= UINT_MAX)
      resource_v0->upper_bound = UINT_MAX;
   else
      resource_v0->upper_bound = layout->binding + layout->size - 1;
   if (type == DXIL_RES_UAV_TYPED ||
       type == DXIL_RES_UAV_RAW ||
       type == DXIL_RES_UAV_STRUCTURED) {
      uint32_t new_uav_count = ctx->num_uavs + layout->size;
      if (layout->size == 0 || new_uav_count < ctx->num_uavs)
         ctx->num_uavs = UINT_MAX;
      else
         ctx->num_uavs = new_uav_count;
      if (ctx->mod.minor_validator >= 6 && ctx->num_uavs > 8)
         ctx->mod.feats.use_64uavs = 1;
   }

   if (resource_v1) {
      resource_v1->resource_kind = kind;
      /* No flags supported yet */
      resource_v1->resource_flags = 0;
   }
}

static const struct dxil_value *
emit_createhandle_call_dynamic(struct ntd_context *ctx,
                               enum dxil_resource_class resource_class,
                               unsigned space,
                               unsigned binding,
                               const struct dxil_value *resource_range_index,
                               bool non_uniform_resource_index)
{
   unsigned offset = 0;
   unsigned count = 0;

   unsigned num_srvs = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_uavs = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_cbvs = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_samplers = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);
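
   /* ctx->resources is appended to in emission order: CBVs first, then
    * samplers, SRVs, and UAVs; recover the slice belonging to this class. */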
   switch (resource_class) {
   case DXIL_RESOURCE_CLASS_UAV:
      offset = num_srvs + num_samplers + num_cbvs;
      count = num_uavs;
      break;
   case DXIL_RESOURCE_CLASS_SRV:
      offset = num_samplers + num_cbvs;
      count = num_srvs;
      break;
   case DXIL_RESOURCE_CLASS_SAMPLER:
      offset = num_cbvs;
      count = num_samplers;
      break;
   case DXIL_RESOURCE_CLASS_CBV:
      offset = 0;
      count = num_cbvs;
      break;
   }

   unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
      sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
   assert(offset + count <= ctx->resources.size / resource_element_size);
   for (unsigned i = offset; i < offset + count; ++i) {
      const struct dxil_resource_v0 *resource = (const struct dxil_resource_v0 *)((const char *)ctx->resources.data + resource_element_size * i);
      if (resource->space == space &&
          resource->lower_bound <= binding &&
          resource->upper_bound >= binding) {
         return emit_createhandle_call(ctx, resource_class, resource->lower_bound,
                                       resource->upper_bound, space,
                                       i - offset,
                                       resource_range_index,
                                       non_uniform_resource_index);
      }
   }

   unreachable("Resource access for undeclared range");
}

static bool
emit_srv(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   unsigned id = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
   unsigned binding = var->data.binding;
   resource_array_layout layout = {id, binding, count, var->data.descriptor_set};

   enum dxil_component_type comp_type;
   enum dxil_resource_kind res_kind;
   enum dxil_resource_type res_type;
   if (var->data.mode == nir_var_mem_ssbo) {
      comp_type = DXIL_COMP_TYPE_INVALID;
      res_kind = DXIL_RESOURCE_KIND_RAW_BUFFER;
      res_type = DXIL_RES_SRV_RAW;
   } else {
      comp_type = dxil_get_comp_type(var->type);
      res_kind = dxil_get_resource_kind(var->type);
      res_type = DXIL_RES_SRV_TYPED;
   }
   const struct dxil_type *res_type_as_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, 4, false /* readwrite */);

   if (glsl_type_is_array(var->type))
      res_type_as_type = dxil_module_get_array_type(&ctx->mod, res_type_as_type, count);

   const struct dxil_mdnode *srv_meta = emit_srv_metadata(&ctx->mod, res_type_as_type, var->name,
                                                          &layout, comp_type, res_kind);

   if (!srv_meta)
      return false;

   util_dynarray_append(&ctx->srv_metadata_nodes, const struct dxil_mdnode *, srv_meta);
   add_resource(ctx, res_type, res_kind, &layout);
   if (res_type == DXIL_RES_SRV_RAW)
      ctx->mod.raw_and_structured_buffers = true;

   return true;
}

static bool
emit_globals(struct ntd_context *ctx, unsigned size)
{
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo)
      size++;

   if (!size)
      return true;

   const struct dxil_type *struct_type = dxil_module_get_res_type(&ctx->mod,
      DXIL_RESOURCE_KIND_RAW_BUFFER, DXIL_COMP_TYPE_INVALID, 1, true /* readwrite */);
   if (!struct_type)
      return false;

   const struct dxil_type *array_type =
      dxil_module_get_array_type(&ctx->mod, struct_type, size);
   if (!array_type)
      return false;

   resource_array_layout layout = {0, 0, size, 0};
   const struct dxil_mdnode *uav_meta =
      emit_uav_metadata(&ctx->mod, array_type,
                        "globals", &layout,
                        DXIL_COMP_TYPE_INVALID,
                        DXIL_RESOURCE_KIND_RAW_BUFFER, 0);
   if (!uav_meta)
      return false;

   util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta);
   if (ctx->mod.minor_validator < 6 &&
       util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8)
      ctx->mod.feats.use_64uavs = 1;
   /* Handles to UAVs used for kernel globals are created on-demand */
   add_resource(ctx, DXIL_RES_UAV_RAW, DXIL_RESOURCE_KIND_RAW_BUFFER, &layout);
   ctx->mod.raw_and_structured_buffers = true;
   return true;
}

static bool
emit_uav(struct ntd_context *ctx, unsigned binding, unsigned space, unsigned count,
         enum dxil_component_type comp_type, unsigned num_comps, enum dxil_resource_kind res_kind,
         enum gl_access_qualifier access, const char *name)
{
   unsigned id = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
   resource_array_layout layout = { id, binding, count, space };

   const struct dxil_type *res_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, num_comps, true /* readwrite */);
   res_type = dxil_module_get_array_type(&ctx->mod, res_type, count);
   const struct dxil_mdnode *uav_meta = emit_uav_metadata(&ctx->mod, res_type, name,
                                                          &layout, comp_type, res_kind, access);

   if (!uav_meta)
      return false;

   util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta);
   if (ctx->mod.minor_validator < 6 &&
       util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8)
      ctx->mod.feats.use_64uavs = 1;

   add_resource(ctx, res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER ? DXIL_RES_UAV_RAW : DXIL_RES_UAV_TYPED, res_kind, &layout);
   if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      ctx->mod.raw_and_structured_buffers = true;
   if (ctx->mod.shader_kind != DXIL_PIXEL_SHADER &&
       ctx->mod.shader_kind != DXIL_COMPUTE_SHADER)
      ctx->mod.feats.uavs_at_every_stage = true;

   return true;
}

static bool
emit_uav_var(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   unsigned binding, space;
   if (ctx->opts->environment == DXIL_ENVIRONMENT_GL) {
      /* For GL, the image intrinsics are already lowered, using driver_location
       * as the 0-based image index. Use space 1 so that we can keep using these
       * NIR constants without having to remap them, and so they don't overlap
       * SSBOs, which are also 0-based UAV bindings.
       */
      binding = var->data.driver_location;
      space = 1;
   } else {
      binding = var->data.binding;
      space = var->data.descriptor_set;
   }
   enum dxil_component_type comp_type = dxil_get_comp_type(var->type);
   enum dxil_resource_kind res_kind = dxil_get_resource_kind(var->type);
   const char *name = var->name;

   return emit_uav(ctx, binding, space, count, comp_type,
                   util_format_get_nr_components(var->data.image.format),
                   res_kind, var->data.access, name);
}
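
/* Constant materialization below doubles as feature detection: emitting a
 * 16-bit, 64-bit-integer, or double constant flags the corresponding module
 * feature (min-precision, int64 ops, doubles). */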
1412 static const struct dxil_value *
get_value_for_const(struct dxil_module *mod, nir_const_value *c, const struct dxil_type *type)
{
   if (type == mod->int1_type) return dxil_module_get_int1_const(mod, c->b);
   if (type == mod->float32_type) return dxil_module_get_float_const(mod, c->f32);
   if (type == mod->int32_type) return dxil_module_get_int32_const(mod, c->i32);
   if (type == mod->int16_type) {
      mod->feats.min_precision = true;
      return dxil_module_get_int16_const(mod, c->i16);
   }
   if (type == mod->int64_type) {
      mod->feats.int64_ops = true;
      return dxil_module_get_int64_const(mod, c->i64);
   }
   if (type == mod->float16_type) {
      mod->feats.min_precision = true;
      return dxil_module_get_float16_const(mod, c->u16);
   }
   if (type == mod->float64_type) {
      mod->feats.doubles = true;
      return dxil_module_get_double_const(mod, c->f64);
   }
   unreachable("Invalid type");
}

static const struct dxil_type *
get_type_for_glsl_base_type(struct dxil_module *mod, enum glsl_base_type type)
{
   uint32_t bit_size = glsl_base_type_bit_size(type);
   if (nir_alu_type_get_base_type(nir_get_nir_type_for_glsl_base_type(type)) == nir_type_float)
      return dxil_module_get_float_type(mod, bit_size);
   return dxil_module_get_int_type(mod, bit_size);
}

static const struct dxil_type *
get_type_for_glsl_type(struct dxil_module *mod, const struct glsl_type *type)
{
   if (glsl_type_is_scalar(type))
      return get_type_for_glsl_base_type(mod, glsl_get_base_type(type));

   if (glsl_type_is_vector(type))
      return dxil_module_get_vector_type(mod, get_type_for_glsl_base_type(mod, glsl_get_base_type(type)),
                                         glsl_get_vector_elements(type));

   if (glsl_type_is_array(type))
      return dxil_module_get_array_type(mod, get_type_for_glsl_type(mod, glsl_get_array_element(type)),
                                        glsl_array_size(type));

   assert(glsl_type_is_struct(type));
   uint32_t size = glsl_get_length(type);
   const struct dxil_type **fields = calloc(sizeof(const struct dxil_type *), size);
   for (uint32_t i = 0; i < size; ++i)
      fields[i] = get_type_for_glsl_type(mod, glsl_get_struct_field(type, i));
   const struct dxil_type *ret = dxil_module_get_struct_type(mod, glsl_get_type_name(type), fields, size);
   free((void *)fields);
   return ret;
}

static const struct dxil_value *
get_value_for_const_aggregate(struct dxil_module *mod, nir_constant *c, const struct glsl_type *type)
{
   const struct dxil_type *dxil_type = get_type_for_glsl_type(mod, type);
   if (glsl_type_is_vector_or_scalar(type)) {
      const struct dxil_type *element_type = get_type_for_glsl_base_type(mod, glsl_get_base_type(type));
      const struct dxil_value *elements[NIR_MAX_VEC_COMPONENTS];
      for (uint32_t i = 0; i < glsl_get_vector_elements(type); ++i)
         elements[i] = get_value_for_const(mod, &c->values[i], element_type);
      if (glsl_type_is_scalar(type))
         return elements[0];
      return dxil_module_get_vector_const(mod, dxil_type, elements);
   }

   uint32_t num_values = glsl_get_length(type);
   assert(num_values == c->num_elements);
   const struct dxil_value **values = calloc(sizeof(const struct dxil_value *), num_values);
   const struct dxil_value *ret;
   if (glsl_type_is_array(type)) {
      const struct glsl_type *element_type = glsl_get_array_element(type);
      for (uint32_t i = 0; i < num_values; ++i)
         values[i] = get_value_for_const_aggregate(mod, c->elements[i], element_type);
      ret = dxil_module_get_array_const(mod, dxil_type, values);
   } else {
      for (uint32_t i = 0; i < num_values; ++i)
         values[i] = get_value_for_const_aggregate(mod, c->elements[i], glsl_get_struct_field(type, i));
      ret = dxil_module_get_struct_const(mod, dxil_type, values);
   }
   free((void *)values);
   return ret;
}

static bool
emit_global_consts(struct ntd_context *ctx)
{
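   /* Two passes over the constant-mode variables: first assign each one a
    * driver_location slot, then (below) create the matching DXIL global.
    */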
   uint32_t index = 0;
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_constant) {
      assert(var->constant_initializer);
      var->data.driver_location = index++;
   }

   ctx->consts = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index);

   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_constant) {
      if (!var->name)
         var->name = ralloc_asprintf(var, "const_%d", var->data.driver_location);

      const struct dxil_value *agg_vals =
         get_value_for_const_aggregate(&ctx->mod, var->constant_initializer, var->type);
      if (!agg_vals)
         return false;

      const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name,
                                                              dxil_value_get_type(agg_vals),
                                                              DXIL_AS_DEFAULT, 16,
                                                              agg_vals);
      if (!gvar)
         return false;

      ctx->consts[var->data.driver_location] = gvar;
   }

   return true;
}

static bool
emit_shared_vars(struct ntd_context *ctx)
{
   uint32_t index = 0;
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_shared)
      var->data.driver_location = index++;

   ctx->sharedvars = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index);

   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_shared) {
      if (!var->name)
         var->name = ralloc_asprintf(var, "shared_%d", var->data.driver_location);
      const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name,
                                                              get_type_for_glsl_type(&ctx->mod, var->type),
                                                              DXIL_AS_GROUPSHARED, 16,
                                                              NULL);
      if (!gvar)
         return false;

      ctx->sharedvars[var->data.driver_location] = gvar;
   }

   return true;
}

static bool
emit_cbv(struct ntd_context *ctx, unsigned binding, unsigned space,
         unsigned size, unsigned count, char *name)
{
   assert(count != 0);

   unsigned idx = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);

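   /* A CBV is modeled as a struct wrapping a float32 array; `size` is in
    * 32-bit elements, so the byte size passed along below is 4 * size.
    */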
   const struct dxil_type *float32 = dxil_module_get_float_type(&ctx->mod, 32);
   const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, float32, size);
   const struct dxil_type *buffer_type = dxil_module_get_struct_type(&ctx->mod, name,
                                                                     &array_type, 1);
   // All ubo[1]s should have been lowered to ubo with static indexing
   const struct dxil_type *final_type = count != 1 ? dxil_module_get_array_type(&ctx->mod, buffer_type, count) : buffer_type;
   resource_array_layout layout = {idx, binding, count, space};
   const struct dxil_mdnode *cbv_meta = emit_cbv_metadata(&ctx->mod, final_type,
                                                          name, &layout, 4 * size);

   if (!cbv_meta)
      return false;

   util_dynarray_append(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *, cbv_meta);
   add_resource(ctx, DXIL_RES_CBV, DXIL_RESOURCE_KIND_CBUFFER, &layout);

   return true;
}

static bool
emit_ubo_var(struct ntd_context *ctx, nir_variable *var)
{
   unsigned count = 1;
   if (glsl_type_is_array(var->type))
      count = glsl_get_length(var->type);

   char *name = var->name;
   char temp_name[30];
   if (name && strlen(name) == 0) {
      snprintf(temp_name, sizeof(temp_name), "__unnamed_ubo_%d",
               ctx->unnamed_ubo_count++);
      name = temp_name;
   }

   const struct glsl_type *type = glsl_without_array(var->type);
   assert(glsl_type_is_struct(type) || glsl_type_is_interface(type));
   unsigned dwords = ALIGN_POT(glsl_get_explicit_size(type, false), 16) / 4;

   return emit_cbv(ctx, var->data.binding, var->data.descriptor_set,
                   dwords, count, name);
}

static bool
emit_sampler(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   unsigned id = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);
   unsigned binding = var->data.binding;
   resource_array_layout layout = {id, binding, count, var->data.descriptor_set};
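   /* Samplers are opaque; a struct with a single i32 member stands in for
    * the handle type (mirroring the struct.SamplerState layout dxc emits).
    */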
   const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
   const struct dxil_type *sampler_type = dxil_module_get_struct_type(&ctx->mod, "struct.SamplerState", &int32_type, 1);

   if (glsl_type_is_array(var->type))
      sampler_type = dxil_module_get_array_type(&ctx->mod, sampler_type, count);

   const struct dxil_mdnode *sampler_meta = emit_sampler_metadata(&ctx->mod, sampler_type, var, &layout);

   if (!sampler_meta)
      return false;

   util_dynarray_append(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *, sampler_meta);
   add_resource(ctx, DXIL_RES_SAMPLER, DXIL_RESOURCE_KIND_SAMPLER, &layout);

   return true;
}

static bool
emit_static_indexing_handles(struct ntd_context *ctx)
{
   /* Vulkan always uses dynamic handles, from instructions in the NIR */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN)
      return true;

   unsigned last_res_class = -1;
   unsigned id = 0;

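   /* Validator 1.6+ serializes the larger v1 resource records, so walk the
    * dynarray with the matching stride while only reading the common v0
    * prefix of each record.
    */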
   unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
      sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
   for (struct dxil_resource_v0 *res = (struct dxil_resource_v0 *)ctx->resources.data;
        res < (struct dxil_resource_v0 *)((char *)ctx->resources.data + ctx->resources.size);
        res = (struct dxil_resource_v0 *)((char *)res + resource_element_size)) {
      enum dxil_resource_class res_class;
      const struct dxil_value **handle_array;
      switch (res->resource_type) {
      case DXIL_RES_SRV_TYPED:
      case DXIL_RES_SRV_RAW:
      case DXIL_RES_SRV_STRUCTURED:
         res_class = DXIL_RESOURCE_CLASS_SRV;
         handle_array = ctx->srv_handles;
         break;
      case DXIL_RES_CBV:
         res_class = DXIL_RESOURCE_CLASS_CBV;
         handle_array = ctx->cbv_handles;
         break;
      case DXIL_RES_SAMPLER:
         res_class = DXIL_RESOURCE_CLASS_SAMPLER;
         handle_array = ctx->sampler_handles;
         break;
      case DXIL_RES_UAV_RAW:
         res_class = DXIL_RESOURCE_CLASS_UAV;
         handle_array = ctx->ssbo_handles;
         break;
      case DXIL_RES_UAV_TYPED:
      case DXIL_RES_UAV_STRUCTURED:
      case DXIL_RES_UAV_STRUCTURED_WITH_COUNTER:
         res_class = DXIL_RESOURCE_CLASS_UAV;
         handle_array = ctx->image_handles;
         break;
      default:
         unreachable("Unexpected resource type");
      }

      if (last_res_class != res_class)
         id = 0;
      else
         id++;
      last_res_class = res_class;

      if (res->space > 1)
         continue;
      assert(res->space == 0 ||
             (res->space == 1 &&
              res->resource_type != DXIL_RES_UAV_RAW &&
              ctx->opts->environment == DXIL_ENVIRONMENT_GL));

      /* CL uses dynamic handles for the "globals" UAV array, but uses static
       * handles for UBOs, textures, and samplers.
       */
      if (ctx->opts->environment == DXIL_ENVIRONMENT_CL &&
          res->resource_type == DXIL_RES_UAV_RAW)
         continue;

      for (unsigned i = res->lower_bound; i <= res->upper_bound; ++i) {
         handle_array[i] = emit_createhandle_call_const_index(ctx,
                                                              res_class,
                                                              res->lower_bound,
                                                              res->upper_bound,
                                                              res->space,
                                                              id,
                                                              i,
                                                              false);
         if (!handle_array[i])
            return false;
      }
   }
   return true;
}

static const struct dxil_mdnode *
emit_gs_state(struct ntd_context *ctx)
{
   const struct dxil_mdnode *gs_state_nodes[5];
   const nir_shader *s = ctx->shader;

   gs_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, dxil_get_input_primitive(s->info.gs.input_primitive));
   gs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.vertices_out);
   gs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.gs.active_stream_mask, 1));
   gs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, dxil_get_primitive_topology(s->info.gs.output_primitive));
   gs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.invocations);

   for (unsigned i = 0; i < ARRAY_SIZE(gs_state_nodes); ++i) {
      if (!gs_state_nodes[i])
         return NULL;
   }

   return dxil_get_metadata_node(&ctx->mod, gs_state_nodes, ARRAY_SIZE(gs_state_nodes));
}

static enum dxil_tessellator_domain
get_tessellator_domain(enum tess_primitive_mode primitive_mode)
{
   switch (primitive_mode) {
   case TESS_PRIMITIVE_QUADS: return DXIL_TESSELLATOR_DOMAIN_QUAD;
   case TESS_PRIMITIVE_TRIANGLES: return DXIL_TESSELLATOR_DOMAIN_TRI;
   case TESS_PRIMITIVE_ISOLINES: return DXIL_TESSELLATOR_DOMAIN_ISOLINE;
   default:
      unreachable("Invalid tessellator primitive mode");
   }
}

static enum dxil_tessellator_partitioning
get_tessellator_partitioning(enum gl_tess_spacing spacing)
{
   switch (spacing) {
   default:
   case TESS_SPACING_EQUAL:
      return DXIL_TESSELLATOR_PARTITIONING_INTEGER;
   case TESS_SPACING_FRACTIONAL_EVEN:
      return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN;
   case TESS_SPACING_FRACTIONAL_ODD:
      return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD;
   }
}

static enum dxil_tessellator_output_primitive
get_tessellator_output_primitive(const struct shader_info *info)
{
   if (info->tess.point_mode)
      return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_POINT;
   if (info->tess._primitive_mode == TESS_PRIMITIVE_ISOLINES)
      return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_LINE;
   /* Note: GL tessellation domain is inverted from D3D, which means triangle
    * winding needs to be inverted.
    */
   if (info->tess.ccw)
      return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CW;
   return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CCW;
}

static const struct dxil_mdnode *
emit_hs_state(struct ntd_context *ctx)
{
   const struct dxil_mdnode *hs_state_nodes[7];

   hs_state_nodes[0] = dxil_get_metadata_func(&ctx->mod, ctx->tess_ctrl_patch_constant_func_def->func);
   hs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->tess_input_control_point_count);
   hs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out);
   hs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode));
   hs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_partitioning(ctx->shader->info.tess.spacing));
   hs_state_nodes[5] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_output_primitive(&ctx->shader->info));
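   /* Maximum tessellation factor; D3D caps this at 64. */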
   hs_state_nodes[6] = dxil_get_metadata_float32(&ctx->mod, 64.0f);

   return dxil_get_metadata_node(&ctx->mod, hs_state_nodes, ARRAY_SIZE(hs_state_nodes));
}

static const struct dxil_mdnode *
emit_ds_state(struct ntd_context *ctx)
{
   const struct dxil_mdnode *ds_state_nodes[2];

   ds_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode));
   ds_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out);

   return dxil_get_metadata_node(&ctx->mod, ds_state_nodes, ARRAY_SIZE(ds_state_nodes));
}

static const struct dxil_mdnode *
emit_threads(struct ntd_context *ctx)
{
   const nir_shader *s = ctx->shader;
   const struct dxil_mdnode *threads_x = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[0], 1));
   const struct dxil_mdnode *threads_y = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[1], 1));
   const struct dxil_mdnode *threads_z = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[2], 1));
   if (!threads_x || !threads_y || !threads_z)
      return NULL;

   const struct dxil_mdnode *threads_nodes[] = { threads_x, threads_y, threads_z };
   return dxil_get_metadata_node(&ctx->mod, threads_nodes, ARRAY_SIZE(threads_nodes));
}

static const struct dxil_mdnode *
emit_wave_size(struct ntd_context *ctx)
{
   const nir_shader *s = ctx->shader;
   const struct dxil_mdnode *wave_size_node = dxil_get_metadata_int32(&ctx->mod, s->info.subgroup_size);
   return dxil_get_metadata_node(&ctx->mod, &wave_size_node, 1);
}

static const struct dxil_mdnode *
emit_wave_size_range(struct ntd_context *ctx)
{
   const nir_shader *s = ctx->shader;
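   /* The wave-size-range tag takes (min, max, preferred); using the same
    * value for all three pins the wave size, matching the pre-SM6.8 tag.
    */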
   const struct dxil_mdnode *wave_size_nodes[3];
   wave_size_nodes[0] = dxil_get_metadata_int32(&ctx->mod, s->info.subgroup_size);
   wave_size_nodes[1] = wave_size_nodes[0];
   wave_size_nodes[2] = wave_size_nodes[0];
   return dxil_get_metadata_node(&ctx->mod, wave_size_nodes, ARRAY_SIZE(wave_size_nodes));
}

static int64_t
get_module_flags(struct ntd_context *ctx)
{
   /* See the DXIL documentation for the definition of these flags:
    *
    * https://github.com/Microsoft/DirectXShaderCompiler/blob/master/docs/DXIL.rst#shader-flags
    */

   uint64_t flags = 0;
   if (ctx->mod.feats.doubles)
      flags |= (1 << 2);
   if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT &&
       ctx->shader->info.fs.early_fragment_tests)
      flags |= (1 << 3);
   if (ctx->mod.raw_and_structured_buffers)
      flags |= (1 << 4);
   if (ctx->mod.feats.min_precision)
      flags |= (1 << 5);
   if (ctx->mod.feats.dx11_1_double_extensions)
      flags |= (1 << 6);
   if (ctx->mod.feats.array_layer_from_vs_or_ds)
      flags |= (1 << 9);
   if (ctx->mod.feats.inner_coverage)
      flags |= (1 << 10);
   if (ctx->mod.feats.stencil_ref)
      flags |= (1 << 11);
   if (ctx->mod.feats.tiled_resources)
      flags |= (1 << 12);
   if (ctx->mod.feats.typed_uav_load_additional_formats)
      flags |= (1 << 13);
   if (ctx->mod.feats.use_64uavs)
      flags |= (1 << 15);
   if (ctx->mod.feats.uavs_at_every_stage)
      flags |= (1 << 16);
   if (ctx->mod.feats.cs_4x_raw_sb)
      flags |= (1 << 17);
   if (ctx->mod.feats.rovs)
      flags |= (1 << 18);
   if (ctx->mod.feats.wave_ops)
      flags |= (1 << 19);
   if (ctx->mod.feats.int64_ops)
      flags |= (1 << 20);
   if (ctx->mod.feats.view_id)
      flags |= (1 << 21);
   if (ctx->mod.feats.barycentrics)
      flags |= (1 << 22);
   if (ctx->mod.feats.native_low_precision)
      flags |= (1 << 23) | (1 << 5);
   if (ctx->mod.feats.shading_rate)
      flags |= (1 << 24);
   if (ctx->mod.feats.raytracing_tier_1_1)
      flags |= (1 << 25);
   if (ctx->mod.feats.sampler_feedback)
      flags |= (1 << 26);
   if (ctx->mod.feats.atomic_int64_typed)
      flags |= (1 << 27);
   if (ctx->mod.feats.atomic_int64_tgsm)
      flags |= (1 << 28);
   if (ctx->mod.feats.derivatives_in_mesh_or_amp)
      flags |= (1 << 29);
   if (ctx->mod.feats.resource_descriptor_heap_indexing)
      flags |= (1 << 30);
   if (ctx->mod.feats.sampler_descriptor_heap_indexing)
      flags |= (1ull << 31);
   if (ctx->mod.feats.atomic_int64_heap_resource)
      flags |= (1ull << 32);
   if (ctx->mod.feats.advanced_texture_ops)
      flags |= (1ull << 34);
   if (ctx->mod.feats.writable_msaa)
      flags |= (1ull << 35);
   // Bit 36 is wave MMA
   if (ctx->mod.feats.sample_cmp_bias_gradient)
      flags |= (1ull << 37);
   if (ctx->mod.feats.extended_command_info)
      flags |= (1ull << 38);

   if (ctx->opts->disable_math_refactoring)
      flags |= (1 << 1);

   /* Work around https://github.com/microsoft/DirectXShaderCompiler/issues/4616
    * When targeting SM6.7 and with at least one UAV, if no other flags are present,
    * set the resources-may-not-alias flag, or else the DXIL validator may end up
    * with uninitialized memory which will fail validation, due to missing that flag.
    */
   if (flags == 0 && ctx->mod.minor_version >= 7 && ctx->num_uavs > 0)
      flags |= (1ull << 33);

   return flags;
}

static const struct dxil_mdnode *
emit_entrypoint(struct ntd_context *ctx,
                const struct dxil_func *func, const char *name,
                const struct dxil_mdnode *signatures,
                const struct dxil_mdnode *resources,
                const struct dxil_mdnode *shader_props)
{
   char truncated_name[254] = { 0 };
   strncpy(truncated_name, name, ARRAY_SIZE(truncated_name) - 1);

   const struct dxil_mdnode *func_md = dxil_get_metadata_func(&ctx->mod, func);
   const struct dxil_mdnode *name_md = dxil_get_metadata_string(&ctx->mod, truncated_name);
   const struct dxil_mdnode *nodes[] = {
      func_md,
      name_md,
      signatures,
      resources,
      shader_props
   };
   return dxil_get_metadata_node(&ctx->mod, nodes,
                                 ARRAY_SIZE(nodes));
}

static const struct dxil_mdnode *
emit_resources(struct ntd_context *ctx)
{
   bool emit_resources = false;
   const struct dxil_mdnode *resources_nodes[] = {
      NULL, NULL, NULL, NULL
   };

#define ARRAY_AND_SIZE(arr) arr.data, util_dynarray_num_elements(&arr, const struct dxil_mdnode *)

   if (ctx->srv_metadata_nodes.size) {
      resources_nodes[0] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->srv_metadata_nodes));
      emit_resources = true;
   }

   if (ctx->uav_metadata_nodes.size) {
      resources_nodes[1] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->uav_metadata_nodes));
      emit_resources = true;
   }

   if (ctx->cbv_metadata_nodes.size) {
      resources_nodes[2] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->cbv_metadata_nodes));
      emit_resources = true;
   }

   if (ctx->sampler_metadata_nodes.size) {
      resources_nodes[3] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->sampler_metadata_nodes));
      emit_resources = true;
   }

#undef ARRAY_AND_SIZE

   return emit_resources ?
      dxil_get_metadata_node(&ctx->mod, resources_nodes, ARRAY_SIZE(resources_nodes)) : NULL;
}

static bool
emit_tag(struct ntd_context *ctx, enum dxil_shader_tag tag,
         const struct dxil_mdnode *value_node)
{
   const struct dxil_mdnode *tag_node = dxil_get_metadata_int32(&ctx->mod, tag);
   if (!tag_node || !value_node)
      return false;
   assert(ctx->num_shader_property_nodes <= ARRAY_SIZE(ctx->shader_property_nodes) - 2);
   ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = tag_node;
   ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = value_node;

   return true;
}

static bool
emit_metadata(struct ntd_context *ctx)
{
   /* DXIL versions are 1.x for shader model 6.x */
   assert(ctx->mod.major_version == 6);
   unsigned dxilMajor = 1;
   unsigned dxilMinor = ctx->mod.minor_version;
   unsigned valMajor = ctx->mod.major_validator;
   unsigned valMinor = ctx->mod.minor_validator;
   if (!emit_llvm_ident(&ctx->mod) ||
       !emit_named_version(&ctx->mod, "dx.version", dxilMajor, dxilMinor) ||
       !emit_named_version(&ctx->mod, "dx.valver", valMajor, valMinor) ||
       !emit_dx_shader_model(&ctx->mod))
      return false;

   const struct dxil_func_def *main_func_def = ctx->main_func_def;
   if (!main_func_def)
      return false;
   const struct dxil_func *main_func = main_func_def->func;

   const struct dxil_mdnode *resources_node = emit_resources(ctx);

   const struct dxil_mdnode *main_entrypoint = dxil_get_metadata_func(&ctx->mod, main_func);
   const struct dxil_mdnode *node27 = dxil_get_metadata_node(&ctx->mod, NULL, 0);

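   /* Minimal dx.typeAnnotations payload: the leading i32 1 below tags the
    * node as a function-annotation list, and the empty nodes stand in for
    * unused parameter annotations (the numeric node names mirror the
    * metadata ids dxc tends to assign).
    */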
   const struct dxil_mdnode *node4 = dxil_get_metadata_int32(&ctx->mod, 0);
   const struct dxil_mdnode *nodes_4_27_27[] = {
      node4, node27, node27
   };
   const struct dxil_mdnode *node28 = dxil_get_metadata_node(&ctx->mod, nodes_4_27_27,
                                                             ARRAY_SIZE(nodes_4_27_27));

   const struct dxil_mdnode *node29 = dxil_get_metadata_node(&ctx->mod, &node28, 1);

   const struct dxil_mdnode *node3 = dxil_get_metadata_int32(&ctx->mod, 1);
   const struct dxil_mdnode *main_type_annotation_nodes[] = {
      node3, main_entrypoint, node29
   };
   const struct dxil_mdnode *main_type_annotation = dxil_get_metadata_node(&ctx->mod, main_type_annotation_nodes,
                                                                           ARRAY_SIZE(main_type_annotation_nodes));

   if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) {
      if (!emit_tag(ctx, DXIL_SHADER_TAG_GS_STATE, emit_gs_state(ctx)))
         return false;
   } else if (ctx->mod.shader_kind == DXIL_HULL_SHADER) {
      ctx->tess_input_control_point_count = 32;
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
         if (nir_is_arrayed_io(var, MESA_SHADER_TESS_CTRL)) {
            ctx->tess_input_control_point_count = glsl_array_size(var->type);
            break;
         }
      }

      if (!emit_tag(ctx, DXIL_SHADER_TAG_HS_STATE, emit_hs_state(ctx)))
         return false;
   } else if (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) {
      if (!emit_tag(ctx, DXIL_SHADER_TAG_DS_STATE, emit_ds_state(ctx)))
         return false;
   } else if (ctx->mod.shader_kind == DXIL_COMPUTE_SHADER) {
      if (!emit_tag(ctx, DXIL_SHADER_TAG_NUM_THREADS, emit_threads(ctx)))
         return false;
      if (ctx->mod.minor_version >= 6 &&
          ctx->shader->info.subgroup_size >= SUBGROUP_SIZE_REQUIRE_8) {
         if (ctx->mod.minor_version < 8) {
            if (!emit_tag(ctx, DXIL_SHADER_TAG_WAVE_SIZE, emit_wave_size(ctx)))
               return false;
         } else {
            if (!emit_tag(ctx, DXIL_SHADER_TAG_WAVE_SIZE_RANGE, emit_wave_size_range(ctx)))
               return false;
         }
      }
   }

   uint64_t flags = get_module_flags(ctx);
   if (flags != 0) {
      if (!emit_tag(ctx, DXIL_SHADER_TAG_FLAGS, dxil_get_metadata_int64(&ctx->mod, flags)))
         return false;
   }
   const struct dxil_mdnode *shader_properties = NULL;
   if (ctx->num_shader_property_nodes > 0) {
      shader_properties = dxil_get_metadata_node(&ctx->mod, ctx->shader_property_nodes,
                                                 ctx->num_shader_property_nodes);
      if (!shader_properties)
         return false;
   }

   nir_function_impl *entry_func_impl = nir_shader_get_entrypoint(ctx->shader);
   const struct dxil_mdnode *dx_entry_point = emit_entrypoint(ctx, main_func,
      entry_func_impl->function->name, get_signatures(&ctx->mod), resources_node, shader_properties);
   if (!dx_entry_point)
      return false;

   if (resources_node) {
      const struct dxil_mdnode *dx_resources = resources_node;
      dxil_add_metadata_named_node(&ctx->mod, "dx.resources",
                                   &dx_resources, 1);
   }

   if (ctx->mod.minor_version >= 2 &&
       dxil_nir_analyze_io_dependencies(&ctx->mod, ctx->shader)) {
      const struct dxil_type *i32_type = dxil_module_get_int_type(&ctx->mod, 32);
      if (!i32_type)
         return false;

      const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, i32_type, ctx->mod.serialized_dependency_table_size);
      if (!array_type)
         return false;

      const struct dxil_value **array_entries = malloc(sizeof(const struct dxil_value *) * ctx->mod.serialized_dependency_table_size);
      if (!array_entries)
         return false;

      for (uint32_t i = 0; i < ctx->mod.serialized_dependency_table_size; ++i)
         array_entries[i] = dxil_module_get_int32_const(&ctx->mod, ctx->mod.serialized_dependency_table[i]);
      const struct dxil_value *array_val = dxil_module_get_array_const(&ctx->mod, array_type, array_entries);
      free((void *)array_entries);

      const struct dxil_mdnode *view_id_state_val = dxil_get_metadata_value(&ctx->mod, array_type, array_val);
      if (!view_id_state_val)
         return false;

      const struct dxil_mdnode *view_id_state_node = dxil_get_metadata_node(&ctx->mod, &view_id_state_val, 1);

      dxil_add_metadata_named_node(&ctx->mod, "dx.viewIdState", &view_id_state_node, 1);
   }

   const struct dxil_mdnode *dx_type_annotations[] = { main_type_annotation };
   return dxil_add_metadata_named_node(&ctx->mod, "dx.typeAnnotations",
                                       dx_type_annotations,
                                       ARRAY_SIZE(dx_type_annotations)) &&
          dxil_add_metadata_named_node(&ctx->mod, "dx.entryPoints",
                                       &dx_entry_point, 1);
}

static const struct dxil_value *
bitcast_to_int(struct ntd_context *ctx, unsigned bit_size,
               const struct dxil_value *value)
{
   const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod, bit_size);
   if (!type)
      return NULL;

   return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
}

static const struct dxil_value *
bitcast_to_float(struct ntd_context *ctx, unsigned bit_size,
                 const struct dxil_value *value)
{
   const struct dxil_type *type = dxil_module_get_float_type(&ctx->mod, bit_size);
   if (!type)
      return NULL;

   return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
}

static bool
is_phi_src(nir_def *ssa)
{
   nir_foreach_use(src, ssa)
      if (nir_src_parent_instr(src)->type == nir_instr_type_phi)
         return true;
   return false;
}

static void
store_ssa_def(struct ntd_context *ctx, nir_def *ssa, unsigned chan,
              const struct dxil_value *value)
{
   assert(ssa->index < ctx->num_defs);
   assert(chan < ssa->num_components);
   /* Insert bitcasts for phi srcs in the parent block */
   if (is_phi_src(ssa)) {
      /* Prefer ints over floats if it could be both or if we have no type info */
      nir_alu_type expect_type =
         BITSET_TEST(ctx->int_types, ssa->index) ? nir_type_int :
         (BITSET_TEST(ctx->float_types, ssa->index) ? nir_type_float :
          nir_type_int);
      assert(ssa->bit_size != 1 || expect_type == nir_type_int);
      if (ssa->bit_size != 1 && expect_type != dxil_type_to_nir_type(dxil_value_get_type(value)))
         value = dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST,
                                expect_type == nir_type_int ?
                                   dxil_module_get_int_type(&ctx->mod, ssa->bit_size) :
                                   dxil_module_get_float_type(&ctx->mod, ssa->bit_size), value);
      if (ssa->bit_size == 64) {
         if (expect_type == nir_type_int)
            ctx->mod.feats.int64_ops = true;
         if (expect_type == nir_type_float)
            ctx->mod.feats.doubles = true;
      }
   }
   ctx->defs[ssa->index].chans[chan] = value;
}

static void
store_def(struct ntd_context *ctx, nir_def *def, unsigned chan,
          const struct dxil_value *value)
{
   const struct dxil_type *type = dxil_value_get_type(value);
   if (type == ctx->mod.float64_type)
      ctx->mod.feats.doubles = true;
   if (type == ctx->mod.float16_type ||
       type == ctx->mod.int16_type)
      ctx->mod.feats.min_precision = true;
   if (type == ctx->mod.int64_type)
      ctx->mod.feats.int64_ops = true;
   store_ssa_def(ctx, def, chan, value);
}

static void
store_alu_dest(struct ntd_context *ctx, nir_alu_instr *alu, unsigned chan,
               const struct dxil_value *value)
{
   store_def(ctx, &alu->def, chan, value);
}

static const struct dxil_value *
get_src_ssa(struct ntd_context *ctx, const nir_def *ssa, unsigned chan)
{
   assert(ssa->index < ctx->num_defs);
   assert(chan < ssa->num_components);
   assert(ctx->defs[ssa->index].chans[chan]);
   return ctx->defs[ssa->index].chans[chan];
}

static const struct dxil_value *
get_src(struct ntd_context *ctx, nir_src *src, unsigned chan,
        nir_alu_type type)
{
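   /* DXIL values are typed but NIR SSA values are not, so if the cached
    * value's type doesn't match what this use expects, insert a bitcast
    * (recording any optional features the wider types imply).
    */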
   const struct dxil_value *value = get_src_ssa(ctx, src->ssa, chan);

   const int bit_size = nir_src_bit_size(*src);

   switch (nir_alu_type_get_base_type(type)) {
   case nir_type_int:
   case nir_type_uint: {
      const struct dxil_type *expect_type = dxil_module_get_int_type(&ctx->mod, bit_size);
      /* nothing to do */
      if (dxil_value_type_equal_to(value, expect_type)) {
         assert(bit_size != 64 || ctx->mod.feats.int64_ops);
         return value;
      }
      if (bit_size == 64) {
         assert(ctx->mod.feats.doubles);
         ctx->mod.feats.int64_ops = true;
      }
      if (bit_size == 16)
         ctx->mod.feats.native_low_precision = true;
      assert(dxil_value_type_bitsize_equal_to(value, bit_size));
      return bitcast_to_int(ctx, bit_size, value);
   }

   case nir_type_float:
      assert(nir_src_bit_size(*src) >= 16);
      if (dxil_value_type_equal_to(value, dxil_module_get_float_type(&ctx->mod, bit_size))) {
         assert(nir_src_bit_size(*src) != 64 || ctx->mod.feats.doubles);
         return value;
      }
      if (bit_size == 64) {
         assert(ctx->mod.feats.int64_ops);
         ctx->mod.feats.doubles = true;
      }
      if (bit_size == 16)
         ctx->mod.feats.native_low_precision = true;
      assert(dxil_value_type_bitsize_equal_to(value, bit_size));
      return bitcast_to_float(ctx, bit_size, value);

   case nir_type_bool:
      if (!dxil_value_type_bitsize_equal_to(value, 1)) {
         return dxil_emit_cast(&ctx->mod, DXIL_CAST_TRUNC,
                               dxil_module_get_int_type(&ctx->mod, 1), value);
      }
      return value;

   default:
      unreachable("unexpected nir_alu_type");
   }
}

static const struct dxil_value *
get_alu_src(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src)
{
   unsigned chan = alu->src[src].swizzle[0];
   return get_src(ctx, &alu->src[src].src, chan,
                  nir_op_infos[alu->op].input_types[src]);
}

static bool
emit_binop(struct ntd_context *ctx, nir_alu_instr *alu,
           enum dxil_bin_opcode opcode,
           const struct dxil_value *op0, const struct dxil_value *op1)
{
   bool is_float_op = nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) == nir_type_float;

   enum dxil_opt_flags flags = 0;
   if (is_float_op && !alu->exact)
      flags |= DXIL_UNSAFE_ALGEBRA;

   const struct dxil_value *v = dxil_emit_binop(&ctx->mod, opcode, op0, op1, flags);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_shift(struct ntd_context *ctx, nir_alu_instr *alu,
           enum dxil_bin_opcode opcode,
           const struct dxil_value *op0, const struct dxil_value *op1)
{
   unsigned op0_bit_size = nir_src_bit_size(alu->src[0].src);
   unsigned op1_bit_size = nir_src_bit_size(alu->src[1].src);

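   /* LLVM (and thus DXIL) leaves shifts undefined when the amount is >= the
    * operand's bit width, so mask the shift amount to bit_size - 1 to match
    * NIR's wrapping semantics.
    */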
   uint64_t shift_mask = op0_bit_size - 1;
   if (!nir_src_is_const(alu->src[1].src)) {
      if (op0_bit_size != op1_bit_size) {
         const struct dxil_type *type =
            dxil_module_get_int_type(&ctx->mod, op0_bit_size);
         enum dxil_cast_opcode cast_op =
            op1_bit_size < op0_bit_size ? DXIL_CAST_ZEXT : DXIL_CAST_TRUNC;
         op1 = dxil_emit_cast(&ctx->mod, cast_op, type, op1);
      }
      op1 = dxil_emit_binop(&ctx->mod, DXIL_BINOP_AND,
                            op1,
                            dxil_module_get_int_const(&ctx->mod, shift_mask, op0_bit_size),
                            0);
   } else {
      uint64_t val = nir_scalar_as_uint(
         nir_scalar_chase_alu_src(nir_get_scalar(&alu->def, 0), 1));
      op1 = dxil_module_get_int_const(&ctx->mod, val & shift_mask, op0_bit_size);
   }

   const struct dxil_value *v =
      dxil_emit_binop(&ctx->mod, opcode, op0, op1, 0);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_cmp(struct ntd_context *ctx, nir_alu_instr *alu,
         enum dxil_cmp_pred pred,
         const struct dxil_value *op0, const struct dxil_value *op1)
{
   const struct dxil_value *v = dxil_emit_cmp(&ctx->mod, pred, op0, op1);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static enum dxil_cast_opcode
get_cast_op(nir_alu_instr *alu)
{
   unsigned dst_bits = alu->def.bit_size;
   unsigned src_bits = nir_src_bit_size(alu->src[0].src);

   switch (alu->op) {
   /* bool -> int */
   case nir_op_b2i16:
   case nir_op_b2i32:
   case nir_op_b2i64:
      return DXIL_CAST_ZEXT;

   /* float -> float */
   case nir_op_f2f16_rtz:
   case nir_op_f2f16:
   case nir_op_f2fmp:
   case nir_op_f2f32:
   case nir_op_f2f64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_FPTRUNC;
      else
         return DXIL_CAST_FPEXT;

   /* int -> int */
   case nir_op_i2i1:
   case nir_op_i2i16:
   case nir_op_i2imp:
   case nir_op_i2i32:
   case nir_op_i2i64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_TRUNC;
      else
         return DXIL_CAST_SEXT;

   /* uint -> uint */
   case nir_op_u2u1:
   case nir_op_u2u16:
   case nir_op_u2u32:
   case nir_op_u2u64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_TRUNC;
      else
         return DXIL_CAST_ZEXT;

   /* float -> int */
   case nir_op_f2i16:
   case nir_op_f2imp:
   case nir_op_f2i32:
   case nir_op_f2i64:
      return DXIL_CAST_FPTOSI;

   /* float -> uint */
   case nir_op_f2u16:
   case nir_op_f2ump:
   case nir_op_f2u32:
   case nir_op_f2u64:
      return DXIL_CAST_FPTOUI;

   /* int -> float */
   case nir_op_i2f16:
   case nir_op_i2fmp:
   case nir_op_i2f32:
   case nir_op_i2f64:
      return DXIL_CAST_SITOFP;

   /* uint -> float */
   case nir_op_u2f16:
   case nir_op_u2fmp:
   case nir_op_u2f32:
   case nir_op_u2f64:
      return DXIL_CAST_UITOFP;

   default:
      unreachable("unexpected cast op");
   }
}

static const struct dxil_type *
get_cast_dest_type(struct ntd_context *ctx, nir_alu_instr *alu)
{
   unsigned dst_bits = alu->def.bit_size;
   switch (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type)) {
   case nir_type_bool:
      assert(dst_bits == 1);
      FALLTHROUGH;
   case nir_type_int:
   case nir_type_uint:
      return dxil_module_get_int_type(&ctx->mod, dst_bits);

   case nir_type_float:
      return dxil_module_get_float_type(&ctx->mod, dst_bits);

   default:
      unreachable("unknown nir_alu_type");
   }
}

static bool
is_double(nir_alu_type alu_type, unsigned bit_size)
{
   return nir_alu_type_get_base_type(alu_type) == nir_type_float &&
          bit_size == 64;
}

static bool
emit_cast(struct ntd_context *ctx, nir_alu_instr *alu,
          const struct dxil_value *value)
{
   enum dxil_cast_opcode opcode = get_cast_op(alu);
   const struct dxil_type *type = get_cast_dest_type(ctx, alu);
   if (!type)
      return false;

   const nir_op_info *info = &nir_op_infos[alu->op];
   switch (opcode) {
   case DXIL_CAST_UITOFP:
   case DXIL_CAST_SITOFP:
      if (is_double(info->output_type, alu->def.bit_size))
         ctx->mod.feats.dx11_1_double_extensions = true;
      break;
   case DXIL_CAST_FPTOUI:
   case DXIL_CAST_FPTOSI:
      if (is_double(info->input_types[0], nir_src_bit_size(alu->src[0].src)))
         ctx->mod.feats.dx11_1_double_extensions = true;
      break;
   default:
      break;
   }

   if (alu->def.bit_size == 16) {
      switch (alu->op) {
      case nir_op_f2fmp:
      case nir_op_i2imp:
      case nir_op_f2imp:
      case nir_op_f2ump:
      case nir_op_i2fmp:
      case nir_op_u2fmp:
         break;
      default:
         ctx->mod.feats.native_low_precision = true;
      }
   }

   const struct dxil_value *v = dxil_emit_cast(&ctx->mod, opcode, type,
                                               value);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static enum overload_type
get_overload(nir_alu_type alu_type, unsigned bit_size)
{
   switch (nir_alu_type_get_base_type(alu_type)) {
   case nir_type_int:
   case nir_type_uint:
      switch (bit_size) {
      case 1: return DXIL_I1;
      case 16: return DXIL_I16;
      case 32: return DXIL_I32;
      case 64: return DXIL_I64;
      default:
         unreachable("unexpected bit_size");
      }
   case nir_type_float:
      switch (bit_size) {
      case 16: return DXIL_F16;
      case 32: return DXIL_F32;
      case 64: return DXIL_F64;
      default:
         unreachable("unexpected bit_size");
      }
   case nir_type_invalid:
      return DXIL_NONE;
   default:
      unreachable("unexpected output type");
   }
}

static enum overload_type
get_ambiguous_overload(struct ntd_context *ctx, nir_intrinsic_instr *intr,
                       enum overload_type default_type)
{
   if (BITSET_TEST(ctx->int_types, intr->def.index))
      return get_overload(nir_type_int, intr->def.bit_size);
   if (BITSET_TEST(ctx->float_types, intr->def.index))
      return get_overload(nir_type_float, intr->def.bit_size);
   return default_type;
}

static enum overload_type
get_ambiguous_overload_alu_type(struct ntd_context *ctx, nir_intrinsic_instr *intr,
                                nir_alu_type alu_type)
{
   return get_ambiguous_overload(ctx, intr, get_overload(alu_type, intr->def.bit_size));
}

static bool
emit_unary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
                 enum dxil_intr intr, const struct dxil_value *op)
{
   const nir_op_info *info = &nir_op_infos[alu->op];
   unsigned src_bits = nir_src_bit_size(alu->src[0].src);
   enum overload_type overload = get_overload(info->input_types[0], src_bits);

   const struct dxil_value *v = emit_unary_call(ctx, overload, intr, op);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_binary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
                  enum dxil_intr intr,
                  const struct dxil_value *op0, const struct dxil_value *op1)
{
   const nir_op_info *info = &nir_op_infos[alu->op];
   assert(info->output_type == info->input_types[0]);
   assert(info->output_type == info->input_types[1]);
   unsigned dst_bits = alu->def.bit_size;
   assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
   assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
   enum overload_type overload = get_overload(info->output_type, dst_bits);

   const struct dxil_value *v = emit_binary_call(ctx, overload, intr,
                                                 op0, op1);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_tertiary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
                    enum dxil_intr intr,
                    const struct dxil_value *op0,
                    const struct dxil_value *op1,
                    const struct dxil_value *op2)
{
   const nir_op_info *info = &nir_op_infos[alu->op];
   unsigned dst_bits = alu->def.bit_size;
   assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
   assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
   assert(nir_src_bit_size(alu->src[2].src) == dst_bits);

   assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[0], dst_bits));
   assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[1], dst_bits));
   assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[2], dst_bits));

   enum overload_type overload = get_overload(info->output_type, dst_bits);

   const struct dxil_value *v = emit_tertiary_call(ctx, overload, intr,
                                                   op0, op1, op2);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_bitfield_insert(struct ntd_context *ctx, nir_alu_instr *alu,
                     const struct dxil_value *base,
                     const struct dxil_value *insert,
                     const struct dxil_value *offset,
                     const struct dxil_value *width)
{
   /* DXIL is width, offset, insert, base, NIR is base, insert, offset, width */
   const struct dxil_value *v = emit_quaternary_call(ctx, DXIL_I32, DXIL_INTR_BFI,
                                                     width, offset, insert, base);
   if (!v)
      return false;

   /* DXIL uses the 5 LSB from width/offset. Special-case width >= 32 == copy insert. */
   const struct dxil_value *compare_width = dxil_emit_cmp(&ctx->mod, DXIL_ICMP_SGE,
      width, dxil_module_get_int32_const(&ctx->mod, 32));
   v = dxil_emit_select(&ctx->mod, compare_width, insert, v);
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_dot4add_packed(struct ntd_context *ctx, nir_alu_instr *alu,
                    enum dxil_intr intr,
                    const struct dxil_value *src0,
                    const struct dxil_value *src1,
                    const struct dxil_value *accum)
{
   const struct dxil_func *f = dxil_get_function(&ctx->mod, "dx.op.dot4AddPacked", DXIL_I32);
   if (!f)
      return false;
   const struct dxil_value *srcs[] = { dxil_module_get_int32_const(&ctx->mod, intr), accum, src0, src1 };
   const struct dxil_value *v = dxil_emit_call(&ctx->mod, f, srcs, ARRAY_SIZE(srcs));
   if (!v)
      return false;

   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_select(struct ntd_context *ctx, nir_alu_instr *alu,
            const struct dxil_value *sel,
            const struct dxil_value *val_true,
            const struct dxil_value *val_false)
{
   assert(sel);
   assert(val_true);
   assert(val_false);

   const struct dxil_value *v = dxil_emit_select(&ctx->mod, sel, val_true, val_false);
   if (!v)
      return false;

   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_b2f16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
{
   assert(val);

   struct dxil_module *m = &ctx->mod;

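   /* 0x3C00 is 1.0 in IEEE half-precision encoding. */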
   const struct dxil_value *c1 = dxil_module_get_float16_const(m, 0x3C00);
   const struct dxil_value *c0 = dxil_module_get_float16_const(m, 0);

   if (!c0 || !c1)
      return false;

   return emit_select(ctx, alu, val, c1, c0);
}

static bool
emit_b2f32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
{
   assert(val);

   struct dxil_module *m = &ctx->mod;

   const struct dxil_value *c1 = dxil_module_get_float_const(m, 1.0f);
   const struct dxil_value *c0 = dxil_module_get_float_const(m, 0.0f);

   if (!c0 || !c1)
      return false;

   return emit_select(ctx, alu, val, c1, c0);
}

static bool
emit_b2f64(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
{
   assert(val);

   struct dxil_module *m = &ctx->mod;

   const struct dxil_value *c1 = dxil_module_get_double_const(m, 1.0);
   const struct dxil_value *c0 = dxil_module_get_double_const(m, 0.0);

   if (!c0 || !c1)
      return false;

   ctx->mod.feats.doubles = 1;
   return emit_select(ctx, alu, val, c1, c0);
}

static bool
emit_f16tof32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val, bool shift)
{
   if (shift) {
      val = dxil_emit_binop(&ctx->mod, DXIL_BINOP_LSHR, val,
                            dxil_module_get_int32_const(&ctx->mod, 16), 0);
      if (!val)
         return false;
   }

   const struct dxil_func *func = dxil_get_function(&ctx->mod,
                                                    "dx.op.legacyF16ToF32",
                                                    DXIL_NONE);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F16TOF32);
   if (!opcode)
      return false;

   const struct dxil_value *args[] = {
      opcode,
      val
   };

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_f32tof16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val0, const struct dxil_value *val1)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod,
                                                    "dx.op.legacyF32ToF16",
                                                    DXIL_NONE);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F32TOF16);
   if (!opcode)
      return false;

   const struct dxil_value *args[] = {
      opcode,
      val0
   };

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;

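   /* The first conversion produces the low 16 bits; if the second component
    * isn't known to be zero, convert it too and OR it into the high half.
    */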
   if (!nir_src_is_const(alu->src[1].src) || nir_src_as_int(alu->src[1].src) != 0) {
      args[1] = val1;
      const struct dxil_value *v_high = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
      if (!v_high)
         return false;

      v_high = dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL, v_high,
                               dxil_module_get_int32_const(&ctx->mod, 16), 0);
      if (!v_high)
         return false;

      v = dxil_emit_binop(&ctx->mod, DXIL_BINOP_OR, v, v_high, 0);
      if (!v)
         return false;
   }

   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_vec(struct ntd_context *ctx, nir_alu_instr *alu, unsigned num_inputs)
{
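   /* DXIL values are scalar here; a NIR vecN just forwards each scalar
    * source into the corresponding destination channel.
    */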
   for (unsigned i = 0; i < num_inputs; i++) {
      const struct dxil_value *src =
         get_src_ssa(ctx, alu->src[i].src.ssa, alu->src[i].swizzle[0]);
      if (!src)
         return false;

      store_alu_dest(ctx, alu, i, src);
   }
   return true;
}

static bool
emit_make_double(struct ntd_context *ctx, nir_alu_instr *alu)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.makeDouble", DXIL_F64);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_MAKE_DOUBLE);
   if (!opcode)
      return false;

   const struct dxil_value *args[3] = {
      opcode,
      get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_uint32),
      get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[1], nir_type_uint32),
   };
   if (!args[1] || !args[2])
      return false;

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;
   store_def(ctx, &alu->def, 0, v);
   return true;
}

static bool
emit_split_double(struct ntd_context *ctx, nir_alu_instr *alu)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.splitDouble", DXIL_F64);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SPLIT_DOUBLE);
   if (!opcode)
      return false;

   const struct dxil_value *args[] = {
      opcode,
      get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_float64)
   };
   if (!args[1])
      return false;

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;

   const struct dxil_value *hi = dxil_emit_extractval(&ctx->mod, v, 0);
   const struct dxil_value *lo = dxil_emit_extractval(&ctx->mod, v, 1);
   if (!hi || !lo)
      return false;

   store_def(ctx, &alu->def, 0, hi);
   store_def(ctx, &alu->def, 1, lo);
   return true;
}

static bool
emit_alu(struct ntd_context *ctx, nir_alu_instr *alu)
{
   /* handle vec-instructions first; they are the only ones that produce
    * vector results.
    */
   switch (alu->op) {
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
   case nir_op_vec8:
   case nir_op_vec16:
      return emit_vec(ctx, alu, nir_op_infos[alu->op].num_inputs);
   case nir_op_mov: {
      assert(alu->def.num_components == 1);
      store_ssa_def(ctx, &alu->def, 0, get_src_ssa(ctx,
                    alu->src->src.ssa, alu->src->swizzle[0]));
      return true;
   }
   case nir_op_pack_double_2x32_dxil:
      return emit_make_double(ctx, alu);
   case nir_op_unpack_double_2x32_dxil:
      return emit_split_double(ctx, alu);
   case nir_op_bcsel: {
      /* Handled here to avoid the type-forced bitcast to int, since bcsel is
       * used for both ints and floats. Ideally the back-typing made both
       * sources match; if it didn't, explicitly convert src2 to src1's type.
       */
2894 const struct dxil_value *src1 = get_src_ssa(ctx, alu->src[1].src.ssa, alu->src[1].swizzle[0]);
2895 nir_alu_type src1_type = dxil_type_to_nir_type(dxil_value_get_type(src1));
2896 return emit_select(ctx, alu,
2897 get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_bool),
2898 src1,
2899 get_src(ctx, &alu->src[2].src, alu->src[2].swizzle[0], src1_type));
2900 }
2901 default:
2902 /* silence warnings */
2903 ;
2904 }
2905
2906 /* other ops should be scalar */
2907 const struct dxil_value *src[4];
2908 assert(nir_op_infos[alu->op].num_inputs <= 4);
2909 for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
2910 src[i] = get_alu_src(ctx, alu, i);
2911 if (!src[i])
2912 return false;
2913 }
2914
2915 switch (alu->op) {
2916 case nir_op_iadd:
2917 case nir_op_fadd: return emit_binop(ctx, alu, DXIL_BINOP_ADD, src[0], src[1]);
2918
2919 case nir_op_isub:
2920 case nir_op_fsub: return emit_binop(ctx, alu, DXIL_BINOP_SUB, src[0], src[1]);
2921
2922 case nir_op_imul:
2923 case nir_op_fmul: return emit_binop(ctx, alu, DXIL_BINOP_MUL, src[0], src[1]);
2924
2925 case nir_op_fdiv:
2926 if (alu->def.bit_size == 64)
2927 ctx->mod.feats.dx11_1_double_extensions = 1;
2928 return emit_binop(ctx, alu, DXIL_BINOP_SDIV, src[0], src[1]);
2929
2930 case nir_op_idiv:
2931 case nir_op_udiv:
2932 if (nir_src_is_const(alu->src[1].src)) {
2933 /* It's illegal to emit a literal divide by 0 in DXIL */
2934 nir_scalar divisor = nir_scalar_chase_alu_src(nir_get_scalar(&alu->def, 0), 1);
2935 if (nir_scalar_as_int(divisor) == 0) {
2936 store_alu_dest(ctx, alu, 0,
2937 dxil_module_get_int_const(&ctx->mod, 0, alu->def.bit_size));
2938 return true;
2939 }
2940 }
2941 return emit_binop(ctx, alu, alu->op == nir_op_idiv ? DXIL_BINOP_SDIV : DXIL_BINOP_UDIV, src[0], src[1]);
2942
2943 case nir_op_irem: return emit_binop(ctx, alu, DXIL_BINOP_SREM, src[0], src[1]);
2944 case nir_op_imod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
2945 case nir_op_umod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
2946 case nir_op_ishl: return emit_shift(ctx, alu, DXIL_BINOP_SHL, src[0], src[1]);
2947 case nir_op_ishr: return emit_shift(ctx, alu, DXIL_BINOP_ASHR, src[0], src[1]);
2948 case nir_op_ushr: return emit_shift(ctx, alu, DXIL_BINOP_LSHR, src[0], src[1]);
2949 case nir_op_iand: return emit_binop(ctx, alu, DXIL_BINOP_AND, src[0], src[1]);
2950 case nir_op_ior: return emit_binop(ctx, alu, DXIL_BINOP_OR, src[0], src[1]);
2951 case nir_op_ixor: return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], src[1]);
2952 case nir_op_inot: {
2953 unsigned bit_size = alu->def.bit_size;
2954 intmax_t val = bit_size == 1 ? 1 : -1;
2955 const struct dxil_value *negative_one = dxil_module_get_int_const(&ctx->mod, val, bit_size);
2956 return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], negative_one);
2957 }
2958 case nir_op_ieq: return emit_cmp(ctx, alu, DXIL_ICMP_EQ, src[0], src[1]);
2959 case nir_op_ine: return emit_cmp(ctx, alu, DXIL_ICMP_NE, src[0], src[1]);
2960 case nir_op_ige: return emit_cmp(ctx, alu, DXIL_ICMP_SGE, src[0], src[1]);
2961 case nir_op_uge: return emit_cmp(ctx, alu, DXIL_ICMP_UGE, src[0], src[1]);
2962 case nir_op_ilt: return emit_cmp(ctx, alu, DXIL_ICMP_SLT, src[0], src[1]);
2963 case nir_op_ult: return emit_cmp(ctx, alu, DXIL_ICMP_ULT, src[0], src[1]);
2964 case nir_op_feq: return emit_cmp(ctx, alu, DXIL_FCMP_OEQ, src[0], src[1]);
2965 case nir_op_fneu: return emit_cmp(ctx, alu, DXIL_FCMP_UNE, src[0], src[1]);
2966 case nir_op_flt: return emit_cmp(ctx, alu, DXIL_FCMP_OLT, src[0], src[1]);
2967 case nir_op_fge: return emit_cmp(ctx, alu, DXIL_FCMP_OGE, src[0], src[1]);
2968 case nir_op_ftrunc: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_Z, src[0]);
2969 case nir_op_fabs: return emit_unary_intin(ctx, alu, DXIL_INTR_FABS, src[0]);
2970 case nir_op_fcos: return emit_unary_intin(ctx, alu, DXIL_INTR_FCOS, src[0]);
2971 case nir_op_fsin: return emit_unary_intin(ctx, alu, DXIL_INTR_FSIN, src[0]);
2972 case nir_op_fceil: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_PI, src[0]);
2973 case nir_op_fexp2: return emit_unary_intin(ctx, alu, DXIL_INTR_FEXP2, src[0]);
2974 case nir_op_flog2: return emit_unary_intin(ctx, alu, DXIL_INTR_FLOG2, src[0]);
2975 case nir_op_ffloor: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NI, src[0]);
2976 case nir_op_ffract: return emit_unary_intin(ctx, alu, DXIL_INTR_FRC, src[0]);
2977 case nir_op_fisnormal: return emit_unary_intin(ctx, alu, DXIL_INTR_ISNORMAL, src[0]);
2978 case nir_op_fisfinite: return emit_unary_intin(ctx, alu, DXIL_INTR_ISFINITE, src[0]);
2979
2980 case nir_op_fddx:
2981 case nir_op_fddx_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_COARSE, src[0]);
2982 case nir_op_fddx_fine: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_FINE, src[0]);
2983 case nir_op_fddy:
2984 case nir_op_fddy_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDY_COARSE, src[0]);
2985 case nir_op_fddy_fine: return emit_unary_intin(ctx, alu, DXIL_INTR_DDY_FINE, src[0]);
2986
2987 case nir_op_fround_even: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NE, src[0]);
2988 case nir_op_frcp: {
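      /* There is no rcp op to call here, so lower to 1.0 / x; for fp16 the constant
       * is given as a raw bit pattern, and 0x3C00 encodes 1.0. */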
2989 const struct dxil_value *one;
2990 switch (alu->def.bit_size) {
2991 case 16:
2992 one = dxil_module_get_float16_const(&ctx->mod, 0x3C00);
2993 break;
2994 case 32:
2995 one = dxil_module_get_float_const(&ctx->mod, 1.0f);
2996 break;
2997 case 64:
2998 one = dxil_module_get_double_const(&ctx->mod, 1.0);
2999 break;
3000 default: unreachable("Invalid float size");
3001 }
3002 return emit_binop(ctx, alu, DXIL_BINOP_SDIV, one, src[0]);
3003 }
3004 case nir_op_fsat: return emit_unary_intin(ctx, alu, DXIL_INTR_SATURATE, src[0]);
3005 case nir_op_bit_count: return emit_unary_intin(ctx, alu, DXIL_INTR_COUNTBITS, src[0]);
3006 case nir_op_bitfield_reverse: return emit_unary_intin(ctx, alu, DXIL_INTR_BFREV, src[0]);
3007 case nir_op_ufind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_HI, src[0]);
3008 case nir_op_ifind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_SHI, src[0]);
3009 case nir_op_find_lsb: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_LO, src[0]);
3010 case nir_op_imax: return emit_binary_intin(ctx, alu, DXIL_INTR_IMAX, src[0], src[1]);
3011 case nir_op_imin: return emit_binary_intin(ctx, alu, DXIL_INTR_IMIN, src[0], src[1]);
3012 case nir_op_umax: return emit_binary_intin(ctx, alu, DXIL_INTR_UMAX, src[0], src[1]);
3013 case nir_op_umin: return emit_binary_intin(ctx, alu, DXIL_INTR_UMIN, src[0], src[1]);
3014 case nir_op_frsq: return emit_unary_intin(ctx, alu, DXIL_INTR_RSQRT, src[0]);
3015 case nir_op_fsqrt: return emit_unary_intin(ctx, alu, DXIL_INTR_SQRT, src[0]);
3016 case nir_op_fmax: return emit_binary_intin(ctx, alu, DXIL_INTR_FMAX, src[0], src[1]);
3017 case nir_op_fmin: return emit_binary_intin(ctx, alu, DXIL_INTR_FMIN, src[0], src[1]);
3018 case nir_op_ffma:
3019 if (alu->def.bit_size == 64)
3020 ctx->mod.feats.dx11_1_double_extensions = 1;
3021 return emit_tertiary_intin(ctx, alu, DXIL_INTR_FMA, src[0], src[1], src[2]);
3022
3023 case nir_op_ibfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_IBFE, src[2], src[1], src[0]);
3024 case nir_op_ubfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_UBFE, src[2], src[1], src[0]);
3025 case nir_op_bitfield_insert: return emit_bitfield_insert(ctx, alu, src[0], src[1], src[2], src[3]);
3026
3027 case nir_op_unpack_half_2x16_split_x: return emit_f16tof32(ctx, alu, src[0], false);
3028 case nir_op_unpack_half_2x16_split_y: return emit_f16tof32(ctx, alu, src[0], true);
3029 case nir_op_pack_half_2x16_split: return emit_f32tof16(ctx, alu, src[0], src[1]);
3030
3031 case nir_op_sdot_4x8_iadd: return emit_dot4add_packed(ctx, alu, DXIL_INTR_DOT4_ADD_I8_PACKED, src[0], src[1], src[2]);
3032 case nir_op_udot_4x8_uadd: return emit_dot4add_packed(ctx, alu, DXIL_INTR_DOT4_ADD_U8_PACKED, src[0], src[1], src[2]);
3033
3034 case nir_op_i2i1:
3035 case nir_op_u2u1:
3036 case nir_op_b2i16:
3037 case nir_op_i2i16:
3038 case nir_op_i2imp:
3039 case nir_op_f2i16:
3040 case nir_op_f2imp:
3041 case nir_op_f2u16:
3042 case nir_op_f2ump:
3043 case nir_op_u2u16:
3044 case nir_op_u2f16:
3045 case nir_op_u2fmp:
3046 case nir_op_i2f16:
3047 case nir_op_i2fmp:
3048 case nir_op_f2f16_rtz:
3049 case nir_op_f2f16:
3050 case nir_op_f2fmp:
3051 case nir_op_b2i32:
3052 case nir_op_f2f32:
3053 case nir_op_f2i32:
3054 case nir_op_f2u32:
3055 case nir_op_i2f32:
3056 case nir_op_i2i32:
3057 case nir_op_u2f32:
3058 case nir_op_u2u32:
3059 case nir_op_b2i64:
3060 case nir_op_f2f64:
3061 case nir_op_f2i64:
3062 case nir_op_f2u64:
3063 case nir_op_i2f64:
3064 case nir_op_i2i64:
3065 case nir_op_u2f64:
3066 case nir_op_u2u64:
3067 return emit_cast(ctx, alu, src[0]);
3068
3069 case nir_op_b2f16: return emit_b2f16(ctx, alu, src[0]);
3070 case nir_op_b2f32: return emit_b2f32(ctx, alu, src[0]);
3071 case nir_op_b2f64: return emit_b2f64(ctx, alu, src[0]);
3072 default:
3073 log_nir_instr_unsupported(ctx->logger, "Unimplemented ALU instruction",
3074 &alu->instr);
3075 return false;
3076 }
3077 }
3078
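/* Emits dx.op.cbufferLoadLegacy, which returns an entire 16-byte constant-buffer
 * row; callers extract the individual components they need from the result. */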
3079 static const struct dxil_value *
3080 load_ubo(struct ntd_context *ctx, const struct dxil_value *handle,
3081 const struct dxil_value *offset, enum overload_type overload)
3082 {
3083 assert(handle && offset);
3084
3085 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CBUFFER_LOAD_LEGACY);
3086 if (!opcode)
3087 return NULL;
3088
3089 const struct dxil_value *args[] = {
3090 opcode, handle, offset
3091 };
3092
3093 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cbufferLoadLegacy", overload);
3094 if (!func)
3095 return NULL;
3096 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3097 }
3098
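/* Maps NIR barrier semantics onto the dx.op.barrier flag word: a workgroup
 * execution scope adds a thread-group sync, UAV-visible modes select a global
 * or group-local UAV fence, and shared memory adds a groupshared fence in
 * compute shaders. */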
3099 static bool
3100 emit_barrier_impl(struct ntd_context *ctx, nir_variable_mode modes, mesa_scope execution_scope, mesa_scope mem_scope)
3101 {
3102 const struct dxil_value *opcode, *mode;
3103 const struct dxil_func *func;
3104 uint32_t flags = 0;
3105
3106 if (execution_scope == SCOPE_WORKGROUP)
3107 flags |= DXIL_BARRIER_MODE_SYNC_THREAD_GROUP;
3108
3109 bool is_compute = ctx->mod.shader_kind == DXIL_COMPUTE_SHADER;
3110
3111 if ((modes & (nir_var_mem_ssbo | nir_var_mem_global | nir_var_image)) &&
3112 (mem_scope > SCOPE_WORKGROUP || !is_compute)) {
3113 flags |= DXIL_BARRIER_MODE_UAV_FENCE_GLOBAL;
3114 } else {
3115 flags |= DXIL_BARRIER_MODE_UAV_FENCE_THREAD_GROUP;
3116 }
3117
3118 if ((modes & nir_var_mem_shared) && is_compute)
3119 flags |= DXIL_BARRIER_MODE_GROUPSHARED_MEM_FENCE;
3120
3121 func = dxil_get_function(&ctx->mod, "dx.op.barrier", DXIL_NONE);
3122 if (!func)
3123 return false;
3124
3125 opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_BARRIER);
3126 if (!opcode)
3127 return false;
3128
3129 mode = dxil_module_get_int32_const(&ctx->mod, flags);
3130 if (!mode)
3131 return false;
3132
3133 const struct dxil_value *args[] = { opcode, mode };
3134
3135 return dxil_emit_call_void(&ctx->mod, func,
3136 args, ARRAY_SIZE(args));
3137 }
3138
3139 static bool
3140 emit_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3141 {
3142 return emit_barrier_impl(ctx,
3143 nir_intrinsic_memory_modes(intr),
3144 nir_intrinsic_execution_scope(intr),
3145 nir_intrinsic_memory_scope(intr));
3146 }
3147
3148 static bool
3149 emit_load_global_invocation_id(struct ntd_context *ctx,
3150 nir_intrinsic_instr *intr)
3151 {
3152 nir_component_mask_t comps = nir_def_components_read(&intr->def);
3153
3154 for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
3155 if (comps & (1 << i)) {
3156 const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
3157 if (!idx)
3158 return false;
3159 const struct dxil_value *globalid = emit_threadid_call(ctx, idx);
3160
3161 if (!globalid)
3162 return false;
3163
3164 store_def(ctx, &intr->def, i, globalid);
3165 }
3166 }
3167 return true;
3168 }
3169
3170 static bool
3171 emit_load_local_invocation_id(struct ntd_context *ctx,
3172 nir_intrinsic_instr *intr)
3173 {
3174 nir_component_mask_t comps = nir_def_components_read(&intr->def);
3175
3176 for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
3177 if (comps & (1 << i)) {
3178 const struct dxil_value
3179 *idx = dxil_module_get_int32_const(&ctx->mod, i);
3180 if (!idx)
3181 return false;
3182 const struct dxil_value
3183 *threadidingroup = emit_threadidingroup_call(ctx, idx);
3184 if (!threadidingroup)
3185 return false;
3186 store_def(ctx, &intr->def, i, threadidingroup);
3187 }
3188 }
3189 return true;
3190 }
3191
3192 static bool
3193 emit_load_local_invocation_index(struct ntd_context *ctx,
3194 nir_intrinsic_instr *intr)
3195 {
3196 const struct dxil_value
3197 *flattenedthreadidingroup = emit_flattenedthreadidingroup_call(ctx);
3198 if (!flattenedthreadidingroup)
3199 return false;
3200 store_def(ctx, &intr->def, 0, flattenedthreadidingroup);
3201
3202 return true;
3203 }
3204
3205 static bool
3206 emit_load_local_workgroup_id(struct ntd_context *ctx,
3207 nir_intrinsic_instr *intr)
3208 {
3209 nir_component_mask_t comps = nir_def_components_read(&intr->def);
3210
3211 for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
3212 if (comps & (1 << i)) {
3213 const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
3214 if (!idx)
3215 return false;
3216 const struct dxil_value *groupid = emit_groupid_call(ctx, idx);
3217 if (!groupid)
3218 return false;
3219 store_def(ctx, &intr->def, i, groupid);
3220 }
3221 }
3222 return true;
3223 }
3224
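/* Helper for DXIL ops whose only argument is the opcode itself, i.e. calls of
 * the form "%ret = call @dx.op.<name>(i32 opcode)". */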
3225 static const struct dxil_value *
3226 call_unary_external_function(struct ntd_context *ctx,
3227 const char *name,
3228 int32_t dxil_intr,
3229 enum overload_type overload)
3230 {
3231 const struct dxil_func *func =
3232 dxil_get_function(&ctx->mod, name, overload);
3233 if (!func)
3234 return NULL;
3235
3236 const struct dxil_value *opcode =
3237 dxil_module_get_int32_const(&ctx->mod, dxil_intr);
3238 if (!opcode)
3239 return NULL;
3240
3241 const struct dxil_value *args[] = {opcode};
3242
3243 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3244 }
3245
3246 static bool
3247 emit_load_unary_external_function(struct ntd_context *ctx,
3248 nir_intrinsic_instr *intr, const char *name,
3249 int32_t dxil_intr,
3250 nir_alu_type type)
3251 {
3252 const struct dxil_value *value = call_unary_external_function(ctx, name, dxil_intr,
3253 get_overload(type, intr->def.bit_size));
if (!value)
return false;
3254 store_def(ctx, &intr->def, 0, value);
3255
3256 return true;
3257 }
3258
3259 static bool
3260 emit_load_sample_mask_in(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3261 {
3262 const struct dxil_value *value = call_unary_external_function(ctx,
3263 "dx.op.coverage", DXIL_INTR_COVERAGE, DXIL_I32);
if (!value)
return false;
3264
3265 /* Mask coverage with (1 << sample index). Note: done as an AND to handle extrapolation cases. */
3266 if (ctx->mod.info.has_per_sample_input) {
3267 value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_AND, value,
3268 dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL,
3269 dxil_module_get_int32_const(&ctx->mod, 1),
3270 call_unary_external_function(ctx, "dx.op.sampleIndex", DXIL_INTR_SAMPLE_INDEX, DXIL_I32), 0), 0);
3271 }
3272
3273 store_def(ctx, &intr->def, 0, value);
3274 return true;
3275 }
3276
3277 static bool
3278 emit_load_tess_coord(struct ntd_context *ctx,
3279 nir_intrinsic_instr *intr)
3280 {
3281 const struct dxil_func *func =
3282 dxil_get_function(&ctx->mod, "dx.op.domainLocation", DXIL_F32);
3283 if (!func)
3284 return false;
3285
3286 const struct dxil_value *opcode =
3287 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DOMAIN_LOCATION);
3288 if (!opcode)
3289 return false;
3290
3291 unsigned num_coords = ctx->shader->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES ? 3 : 2;
3292 for (unsigned i = 0; i < num_coords; ++i) {
3293 unsigned component_idx = i;
3294
3295 const struct dxil_value *component = dxil_module_get_int32_const(&ctx->mod, component_idx);
3296 if (!component)
3297 return false;
3298
3299 const struct dxil_value *args[] = { opcode, component };
3300
3301 const struct dxil_value *value =
3302 dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3303 store_def(ctx, &intr->def, i, value);
3304 }
3305
3306 for (unsigned i = num_coords; i < intr->def.num_components; ++i) {
3307 const struct dxil_value *value = dxil_module_get_float_const(&ctx->mod, 0.0f);
3308 store_def(ctx, &intr->def, i, value);
3309 }
3310
3311 return true;
3312 }
3313
3314 static const struct dxil_value *
3315 get_int32_undef(struct dxil_module *m)
3316 {
3317 const struct dxil_type *int32_type =
3318 dxil_module_get_int_type(m, 32);
3319 if (!int32_type)
3320 return NULL;
3321
3322 return dxil_module_get_undef(m, int32_type);
3323 }
3324
3325 static const struct dxil_value *
3326 get_resource_handle(struct ntd_context *ctx, nir_src *src, enum dxil_resource_class class,
3327 enum dxil_resource_kind kind)
3328 {
3329 /* This source might be one of:
3330 * 1. Constant resource index - just look it up in precomputed handle arrays
3331 * If it's null in that array, create a handle, and store the result
3332 * 2. A handle from load_vulkan_descriptor - just get the stored SSA value
3333 * 3. Dynamic resource index - create a handle for it here
3334 */
3335 assert(src->ssa->num_components == 1 && src->ssa->bit_size == 32);
3336 nir_const_value *const_block_index = nir_src_as_const_value(*src);
3337 const struct dxil_value **handle_entry = NULL;
3338 if (const_block_index) {
3339 assert(ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN);
3340 switch (kind) {
3341 case DXIL_RESOURCE_KIND_CBUFFER:
3342 handle_entry = &ctx->cbv_handles[const_block_index->u32];
3343 break;
3344 case DXIL_RESOURCE_KIND_RAW_BUFFER:
3345 if (class == DXIL_RESOURCE_CLASS_UAV)
3346 handle_entry = &ctx->ssbo_handles[const_block_index->u32];
3347 else
3348 handle_entry = &ctx->srv_handles[const_block_index->u32];
3349 break;
3350 case DXIL_RESOURCE_KIND_SAMPLER:
3351 handle_entry = &ctx->sampler_handles[const_block_index->u32];
3352 break;
3353 default:
3354 if (class == DXIL_RESOURCE_CLASS_UAV)
3355 handle_entry = &ctx->image_handles[const_block_index->u32];
3356 else
3357 handle_entry = &ctx->srv_handles[const_block_index->u32];
3358 break;
3359 }
3360 }
3361
3362 if (handle_entry && *handle_entry)
3363 return *handle_entry;
3364
3365 if (nir_src_as_deref(*src) ||
3366 ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
3367 return get_src_ssa(ctx, src->ssa, 0);
3368 }
3369
3370 unsigned space = 0;
3371 if (ctx->opts->environment == DXIL_ENVIRONMENT_GL &&
3372 class == DXIL_RESOURCE_CLASS_UAV) {
3373 if (kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
3374 space = 2;
3375 else
3376 space = 1;
3377 }
3378
3379 /* The base binding here will almost always be zero. The only cases where we end
3380 * up in this type of dynamic indexing are:
3381 * 1. GL UBOs
3382 * 2. GL SSBOs
3383 * 3. CL SSBOs
3384 * In all cases except GL UBOs, the resources are a single zero-based array.
3385 * In the GL UBO case, the base is 1, because uniforms use binding 0 and cannot be
3386 * dynamically indexed. All other cases should either fall into static indexing (first early return),
3387 * deref-based dynamic handle creation (images, or Vulkan textures/samplers), or
3388 * load_vulkan_descriptor handle creation.
3389 */
3390 unsigned base_binding = 0;
3391 if (ctx->opts->environment == DXIL_ENVIRONMENT_GL &&
3392 class == DXIL_RESOURCE_CLASS_CBV)
3393 base_binding = 1;
3394
3395 const struct dxil_value *value = get_src(ctx, src, 0, nir_type_uint);
3396 const struct dxil_value *handle = emit_createhandle_call_dynamic(ctx, class,
3397 space, base_binding, value, !const_block_index);
3398 if (handle_entry)
3399 *handle_entry = handle;
3400
3401 return handle;
3402 }
3403
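/* Bindless (SM 6.6-style) path: build an unannotated handle from a
 * descriptor-heap index, then annotate it with the resource properties so the
 * handle carries its resource type. */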
3404 static const struct dxil_value *
3405 create_image_handle(struct ntd_context *ctx, nir_intrinsic_instr *image_intr)
3406 {
3407 const struct dxil_value *unannotated_handle =
3408 emit_createhandle_heap(ctx, get_src(ctx, &image_intr->src[0], 0, nir_type_uint32), false, true /*TODO: divergence*/);
3409 const struct dxil_value *res_props =
3410 dxil_module_get_uav_res_props_const(&ctx->mod, image_intr);
3411
3412 if (!unannotated_handle || !res_props)
3413 return NULL;
3414
3415 return emit_annotate_handle(ctx, unannotated_handle, res_props);
3416 }
3417
3418 static const struct dxil_value *
3419 create_srv_handle(struct ntd_context *ctx, nir_tex_instr *tex, nir_src *src)
3420 {
3421 const struct dxil_value *unannotated_handle =
3422 emit_createhandle_heap(ctx, get_src(ctx, src, 0, nir_type_uint32), false, true /*TODO: divergence*/);
3423 const struct dxil_value *res_props =
3424 dxil_module_get_srv_res_props_const(&ctx->mod, tex);
3425
3426 if (!unannotated_handle || !res_props)
3427 return NULL;
3428
3429 return emit_annotate_handle(ctx, unannotated_handle, res_props);
3430 }
3431
3432 static const struct dxil_value *
3433 create_sampler_handle(struct ntd_context *ctx, bool is_shadow, nir_src *src)
3434 {
3435 const struct dxil_value *unannotated_handle =
3436 emit_createhandle_heap(ctx, get_src(ctx, src, 0, nir_type_uint32), true, true /*TODO: divergence*/);
3437 const struct dxil_value *res_props =
3438 dxil_module_get_sampler_res_props_const(&ctx->mod, is_shadow);
3439
3440 if (!unannotated_handle || !res_props)
3441 return NULL;
3442
3443 return emit_annotate_handle(ctx, unannotated_handle, res_props);
3444 }
3445
3446 static bool
3447 emit_load_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3448 {
3449 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
3450
3451 enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV;
3452 if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
3453 nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
3454 if (var && var->data.access & ACCESS_NON_WRITEABLE)
3455 class = DXIL_RESOURCE_CLASS_SRV;
3456 }
3457
3458 const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER);
3459 const struct dxil_value *offset =
3460 get_src(ctx, &intr->src[1], 0, nir_type_uint);
3461 if (!int32_undef || !handle || !offset)
3462 return false;
3463
3464 assert(nir_src_bit_size(intr->src[0]) == 32);
3465 assert(nir_intrinsic_dest_components(intr) <= 4);
3466
3467 const struct dxil_value *coord[2] = {
3468 offset,
3469 int32_undef
3470 };
3471
3472 enum overload_type overload = get_ambiguous_overload_alu_type(ctx, intr, nir_type_uint);
3473 const struct dxil_value *load = ctx->mod.minor_version >= 2 ?
3474 emit_raw_bufferload_call(ctx, handle, coord,
3475 overload,
3476 nir_intrinsic_dest_components(intr),
3477 intr->def.bit_size / 8) :
3478 emit_bufferload_call(ctx, handle, coord, overload);
3479 if (!load)
3480 return false;
3481
3482 for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
3483 const struct dxil_value *val =
3484 dxil_emit_extractval(&ctx->mod, load, i);
3485 if (!val)
3486 return false;
3487 store_def(ctx, &intr->def, i, val);
3488 }
3489 if (intr->def.bit_size == 16)
3490 ctx->mod.feats.native_low_precision = true;
3491 return true;
3492 }
3493
3494 static bool
3495 emit_store_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3496 {
3497 const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[1], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
3498 const struct dxil_value *offset =
3499 get_src(ctx, &intr->src[2], 0, nir_type_uint);
3500 if (!handle || !offset)
3501 return false;
3502
3503 unsigned num_components = nir_src_num_components(intr->src[0]);
3504 assert(num_components <= 4);
3505 if (nir_src_bit_size(intr->src[0]) == 16)
3506 ctx->mod.feats.native_low_precision = true;
3507
3508 nir_alu_type type =
3509 dxil_type_to_nir_type(dxil_value_get_type(get_src_ssa(ctx, intr->src[0].ssa, 0)));
3510 const struct dxil_value *value[4] = { 0 };
3511 for (unsigned i = 0; i < num_components; ++i) {
3512 value[i] = get_src(ctx, &intr->src[0], i, type);
3513 if (!value[i])
3514 return false;
3515 }
3516
3517 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
3518 if (!int32_undef)
3519 return false;
3520
3521 const struct dxil_value *coord[2] = {
3522 offset,
3523 int32_undef
3524 };
3525
3526 enum overload_type overload = get_overload(type, intr->src[0].ssa->bit_size);
3527 if (num_components < 4) {
3528 const struct dxil_value *value_undef = dxil_module_get_undef(&ctx->mod, dxil_value_get_type(value[0]));
3529 if (!value_undef)
3530 return false;
3531
3532 for (int i = num_components; i < 4; ++i)
3533 value[i] = value_undef;
3534 }
3535
3536 const struct dxil_value *write_mask =
3537 dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
3538 if (!write_mask)
3539 return false;
3540
3541 return ctx->mod.minor_version >= 2 ?
3542 emit_raw_bufferstore_call(ctx, handle, coord, value, write_mask, overload, intr->src[0].ssa->bit_size / 8) :
3543 emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload);
3544 }
3545
3546 static bool
3547 emit_load_ubo_vec4(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3548 {
3549 const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, DXIL_RESOURCE_KIND_CBUFFER);
3550 const struct dxil_value *offset =
3551 get_src(ctx, &intr->src[1], 0, nir_type_uint);
3552
3553 if (!handle || !offset)
3554 return false;
3555
3556 enum overload_type overload = get_ambiguous_overload_alu_type(ctx, intr, nir_type_uint);
3557 const struct dxil_value *agg = load_ubo(ctx, handle, offset, overload);
3558 if (!agg)
3559 return false;
3560
3561 unsigned first_component = nir_intrinsic_has_component(intr) ?
3562 nir_intrinsic_component(intr) : 0;
3563 for (unsigned i = 0; i < intr->def.num_components; i++)
3564 store_def(ctx, &intr->def, i,
3565 dxil_emit_extractval(&ctx->mod, agg, i + first_component));
3566
3567 if (intr->def.bit_size == 16)
3568 ctx->mod.feats.native_low_precision = true;
3569 return true;
3570 }
3571
3572 /* Patch-ness must be part of the match, since driver_location is *not* unique
3573 * between control points and patch variables in HS/DS
3574 */
3575 static nir_variable *
3576 find_patch_matching_variable_by_driver_location(nir_shader *s, nir_variable_mode mode, unsigned driver_location, bool patch)
3577 {
3578 nir_foreach_variable_with_modes(var, s, mode) {
3579 if (var->data.driver_location == driver_location &&
3580 var->data.patch == patch)
3581 return var;
3582 }
3583 return NULL;
3584 }
3585
3586 static bool
3587 emit_store_output_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3588 {
3589 assert(intr->intrinsic == nir_intrinsic_store_output ||
3590 ctx->mod.shader_kind == DXIL_HULL_SHADER);
3591 bool is_patch_constant = intr->intrinsic == nir_intrinsic_store_output &&
3592 ctx->mod.shader_kind == DXIL_HULL_SHADER;
3593 nir_alu_type out_type = nir_intrinsic_src_type(intr);
3594 enum overload_type overload = get_overload(out_type, intr->src[0].ssa->bit_size);
3595 const struct dxil_func *func = dxil_get_function(&ctx->mod, is_patch_constant ?
3596 "dx.op.storePatchConstant" : "dx.op.storeOutput",
3597 overload);
3598
3599 if (!func)
3600 return false;
3601
3602 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, is_patch_constant ?
3603 DXIL_INTR_STORE_PATCH_CONSTANT : DXIL_INTR_STORE_OUTPUT);
3604 const struct dxil_value *output_id = dxil_module_get_int32_const(&ctx->mod, nir_intrinsic_base(intr));
3605 unsigned row_index = intr->intrinsic == nir_intrinsic_store_output ? 1 : 2;
3606
3607 /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature
3608 * generation, so muck with them here too.
3609 */
3610 nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
3611 bool is_tess_level = is_patch_constant &&
3612 (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
3613 semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER);
3614
3615 const struct dxil_value *row = NULL;
3616 const struct dxil_value *col = NULL;
3617 if (is_tess_level)
3618 col = dxil_module_get_int8_const(&ctx->mod, 0);
3619 else
3620 row = get_src(ctx, &intr->src[row_index], 0, nir_type_int);
3621
3622 bool success = true;
3623 uint32_t writemask = nir_intrinsic_write_mask(intr);
3624
3625 nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_out, nir_intrinsic_base(intr), is_patch_constant);
3626 unsigned var_base_component = var->data.location_frac;
3627 unsigned base_component = nir_intrinsic_component(intr) - var_base_component;
3628
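   /* Validator 5+ tracks per-component output usage: clear the never_writes_mask
    * bits for the components we write, and flag dynamically indexed rows in the
    * PSV data. 64-bit values occupy two 32-bit signature components, hence
    * comp_size. */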
3629 if (ctx->mod.minor_validator >= 5) {
3630 struct dxil_signature_record *sig_rec = is_patch_constant ?
3631 &ctx->mod.patch_consts[nir_intrinsic_base(intr)] :
3632 &ctx->mod.outputs[nir_intrinsic_base(intr)];
3633 unsigned comp_size = intr->src[0].ssa->bit_size == 64 ? 2 : 1;
3634 unsigned comp_mask = 0;
3635 if (is_tess_level)
3636 comp_mask = 1;
3637 else if (comp_size == 1)
3638 comp_mask = writemask << var_base_component;
3639 else {
3640 for (unsigned i = 0; i < intr->num_components; ++i)
3641 if ((writemask & (1 << i)))
3642 comp_mask |= 3 << ((i + var_base_component) * comp_size);
3643 }
3644 for (unsigned r = 0; r < sig_rec->num_elements; ++r)
3645 sig_rec->elements[r].never_writes_mask &= ~comp_mask;
3646
3647 if (!nir_src_is_const(intr->src[row_index])) {
3648 struct dxil_psv_signature_element *psv_rec = is_patch_constant ?
3649 &ctx->mod.psv_patch_consts[nir_intrinsic_base(intr)] :
3650 &ctx->mod.psv_outputs[nir_intrinsic_base(intr)];
3651 psv_rec->dynamic_mask_and_stream |= comp_mask;
3652 }
3653 }
3654
3655 for (unsigned i = 0; i < intr->num_components && success; ++i) {
3656 if (writemask & (1 << i)) {
3657 if (is_tess_level)
3658 row = dxil_module_get_int32_const(&ctx->mod, i + base_component);
3659 else
3660 col = dxil_module_get_int8_const(&ctx->mod, i + base_component);
3661 const struct dxil_value *value = get_src(ctx, &intr->src[0], i, out_type);
3662 if (!col || !row || !value)
3663 return false;
3664
3665 const struct dxil_value *args[] = {
3666 opcode, output_id, row, col, value
3667 };
3668 success &= dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
3669 }
3670 }
3671
3672 return success;
3673 }
3674
3675 static bool
3676 emit_load_input_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3677 {
3678 bool attr_at_vertex = false;
3679 if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER &&
3680 ctx->opts->interpolate_at_vertex &&
3681 ctx->opts->provoking_vertex != 0 &&
3682 (nir_intrinsic_dest_type(intr) & nir_type_float)) {
3683 nir_variable *var = nir_find_variable_with_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr));
3684
3685 attr_at_vertex = var && var->data.interpolation == INTERP_MODE_FLAT;
3686 }
3687
3688 bool is_patch_constant = (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER &&
3689 intr->intrinsic == nir_intrinsic_load_input) ||
3690 (ctx->mod.shader_kind == DXIL_HULL_SHADER &&
3691 intr->intrinsic == nir_intrinsic_load_output);
3692 bool is_output_control_point = intr->intrinsic == nir_intrinsic_load_per_vertex_output;
3693
3694 unsigned opcode_val;
3695 const char *func_name;
3696 if (attr_at_vertex) {
3697 opcode_val = DXIL_INTR_ATTRIBUTE_AT_VERTEX;
3698 func_name = "dx.op.attributeAtVertex";
3699 if (ctx->mod.minor_validator >= 6)
3700 ctx->mod.feats.barycentrics = 1;
3701 } else if (is_patch_constant) {
3702 opcode_val = DXIL_INTR_LOAD_PATCH_CONSTANT;
3703 func_name = "dx.op.loadPatchConstant";
3704 } else if (is_output_control_point) {
3705 opcode_val = DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT;
3706 func_name = "dx.op.loadOutputControlPoint";
3707 } else {
3708 opcode_val = DXIL_INTR_LOAD_INPUT;
3709 func_name = "dx.op.loadInput";
3710 }
3711
3712 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, opcode_val);
3713 if (!opcode)
3714 return false;
3715
3716 const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod,
3717 is_patch_constant || is_output_control_point ?
3718 nir_intrinsic_base(intr) :
3719 ctx->mod.input_mappings[nir_intrinsic_base(intr)]);
3720 if (!input_id)
3721 return false;
3722
3723 bool is_per_vertex =
3724 intr->intrinsic == nir_intrinsic_load_per_vertex_input ||
3725 intr->intrinsic == nir_intrinsic_load_per_vertex_output;
3726 int row_index = is_per_vertex ? 1 : 0;
3727 const struct dxil_value *vertex_id = NULL;
3728 if (!is_patch_constant) {
3729 if (is_per_vertex) {
3730 vertex_id = get_src(ctx, &intr->src[0], 0, nir_type_int);
3731 } else if (attr_at_vertex) {
3732 vertex_id = dxil_module_get_int8_const(&ctx->mod, ctx->opts->provoking_vertex);
3733 } else {
3734 const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
3735 if (!int32_type)
3736 return false;
3737
3738 vertex_id = dxil_module_get_undef(&ctx->mod, int32_type);
3739 }
3740 if (!vertex_id)
3741 return false;
3742 }
3743
3744 /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature
3745 * generation, so muck with them here too.
3746 */
3747 nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
3748 bool is_tess_level = is_patch_constant &&
3749 (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
3750 semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER);
3751
3752 const struct dxil_value *row = NULL;
3753 const struct dxil_value *comp = NULL;
3754 if (is_tess_level)
3755 comp = dxil_module_get_int8_const(&ctx->mod, 0);
3756 else
3757 row = get_src(ctx, &intr->src[row_index], 0, nir_type_int);
3758
3759 nir_alu_type out_type = nir_intrinsic_dest_type(intr);
3760 enum overload_type overload = get_overload(out_type, intr->def.bit_size);
3761
3762 const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, overload);
3763
3764 if (!func)
3765 return false;
3766
3767 nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), is_patch_constant);
3768 unsigned var_base_component = var ? var->data.location_frac : 0;
3769 unsigned base_component = nir_intrinsic_component(intr) - var_base_component;
3770
3771 if (ctx->mod.minor_validator >= 5 &&
3772 !is_output_control_point &&
3773 intr->intrinsic != nir_intrinsic_load_output) {
3774 struct dxil_signature_record *sig_rec = is_patch_constant ?
3775 &ctx->mod.patch_consts[nir_intrinsic_base(intr)] :
3776 &ctx->mod.inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
3777 unsigned comp_size = intr->def.bit_size == 64 ? 2 : 1;
3778 unsigned comp_mask = (1 << (intr->num_components * comp_size)) - 1;
3779 comp_mask <<= (var_base_component * comp_size);
3780 if (is_tess_level)
3781 comp_mask = 1;
3782 for (unsigned r = 0; r < sig_rec->num_elements; ++r)
3783 sig_rec->elements[r].always_reads_mask |= (comp_mask & sig_rec->elements[r].mask);
3784
3785 if (!nir_src_is_const(intr->src[row_index])) {
3786 struct dxil_psv_signature_element *psv_rec = is_patch_constant ?
3787 &ctx->mod.psv_patch_consts[nir_intrinsic_base(intr)] :
3788 &ctx->mod.psv_inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
3789 psv_rec->dynamic_mask_and_stream |= comp_mask;
3790 }
3791 }
3792
3793 for (unsigned i = 0; i < intr->num_components; ++i) {
3794 if (is_tess_level)
3795 row = dxil_module_get_int32_const(&ctx->mod, i + base_component);
3796 else
3797 comp = dxil_module_get_int8_const(&ctx->mod, i + base_component);
3798
3799 if (!row || !comp)
3800 return false;
3801
3802 const struct dxil_value *args[] = {
3803 opcode, input_id, row, comp, vertex_id
3804 };
3805
3806 unsigned num_args = ARRAY_SIZE(args) - (is_patch_constant ? 1 : 0);
3807 const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args);
3808 if (!retval)
3809 return false;
3810 store_def(ctx, &intr->def, i, retval);
3811 }
3812 return true;
3813 }
3814
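/* Lowers load_interpolated_input by mapping the source barycentric intrinsic
 * onto the matching DXIL evaluation op: evalSnapped for pixel/offset
 * barycentrics, evalSampleIndex for per-sample, and evalCentroid for centroid. */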
3815 static bool
3816 emit_load_interpolated_input(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3817 {
3818 nir_intrinsic_instr *barycentric = nir_src_as_intrinsic(intr->src[0]);
3819
3820 const struct dxil_value *args[6] = { 0 };
3821
3822 unsigned opcode_val;
3823 const char *func_name;
3824 unsigned num_args;
3825 switch (barycentric->intrinsic) {
3826 case nir_intrinsic_load_barycentric_at_offset:
3827 opcode_val = DXIL_INTR_EVAL_SNAPPED;
3828 func_name = "dx.op.evalSnapped";
3829 num_args = 6;
3830 for (unsigned i = 0; i < 2; ++i) {
3831 const struct dxil_value *float_offset = get_src(ctx, &barycentric->src[0], i, nir_type_float);
3832 /* GLSL uses offsets in [-0.5f, 0.5f); DXIL wants snapped 4-bit offsets in 1/16-pixel units, i.e. [-8, 7], hence the scale by 16 */
3833 const struct dxil_value *offset_16 = dxil_emit_binop(&ctx->mod,
3834 DXIL_BINOP_MUL, float_offset, dxil_module_get_float_const(&ctx->mod, 16.0f), 0);
3835 args[i + 4] = dxil_emit_cast(&ctx->mod, DXIL_CAST_FPTOSI,
3836 dxil_module_get_int_type(&ctx->mod, 32), offset_16);
3837 }
3838 break;
3839 case nir_intrinsic_load_barycentric_pixel:
3840 opcode_val = DXIL_INTR_EVAL_SNAPPED;
3841 func_name = "dx.op.evalSnapped";
3842 num_args = 6;
3843 args[4] = args[5] = dxil_module_get_int32_const(&ctx->mod, 0);
3844 break;
3845 case nir_intrinsic_load_barycentric_at_sample:
3846 opcode_val = DXIL_INTR_EVAL_SAMPLE_INDEX;
3847 func_name = "dx.op.evalSampleIndex";
3848 num_args = 5;
3849 args[4] = get_src(ctx, &barycentric->src[0], 0, nir_type_int);
3850 break;
3851 case nir_intrinsic_load_barycentric_centroid:
3852 opcode_val = DXIL_INTR_EVAL_CENTROID;
3853 func_name = "dx.op.evalCentroid";
3854 num_args = 4;
3855 break;
3856 default:
3857 unreachable("Unsupported interpolation barycentric intrinsic");
3858 }
3859 args[0] = dxil_module_get_int32_const(&ctx->mod, opcode_val);
3860 args[1] = dxil_module_get_int32_const(&ctx->mod, nir_intrinsic_base(intr));
3861 args[2] = get_src(ctx, &intr->src[1], 0, nir_type_int);
3862
3863 const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, DXIL_F32);
3864
3865 if (!func)
3866 return false;
3867
3868 nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), false);
3869 unsigned var_base_component = var ? var->data.location_frac : 0;
3870 unsigned base_component = nir_intrinsic_component(intr) - var_base_component;
3871
3872 if (ctx->mod.minor_validator >= 5) {
3873 struct dxil_signature_record *sig_rec =
3874 &ctx->mod.inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
3875 unsigned comp_size = intr->def.bit_size == 64 ? 2 : 1;
3876 unsigned comp_mask = (1 << (intr->num_components * comp_size)) - 1;
3877 comp_mask <<= (var_base_component * comp_size);
3878 for (unsigned r = 0; r < sig_rec->num_elements; ++r)
3879 sig_rec->elements[r].always_reads_mask |= (comp_mask & sig_rec->elements[r].mask);
3880
3881 if (!nir_src_is_const(intr->src[1])) {
3882 struct dxil_psv_signature_element *psv_rec =
3883 &ctx->mod.psv_inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
3884 psv_rec->dynamic_mask_and_stream |= comp_mask;
3885 }
3886 }
3887
3888 for (unsigned i = 0; i < intr->num_components; ++i) {
3889 args[3] = dxil_module_get_int8_const(&ctx->mod, i + base_component);
3890
3891 const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args);
3892 if (!retval)
3893 return false;
3894 store_def(ctx, &intr->def, i, retval);
3895 }
3896 return true;
3897 }
3898
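/* Turns a NIR deref chain into an inbounds GEP: index 0 is the module-level
 * variable for this mode (constant/shared/scratch), and the remaining indices
 * come from the SSA defs recorded along the deref path. */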
3899 static const struct dxil_value *
3900 deref_to_gep(struct ntd_context *ctx, nir_deref_instr *deref)
3901 {
3902 nir_deref_path path;
3903 nir_deref_path_init(&path, deref, ctx->ralloc_ctx);
3904 assert(path.path[0]->deref_type == nir_deref_type_var);
3905 uint32_t count = 0;
3906 while (path.path[count])
3907 ++count;
3908
3909 const struct dxil_value **gep_indices = ralloc_array(ctx->ralloc_ctx,
3910 const struct dxil_value *,
3911 count + 1);
3912 nir_variable *var = path.path[0]->var;
3913 const struct dxil_value **var_array;
3914 switch (deref->modes) {
3915 case nir_var_mem_constant: var_array = ctx->consts; break;
3916 case nir_var_mem_shared: var_array = ctx->sharedvars; break;
3917 case nir_var_function_temp: var_array = ctx->scratchvars; break;
3918 default: unreachable("Invalid deref mode");
3919 }
3920 gep_indices[0] = var_array[var->data.driver_location];
3921
3922 for (uint32_t i = 0; i < count; ++i)
3923 gep_indices[i + 1] = get_src_ssa(ctx, &path.path[i]->def, 0);
3924
3925 return dxil_emit_gep_inbounds(&ctx->mod, gep_indices, count + 1);
3926 }
3927
3928 static bool
3929 emit_load_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3930 {
3931 const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0]));
3932 if (!ptr)
3933 return false;
3934
3935 const struct dxil_value *retval =
3936 dxil_emit_load(&ctx->mod, ptr, intr->def.bit_size / 8, false);
3937 if (!retval)
3938 return false;
3939
3940 store_def(ctx, &intr->def, 0, retval);
3941 return true;
3942 }
3943
3944 static bool
3945 emit_store_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3946 {
3947 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
3948 const struct dxil_value *ptr = deref_to_gep(ctx, deref);
3949 if (!ptr)
3950 return false;
3951
3952 const struct dxil_value *value = get_src(ctx, &intr->src[1], 0, nir_get_nir_type_for_glsl_type(deref->type));
3953 return dxil_emit_store(&ctx->mod, value, ptr, nir_src_bit_size(intr->src[1]) / 8, false);
3954 }
3955
3956 static bool
3957 emit_atomic_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3958 {
3959 const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0]));
3960 if (!ptr)
3961 return false;
3962
3963 const struct dxil_value *value = get_src(ctx, &intr->src[1], 0, nir_type_uint);
3964 if (!value)
3965 return false;
3966
3967 enum dxil_rmw_op dxil_op = nir_atomic_to_dxil_rmw(nir_intrinsic_atomic_op(intr));
3968 const struct dxil_value *retval = dxil_emit_atomicrmw(&ctx->mod, value, ptr, dxil_op, false,
3969 DXIL_ATOMIC_ORDERING_ACQREL,
3970 DXIL_SYNC_SCOPE_CROSSTHREAD);
3971 if (!retval)
3972 return false;
3973
3974 store_def(ctx, &intr->def, 0, retval);
3975 return true;
3976 }
3977
3978 static bool
3979 emit_atomic_deref_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3980 {
3981 const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0]));
3982 if (!ptr)
3983 return false;
3984
3985 const struct dxil_value *cmp = get_src(ctx, &intr->src[1], 0, nir_type_uint);
3986 const struct dxil_value *value = get_src(ctx, &intr->src[2], 0, nir_type_uint);
3987 if (!value)
3988 return false;
3989
3990 const struct dxil_value *retval = dxil_emit_cmpxchg(&ctx->mod, cmp, value, ptr, false,
3991 DXIL_ATOMIC_ORDERING_ACQREL,
3992 DXIL_SYNC_SCOPE_CROSSTHREAD);
3993 if (!retval)
3994 return false;
3995
3996 store_def(ctx, &intr->def, 0, retval);
3997 return true;
3998 }
3999
4000 static bool
4001 emit_discard_if_with_value(struct ntd_context *ctx, const struct dxil_value *value)
4002 {
4003 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DISCARD);
4004 if (!opcode)
4005 return false;
4006
4007 const struct dxil_value *args[] = {
4008 opcode,
4009 value
4010 };
4011
4012 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.discard", DXIL_NONE);
4013 if (!func)
4014 return false;
4015
4016 return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
4017 }
4018
4019 static bool
4020 emit_discard_if(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4021 {
4022 const struct dxil_value *value = get_src(ctx, &intr->src[0], 0, nir_type_bool);
4023 if (!value)
4024 return false;
4025
4026 return emit_discard_if_with_value(ctx, value);
4027 }
4028
4029 static bool
4030 emit_discard(struct ntd_context *ctx)
4031 {
4032 const struct dxil_value *value = dxil_module_get_int1_const(&ctx->mod, true);
4033 return emit_discard_if_with_value(ctx, value);
4034 }
4035
4036 static bool
4037 emit_emit_vertex(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4038 {
4039 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_EMIT_STREAM);
4040 const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
4041 if (!opcode || !stream_id)
4042 return false;
4043
4044 const struct dxil_value *args[] = {
4045 opcode,
4046 stream_id
4047 };
4048
4049 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.emitStream", DXIL_NONE);
4050 if (!func)
4051 return false;
4052
4053 return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
4054 }
4055
4056 static bool
4057 emit_end_primitive(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4058 {
4059 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CUT_STREAM);
4060 const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
4061 if (!opcode || !stream_id)
4062 return false;
4063
4064 const struct dxil_value *args[] = {
4065 opcode,
4066 stream_id
4067 };
4068
4069 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cutStream", DXIL_NONE);
4070 if (!func)
4071 return false;
4072
4073 return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
4074 }
4075
4076 static bool
4077 emit_image_store(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4078 {
4079 const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_store ?
4080 create_image_handle(ctx, intr) :
4081 get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
4082 if (!handle)
4083 return false;
4084
4085 bool is_array = false;
4086 if (intr->intrinsic == nir_intrinsic_image_deref_store)
4087 is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
4088 else
4089 is_array = nir_intrinsic_image_array(intr);
4090
4091 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4092 if (!int32_undef)
4093 return false;
4094
4095 const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
4096 enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_store ?
4097 glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
4098 nir_intrinsic_image_dim(intr);
4099 unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
4100 if (is_array)
4101 ++num_coords;
4102
4103 assert(num_coords <= nir_src_num_components(intr->src[1]));
4104 for (unsigned i = 0; i < num_coords; ++i) {
4105 coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
4106 if (!coord[i])
4107 return false;
4108 }
4109
4110 nir_alu_type in_type = nir_intrinsic_src_type(intr);
4111 enum overload_type overload = get_overload(in_type, 32);
4112
4113 assert(nir_src_bit_size(intr->src[3]) == 32);
4114 unsigned num_components = nir_src_num_components(intr->src[3]);
4115 assert(num_components <= 4);
4116 const struct dxil_value *value[4];
4117 for (unsigned i = 0; i < num_components; ++i) {
4118 value[i] = get_src(ctx, &intr->src[3], i, in_type);
4119 if (!value[i])
4120 return false;
4121 }
4122
4123 for (int i = num_components; i < 4; ++i)
4124 value[i] = dxil_module_get_undef(&ctx->mod, dxil_value_get_type(value[0]));
4125
4126 const struct dxil_value *write_mask =
4127 dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
4128 if (!write_mask)
4129 return false;
4130
4131 if (image_dim == GLSL_SAMPLER_DIM_BUF) {
4132 coord[1] = int32_undef;
4133 return emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload);
4134 } else
4135 return emit_texturestore_call(ctx, handle, coord, value, write_mask, overload);
4136 }
4137
4138 static bool
4139 emit_image_load(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4140 {
4141 const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_load ?
4142 create_image_handle(ctx, intr) :
4143 get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
4144 if (!handle)
4145 return false;
4146
4147 bool is_array = false;
4148 if (intr->intrinsic == nir_intrinsic_image_deref_load)
4149 is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
4150 else
4151 is_array = nir_intrinsic_image_array(intr);
4152
4153 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4154 if (!int32_undef)
4155 return false;
4156
4157 const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
4158 enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_load ?
4159 glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
4160 nir_intrinsic_image_dim(intr);
4161 unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
4162 if (is_array)
4163 ++num_coords;
4164
4165 assert(num_coords <= nir_src_num_components(intr->src[1]));
4166 for (unsigned i = 0; i < num_coords; ++i) {
4167 coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
4168 if (!coord[i])
4169 return false;
4170 }
4171
4172 nir_alu_type out_type = nir_intrinsic_dest_type(intr);
4173 enum overload_type overload = get_overload(out_type, 32);
4174
4175 const struct dxil_value *load_result;
4176 if (image_dim == GLSL_SAMPLER_DIM_BUF) {
4177 coord[1] = int32_undef;
4178 load_result = emit_bufferload_call(ctx, handle, coord, overload);
4179 } else
4180 load_result = emit_textureload_call(ctx, handle, coord, overload);
4181
4182 if (!load_result)
4183 return false;
4184
4185 assert(intr->def.bit_size == 32);
4186 unsigned num_components = intr->def.num_components;
4187 assert(num_components <= 4);
4188 for (unsigned i = 0; i < num_components; ++i) {
4189 const struct dxil_value *component = dxil_emit_extractval(&ctx->mod, load_result, i);
4190 if (!component)
4191 return false;
4192 store_def(ctx, &intr->def, i, component);
4193 }
4194
4195 if (util_format_get_nr_components(nir_intrinsic_format(intr)) > 1)
4196 ctx->mod.feats.typed_uav_load_additional_formats = true;
4197
4198 return true;
4199 }
4200
4201 static bool
4202 emit_image_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4203 {
4204 const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_atomic ?
4205 create_image_handle(ctx, intr) :
4206 get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
4207 if (!handle)
4208 return false;
4209
4210 bool is_array = false;
4211 if (intr->intrinsic == nir_intrinsic_image_deref_atomic)
4212 is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
4213 else
4214 is_array = nir_intrinsic_image_array(intr);
4215
4216 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4217 if (!int32_undef)
4218 return false;
4219
4220 const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
4221 enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_atomic ?
4222 glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
4223 nir_intrinsic_image_dim(intr);
4224 unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
4225 if (is_array)
4226 ++num_coords;
4227
4228 assert(num_coords <= nir_src_num_components(intr->src[1]));
4229 for (unsigned i = 0; i < num_coords; ++i) {
4230 coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
4231 if (!coord[i])
4232 return false;
4233 }
4234
4235 nir_atomic_op nir_op = nir_intrinsic_atomic_op(intr);
4236 enum dxil_atomic_op dxil_op = nir_atomic_to_dxil_atomic(nir_op);
4237 nir_alu_type type = nir_atomic_op_type(nir_op);
4238 const struct dxil_value *value = get_src(ctx, &intr->src[3], 0, type);
4239 if (!value)
4240 return false;
4241
4242 const struct dxil_value *retval =
4243 emit_atomic_binop(ctx, handle, dxil_op, coord, value);
4244
4245 if (!retval)
4246 return false;
4247
4248 store_def(ctx, &intr->def, 0, retval);
4249 return true;
4250 }
4251
4252 static bool
4253 emit_image_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4254 {
4255 const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_atomic_swap ?
4256 create_image_handle(ctx, intr) :
4257 get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
4258 if (!handle)
4259 return false;
4260
4261 bool is_array = false;
4262 if (intr->intrinsic == nir_intrinsic_image_deref_atomic_swap)
4263 is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
4264 else
4265 is_array = nir_intrinsic_image_array(intr);
4266
4267 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4268 if (!int32_undef)
4269 return false;
4270
4271 const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
4272 enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ?
4273 glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
4274 nir_intrinsic_image_dim(intr);
4275 unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
4276 if (is_array)
4277 ++num_coords;
4278
4279 assert(num_coords <= nir_src_num_components(intr->src[1]));
4280 for (unsigned i = 0; i < num_coords; ++i) {
4281 coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
4282 if (!coord[i])
4283 return false;
4284 }
4285
4286 const struct dxil_value *cmpval = get_src(ctx, &intr->src[3], 0, nir_type_uint);
4287 const struct dxil_value *newval = get_src(ctx, &intr->src[4], 0, nir_type_uint);
4288 if (!cmpval || !newval)
4289 return false;
4290
4291 const struct dxil_value *retval =
4292 emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval);
4293
4294 if (!retval)
4295 return false;
4296
4297 store_def(ctx, &intr->def, 0, retval);
4298 return true;
4299 }
4300
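/* Operand bundle shared by the texture-style dx.op calls; fields a given op
 * does not use are left NULL, or set to undef by the caller where the op still
 * expects an argument. */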
4301 struct texop_parameters {
4302 const struct dxil_value *tex;
4303 const struct dxil_value *sampler;
4304 const struct dxil_value *bias, *lod_or_sample, *min_lod;
4305 const struct dxil_value *coord[4], *offset[3], *dx[3], *dy[3];
4306 const struct dxil_value *cmp;
4307 enum overload_type overload;
4308 };
4309
4310 static const struct dxil_value *
4311 emit_texture_size(struct ntd_context *ctx, struct texop_parameters *params)
4312 {
4313 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.getDimensions", DXIL_NONE);
4314 if (!func)
4315 return NULL;
4316
4317 const struct dxil_value *args[] = {
4318 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_SIZE),
4319 params->tex,
4320 params->lod_or_sample
4321 };
4322
4323 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4324 }
4325
4326 static bool
4327 emit_image_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4328 {
4329 const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_size ?
4330 create_image_handle(ctx, intr) :
4331 get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
4332 if (!handle)
4333 return false;
4334
4335 enum glsl_sampler_dim sampler_dim = intr->intrinsic == nir_intrinsic_image_deref_size ?
4336 glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
4337 nir_intrinsic_image_dim(intr);
4338 const struct dxil_value *lod = sampler_dim == GLSL_SAMPLER_DIM_BUF ?
4339 dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32)) :
4340 get_src(ctx, &intr->src[1], 0, nir_type_uint);
4341 if (!lod)
4342 return false;
4343
4344 struct texop_parameters params = {
4345 .tex = handle,
4346 .lod_or_sample = lod
4347 };
4348 const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
4349 if (!dimensions)
4350 return false;
4351
4352 for (unsigned i = 0; i < intr->def.num_components; ++i) {
4353 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, i);
4354 store_def(ctx, &intr->def, i, retval);
4355 }
4356
4357 return true;
4358 }
4359
4360 static bool
4361 emit_get_ssbo_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4362 {
4363 enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV;
4364 if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
4365 nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
4366 if (var && var->data.access & ACCESS_NON_WRITEABLE)
4367 class = DXIL_RESOURCE_CLASS_SRV;
4368 }
4369
4370 const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER);
4371 if (!handle)
4372 return false;
4373
4374 struct texop_parameters params = {
4375 .tex = handle,
4376 .lod_or_sample = dxil_module_get_undef(
4377 &ctx->mod, dxil_module_get_int_type(&ctx->mod, 32))
4378 };
4379
4380 const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
4381 if (!dimensions)
4382 return false;
4383
4384 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, 0);
4385 store_def(ctx, &intr->def, 0, retval);
4386
4387 return true;
4388 }
4389
4390 static bool
4391 emit_ssbo_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4392 {
4393 nir_atomic_op nir_op = nir_intrinsic_atomic_op(intr);
4394 enum dxil_atomic_op dxil_op = nir_atomic_to_dxil_atomic(nir_op);
4395 nir_alu_type type = nir_atomic_op_type(nir_op);
4396 const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
4397 const struct dxil_value *offset =
4398 get_src(ctx, &intr->src[1], 0, nir_type_uint);
4399 const struct dxil_value *value =
4400 get_src(ctx, &intr->src[2], 0, type);
4401
4402 if (!value || !handle || !offset)
4403 return false;
4404
4405 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4406 if (!int32_undef)
4407 return false;
4408
4409 const struct dxil_value *coord[3] = {
4410 offset, int32_undef, int32_undef
4411 };
4412
4413 const struct dxil_value *retval =
4414 emit_atomic_binop(ctx, handle, dxil_op, coord, value);
4415
4416 if (!retval)
4417 return false;
4418
4419 store_def(ctx, &intr->def, 0, retval);
4420 return true;
4421 }
4422
4423 static bool
4424 emit_ssbo_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4425 {
4426 const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
4427 const struct dxil_value *offset =
4428 get_src(ctx, &intr->src[1], 0, nir_type_uint);
4429 const struct dxil_value *cmpval =
4430 get_src(ctx, &intr->src[2], 0, nir_type_int);
4431 const struct dxil_value *newval =
4432 get_src(ctx, &intr->src[3], 0, nir_type_int);
4433
4434 if (!cmpval || !newval || !handle || !offset)
4435 return false;
4436
4437 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4438 if (!int32_undef)
4439 return false;
4440
4441 const struct dxil_value *coord[3] = {
4442 offset, int32_undef, int32_undef
4443 };
4444
4445 const struct dxil_value *retval =
4446 emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval);
4447
4448 if (!retval)
4449 return false;
4450
4451 store_def(ctx, &intr->def, 0, retval);
4452 return true;
4453 }
4454
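/* Lower vulkan_resource_index to an (index, 0) pair: component 0 is the
 * binding plus any dynamic array offset, component 1 is always 0.
 */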
4455 static bool
4456 emit_vulkan_resource_index(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4457 {
4458 unsigned int binding = nir_intrinsic_binding(intr);
4459
4460 bool const_index = nir_src_is_const(intr->src[0]);
4461 if (const_index) {
4462 binding += nir_src_as_const_value(intr->src[0])->u32;
4463 }
4464
4465 const struct dxil_value *index_value = dxil_module_get_int32_const(&ctx->mod, binding);
4466 if (!index_value)
4467 return false;
4468
4469 if (!const_index) {
4470 const struct dxil_value *offset = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
4471 if (!offset)
4472 return false;
4473
4474 index_value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, index_value, offset, 0);
4475 if (!index_value)
4476 return false;
4477 }
4478
4479 store_def(ctx, &intr->def, 0, index_value);
4480 store_def(ctx, &intr->def, 1, dxil_module_get_int32_const(&ctx->mod, 0));
4481 return true;
4482 }
4483
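/* Two cases: descriptors reached through vulkan_resource_index become regular
 * createHandle calls, while any other source is treated as an index into the
 * bindless descriptor heap and yields an annotated heap handle.
 */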
4484 static bool
4485 emit_load_vulkan_descriptor(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4486 {
4487 nir_intrinsic_instr* index = nir_src_as_intrinsic(intr->src[0]);
4488 const struct dxil_value *handle = NULL;
4489
4490 enum dxil_resource_class resource_class;
4491 enum dxil_resource_kind resource_kind;
4492 switch (nir_intrinsic_desc_type(intr)) {
4493 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
4494 resource_class = DXIL_RESOURCE_CLASS_CBV;
4495 resource_kind = DXIL_RESOURCE_KIND_CBUFFER;
4496 break;
4497 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
4498 resource_class = DXIL_RESOURCE_CLASS_UAV;
4499 resource_kind = DXIL_RESOURCE_KIND_RAW_BUFFER;
4500 break;
4501 default:
4502 unreachable("unknown descriptor type");
4503 return false;
4504 }
4505
4506 if (index && index->intrinsic == nir_intrinsic_vulkan_resource_index) {
4507 unsigned binding = nir_intrinsic_binding(index);
4508 unsigned space = nir_intrinsic_desc_set(index);
4509
4510 /* The descriptor_set field for variables is only 5 bits. We shouldn't have intrinsics trying to go beyond that. */
4511 assert(space < 32);
4512
4513 nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
4514 if (resource_class == DXIL_RESOURCE_CLASS_UAV &&
4515 (var->data.access & ACCESS_NON_WRITEABLE))
4516 resource_class = DXIL_RESOURCE_CLASS_SRV;
4517
4518 const struct dxil_value *index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
4519 if (!index_value)
4520 return false;
4521
4522 handle = emit_createhandle_call_dynamic(ctx, resource_class, space, binding, index_value, false);
4523 } else {
4524 const struct dxil_value *heap_index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
4525 if (!heap_index_value)
4526 return false;
4527 const struct dxil_value *unannotated_handle = emit_createhandle_heap(ctx, heap_index_value, false, true);
4528 const struct dxil_value *res_props = dxil_module_get_buffer_res_props_const(&ctx->mod, resource_class, resource_kind);
4529 if (!unannotated_handle || !res_props)
4530 return false;
4531 handle = emit_annotate_handle(ctx, unannotated_handle, res_props);
4532 }
4533
4534 store_ssa_def(ctx, &intr->def, 0, handle);
4535 store_def(ctx, &intr->def, 1, get_src(ctx, &intr->src[0], 1, nir_type_uint32));
4536
4537 return true;
4538 }
4539
4540 static bool
4541 emit_load_sample_pos_from_id(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4542 {
4543 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.renderTargetGetSamplePosition", DXIL_NONE);
4544 if (!func)
4545 return false;
4546
4547 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION);
4548 if (!opcode)
4549 return false;
4550
4551 const struct dxil_value *args[] = {
4552 opcode,
4553 get_src(ctx, &intr->src[0], 0, nir_type_uint32),
4554 };
4555 if (!args[1])
4556 return false;
4557
4558 const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4559 if (!v)
4560 return false;
4561
4562 for (unsigned i = 0; i < 2; ++i) {
4563 /* GL coords go from 0 -> 1, D3D from -0.5 -> 0.5 */
4564 const struct dxil_value *coord = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
4565 dxil_emit_extractval(&ctx->mod, v, i),
4566 dxil_module_get_float_const(&ctx->mod, 0.5f), 0);
4567 store_def(ctx, &intr->def, i, coord);
4568 }
4569 return true;
4570 }
4571
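/* Without per-sample execution the sample index is always 0, so the
 * dx.op.sampleIndex call is skipped entirely in that case.
 */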
4572 static bool
4573 emit_load_sample_id(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4574 {
4575 assert(ctx->mod.info.has_per_sample_input ||
4576 intr->intrinsic == nir_intrinsic_load_sample_id_no_per_sample);
4577
4578 if (ctx->mod.info.has_per_sample_input)
4579 return emit_load_unary_external_function(ctx, intr, "dx.op.sampleIndex",
4580 DXIL_INTR_SAMPLE_INDEX, nir_type_int);
4581
4582 store_def(ctx, &intr->def, 0, dxil_module_get_int32_const(&ctx->mod, 0));
4583 return true;
4584 }
4585
4586 static bool
4587 emit_read_first_invocation(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4588 {
4589 ctx->mod.feats.wave_ops = 1;
4590 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveReadLaneFirst",
4591 get_overload(nir_type_uint, intr->def.bit_size));
4592 const struct dxil_value *args[] = {
4593 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_READ_LANE_FIRST),
4594 get_src(ctx, intr->src, 0, nir_type_uint),
4595 };
4596 if (!func || !args[0] || !args[1])
4597 return false;
4598
4599 const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4600 if (!ret)
4601 return false;
4602 store_def(ctx, &intr->def, 0, ret);
4603 return true;
4604 }
4605
4606 static bool
4607 emit_read_invocation(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4608 {
4609 ctx->mod.feats.wave_ops = 1;
4610 bool quad = intr->intrinsic == nir_intrinsic_quad_broadcast;
4611 const struct dxil_func *func = dxil_get_function(&ctx->mod, quad ? "dx.op.quadReadLaneAt" : "dx.op.waveReadLaneAt",
4612 get_overload(nir_type_uint, intr->def.bit_size));
4613 const struct dxil_value *args[] = {
4614 dxil_module_get_int32_const(&ctx->mod, quad ? DXIL_INTR_QUAD_READ_LANE_AT : DXIL_INTR_WAVE_READ_LANE_AT),
4615 get_src(ctx, &intr->src[0], 0, nir_type_uint),
4616 get_src(ctx, &intr->src[1], 0, nir_type_uint),
4617 };
4618 if (!func || !args[0] || !args[1] || !args[2])
4619 return false;
4620
4621 const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4622 if (!ret)
4623 return false;
4624 store_def(ctx, &intr->def, 0, ret);
4625 return true;
4626 }
4627
4628 static bool
4629 emit_vote_eq(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4630 {
4631 ctx->mod.feats.wave_ops = 1;
4632 nir_alu_type alu_type = intr->intrinsic == nir_intrinsic_vote_ieq ? nir_type_int : nir_type_float;
4633 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveAllEqual",
4634 get_overload(alu_type, intr->src[0].ssa->bit_size));
4635 const struct dxil_value *args[] = {
4636 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_ALL_EQUAL),
4637 get_src(ctx, intr->src, 0, alu_type),
4638 };
4639 if (!func || !args[0] || !args[1])
4640 return false;
4641
4642 const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4643 if (!ret)
4644 return false;
4645 store_def(ctx, &intr->def, 0, ret);
4646 return true;
4647 }
4648
4649 static bool
4650 emit_vote(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4651 {
4652 ctx->mod.feats.wave_ops = 1;
4653 bool any = intr->intrinsic == nir_intrinsic_vote_any;
4654 const struct dxil_func *func = dxil_get_function(&ctx->mod,
4655 any ? "dx.op.waveAnyTrue" : "dx.op.waveAllTrue",
4656 DXIL_NONE);
4657 const struct dxil_value *args[] = {
4658 dxil_module_get_int32_const(&ctx->mod, any ? DXIL_INTR_WAVE_ANY_TRUE : DXIL_INTR_WAVE_ALL_TRUE),
4659 get_src(ctx, intr->src, 0, nir_type_bool),
4660 };
4661 if (!func || !args[0] || !args[1])
4662 return false;
4663
4664 const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4665 if (!ret)
4666 return false;
4667 store_def(ctx, &intr->def, 0, ret);
4668 return true;
4669 }
4670
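/* dx.op.waveActiveBallot returns a four-component i32 mask (covering up to
 * 128 lanes); each component is extracted and stored separately.
 */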
4671 static bool
4672 emit_ballot(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4673 {
4674 ctx->mod.feats.wave_ops = 1;
4675 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveBallot", DXIL_NONE);
4676 const struct dxil_value *args[] = {
4677 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_BALLOT),
4678 get_src(ctx, intr->src, 0, nir_type_bool),
4679 };
4680 if (!func || !args[0] || !args[1])
4681 return false;
4682
4683 const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4684 if (!ret)
4685 return false;
4686 for (uint32_t i = 0; i < 4; ++i)
4687 store_def(ctx, &intr->def, i, dxil_emit_extractval(&ctx->mod, ret, i));
4688 return true;
4689 }
4690
4691 static bool
4692 emit_quad_op(struct ntd_context *ctx, nir_intrinsic_instr *intr, enum dxil_quad_op_kind op)
4693 {
4694 ctx->mod.feats.wave_ops = 1;
4695 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.quadOp",
4696 get_overload(nir_type_uint, intr->def.bit_size));
4697 const struct dxil_value *args[] = {
4698 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_QUAD_OP),
4699 get_src(ctx, intr->src, 0, nir_type_uint),
4700 dxil_module_get_int8_const(&ctx->mod, op),
4701 };
4702 if (!func || !args[0] || !args[1] || !args[2])
4703 return false;
4704
4705 const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4706 if (!ret)
4707 return false;
4708 store_def(ctx, &intr->def, 0, ret);
4709 return true;
4710 }
4711
4712 static enum dxil_wave_bit_op_kind
4713 get_reduce_bit_op(nir_op op)
4714 {
4715 switch (op) {
4716 case nir_op_ior: return DXIL_WAVE_BIT_OP_OR;
4717 case nir_op_ixor: return DXIL_WAVE_BIT_OP_XOR;
4718 case nir_op_iand: return DXIL_WAVE_BIT_OP_AND;
4719 default:
4720 unreachable("Invalid bit op");
4721 }
4722 }
4723
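/* Bitwise reductions use the dedicated dx.op.waveActiveBit opcode, with the
 * AND/OR/XOR kind passed as an i8 immediate.
 */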
4724 static bool
4725 emit_reduce_bitwise(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4726 {
4727 enum dxil_wave_bit_op_kind wave_bit_op = get_reduce_bit_op(nir_intrinsic_reduction_op(intr));
4728 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveBit",
4729 get_overload(nir_type_uint, intr->def.bit_size));
4730 const struct dxil_value *args[] = {
4731 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_BIT),
4732 get_src(ctx, intr->src, 0, nir_type_uint),
4733 dxil_module_get_int8_const(&ctx->mod, wave_bit_op),
4734 };
4735 if (!func || !args[0] || !args[1] || !args[2])
4736 return false;
4737
4738 const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4739 if (!ret)
4740 return false;
4741 store_def(ctx, &intr->def, 0, ret);
4742 return true;
4743 }
4744
4745 static enum dxil_wave_op_kind
4746 get_reduce_op(nir_op op)
4747 {
4748 switch (op) {
4749 case nir_op_iadd:
4750 case nir_op_fadd:
4751 return DXIL_WAVE_OP_SUM;
4752 case nir_op_imul:
4753 case nir_op_fmul:
4754 return DXIL_WAVE_OP_PRODUCT;
4755 case nir_op_imax:
4756 case nir_op_umax:
4757 case nir_op_fmax:
4758 return DXIL_WAVE_OP_MAX;
4759 case nir_op_imin:
4760 case nir_op_umin:
4761 case nir_op_fmin:
4762 return DXIL_WAVE_OP_MIN;
4763 default:
4764 unreachable("Unexpected reduction op");
4765 }
4766 }
4767
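/* Arithmetic reductions and exclusive scans share one entry point. The DXIL
 * overload doesn't distinguish signed from unsigned, so signedness is passed
 * as a separate i8 flag.
 */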
4768 static bool
4769 emit_reduce(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4770 {
4771 ctx->mod.feats.wave_ops = 1;
4772 bool is_prefix = intr->intrinsic == nir_intrinsic_exclusive_scan;
4773 nir_op reduction_op = (nir_op)nir_intrinsic_reduction_op(intr);
4774 switch (reduction_op) {
4775 case nir_op_ior:
4776 case nir_op_ixor:
4777 case nir_op_iand:
4778 assert(!is_prefix);
4779 return emit_reduce_bitwise(ctx, intr);
4780 default:
4781 break;
4782 }
4783 nir_alu_type alu_type = nir_op_infos[reduction_op].input_types[0];
4784 enum dxil_wave_op_kind wave_op = get_reduce_op(reduction_op);
4785 const struct dxil_func *func = dxil_get_function(&ctx->mod, is_prefix ? "dx.op.wavePrefixOp" : "dx.op.waveActiveOp",
4786 get_overload(alu_type, intr->def.bit_size));
4787 bool is_unsigned = alu_type == nir_type_uint;
4788 const struct dxil_value *args[] = {
4789 dxil_module_get_int32_const(&ctx->mod, is_prefix ? DXIL_INTR_WAVE_PREFIX_OP : DXIL_INTR_WAVE_ACTIVE_OP),
4790 get_src(ctx, intr->src, 0, alu_type),
4791 dxil_module_get_int8_const(&ctx->mod, wave_op),
4792 dxil_module_get_int8_const(&ctx->mod, is_unsigned),
4793 };
4794 if (!func || !args[0] || !args[1] || !args[2] || !args[3])
4795 return false;
4796
4797 const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4798 if (!ret)
4799 return false;
4800 store_def(ctx, &intr->def, 0, ret);
4801 return true;
4802 }
4803
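/* Central dispatch for NIR intrinsics. Anything that falls through to the
 * default case is reported through the logger and fails the conversion.
 */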
4804 static bool
4805 emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4806 {
4807 switch (intr->intrinsic) {
4808 case nir_intrinsic_load_global_invocation_id:
4809 case nir_intrinsic_load_global_invocation_id_zero_base:
4810 return emit_load_global_invocation_id(ctx, intr);
4811 case nir_intrinsic_load_local_invocation_id:
4812 return emit_load_local_invocation_id(ctx, intr);
4813 case nir_intrinsic_load_local_invocation_index:
4814 return emit_load_local_invocation_index(ctx, intr);
4815 case nir_intrinsic_load_workgroup_id:
4816 case nir_intrinsic_load_workgroup_id_zero_base:
4817 return emit_load_local_workgroup_id(ctx, intr);
4818 case nir_intrinsic_load_ssbo:
4819 return emit_load_ssbo(ctx, intr);
4820 case nir_intrinsic_store_ssbo:
4821 return emit_store_ssbo(ctx, intr);
4822 case nir_intrinsic_load_deref:
4823 return emit_load_deref(ctx, intr);
4824 case nir_intrinsic_store_deref:
4825 return emit_store_deref(ctx, intr);
4826 case nir_intrinsic_deref_atomic:
4827 return emit_atomic_deref(ctx, intr);
4828 case nir_intrinsic_deref_atomic_swap:
4829 return emit_atomic_deref_swap(ctx, intr);
4830 case nir_intrinsic_load_ubo_vec4:
4831 return emit_load_ubo_vec4(ctx, intr);
4832 case nir_intrinsic_load_primitive_id:
4833 return emit_load_unary_external_function(ctx, intr, "dx.op.primitiveID",
4834 DXIL_INTR_PRIMITIVE_ID, nir_type_int);
4835 case nir_intrinsic_load_sample_id:
4836 case nir_intrinsic_load_sample_id_no_per_sample:
4837 return emit_load_sample_id(ctx, intr);
4838 case nir_intrinsic_load_invocation_id:
4839 switch (ctx->mod.shader_kind) {
4840 case DXIL_HULL_SHADER:
4841 return emit_load_unary_external_function(ctx, intr, "dx.op.outputControlPointID",
4842 DXIL_INTR_OUTPUT_CONTROL_POINT_ID, nir_type_int);
4843 case DXIL_GEOMETRY_SHADER:
4844 return emit_load_unary_external_function(ctx, intr, "dx.op.gsInstanceID",
4845 DXIL_INTR_GS_INSTANCE_ID, nir_type_int);
4846 default:
4847 unreachable("Unexpected shader kind for invocation ID");
4848 }
4849 case nir_intrinsic_load_view_index:
4850 ctx->mod.feats.view_id = true;
4851 return emit_load_unary_external_function(ctx, intr, "dx.op.viewID",
4852 DXIL_INTR_VIEW_ID, nir_type_int);
4853 case nir_intrinsic_load_sample_mask_in:
4854 return emit_load_sample_mask_in(ctx, intr);
4855 case nir_intrinsic_load_tess_coord:
4856 return emit_load_tess_coord(ctx, intr);
4857 case nir_intrinsic_discard_if:
4858 case nir_intrinsic_demote_if:
4859 return emit_discard_if(ctx, intr);
4860 case nir_intrinsic_discard:
4861 case nir_intrinsic_demote:
4862 return emit_discard(ctx);
4863 case nir_intrinsic_emit_vertex:
4864 return emit_emit_vertex(ctx, intr);
4865 case nir_intrinsic_end_primitive:
4866 return emit_end_primitive(ctx, intr);
4867 case nir_intrinsic_barrier:
4868 return emit_barrier(ctx, intr);
4869 case nir_intrinsic_ssbo_atomic:
4870 return emit_ssbo_atomic(ctx, intr);
4871 case nir_intrinsic_ssbo_atomic_swap:
4872 return emit_ssbo_atomic_comp_swap(ctx, intr);
4873 case nir_intrinsic_image_deref_atomic:
4874 case nir_intrinsic_image_atomic:
4875 case nir_intrinsic_bindless_image_atomic:
4876 return emit_image_atomic(ctx, intr);
4877 case nir_intrinsic_image_deref_atomic_swap:
4878 case nir_intrinsic_image_atomic_swap:
4879 case nir_intrinsic_bindless_image_atomic_swap:
4880 return emit_image_atomic_comp_swap(ctx, intr);
4881 case nir_intrinsic_image_store:
4882 case nir_intrinsic_image_deref_store:
4883 case nir_intrinsic_bindless_image_store:
4884 return emit_image_store(ctx, intr);
4885 case nir_intrinsic_image_load:
4886 case nir_intrinsic_image_deref_load:
4887 case nir_intrinsic_bindless_image_load:
4888 return emit_image_load(ctx, intr);
4889 case nir_intrinsic_image_size:
4890 case nir_intrinsic_image_deref_size:
4891 case nir_intrinsic_bindless_image_size:
4892 return emit_image_size(ctx, intr);
4893 case nir_intrinsic_get_ssbo_size:
4894 return emit_get_ssbo_size(ctx, intr);
4895 case nir_intrinsic_load_input:
4896 case nir_intrinsic_load_per_vertex_input:
4897 case nir_intrinsic_load_output:
4898 case nir_intrinsic_load_per_vertex_output:
4899 return emit_load_input_via_intrinsic(ctx, intr);
4900 case nir_intrinsic_store_output:
4901 case nir_intrinsic_store_per_vertex_output:
4902 return emit_store_output_via_intrinsic(ctx, intr);
4903
4904 case nir_intrinsic_load_barycentric_at_offset:
4905 case nir_intrinsic_load_barycentric_at_sample:
4906 case nir_intrinsic_load_barycentric_centroid:
4907 case nir_intrinsic_load_barycentric_pixel:
4908 /* Emit nothing; these are only supported as inputs to load_interpolated_input */
4909 return true;
4910 case nir_intrinsic_load_interpolated_input:
4911 return emit_load_interpolated_input(ctx, intr);
4912 break;
4913
4914 case nir_intrinsic_vulkan_resource_index:
4915 return emit_vulkan_resource_index(ctx, intr);
4916 case nir_intrinsic_load_vulkan_descriptor:
4917 return emit_load_vulkan_descriptor(ctx, intr);
4918
4919 case nir_intrinsic_load_sample_pos_from_id:
4920 return emit_load_sample_pos_from_id(ctx, intr);
4921
4922 case nir_intrinsic_is_helper_invocation:
4923 return emit_load_unary_external_function(
4924 ctx, intr, "dx.op.isHelperLane", DXIL_INTR_IS_HELPER_LANE, nir_type_int);
4925 case nir_intrinsic_elect:
4926 ctx->mod.feats.wave_ops = 1;
4927 return emit_load_unary_external_function(
4928 ctx, intr, "dx.op.waveIsFirstLane", DXIL_INTR_WAVE_IS_FIRST_LANE, nir_type_invalid);
4929 case nir_intrinsic_load_subgroup_size:
4930 ctx->mod.feats.wave_ops = 1;
4931 return emit_load_unary_external_function(
4932 ctx, intr, "dx.op.waveGetLaneCount", DXIL_INTR_WAVE_GET_LANE_COUNT, nir_type_invalid);
4933 case nir_intrinsic_load_subgroup_invocation:
4934 ctx->mod.feats.wave_ops = 1;
4935 return emit_load_unary_external_function(
4936 ctx, intr, "dx.op.waveGetLaneIndex", DXIL_INTR_WAVE_GET_LANE_INDEX, nir_type_invalid);
4937
4938 case nir_intrinsic_vote_feq:
4939 case nir_intrinsic_vote_ieq:
4940 return emit_vote_eq(ctx, intr);
4941 case nir_intrinsic_vote_any:
4942 case nir_intrinsic_vote_all:
4943 return emit_vote(ctx, intr);
4944
4945 case nir_intrinsic_ballot:
4946 return emit_ballot(ctx, intr);
4947
4948 case nir_intrinsic_read_first_invocation:
4949 return emit_read_first_invocation(ctx, intr);
4950 case nir_intrinsic_read_invocation:
4951 case nir_intrinsic_shuffle:
4952 case nir_intrinsic_quad_broadcast:
4953 return emit_read_invocation(ctx, intr);
4954
4955 case nir_intrinsic_quad_swap_horizontal:
4956 return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_X);
4957 case nir_intrinsic_quad_swap_vertical:
4958 return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_Y);
4959 case nir_intrinsic_quad_swap_diagonal:
4960 return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_DIAGONAL);
4961
4962 case nir_intrinsic_reduce:
4963 case nir_intrinsic_exclusive_scan:
4964 return emit_reduce(ctx, intr);
4965
4966 case nir_intrinsic_load_num_workgroups:
4967 case nir_intrinsic_load_workgroup_size:
4968 default:
4969 log_nir_instr_unsupported(
4970 ctx->logger, "Unimplemented intrinsic instruction", &intr->instr);
4971 return false;
4972 }
4973 }
4974
4975 static const struct dxil_type *
4976 dxil_type_for_const(struct ntd_context *ctx, nir_def *def)
4977 {
4978 if (BITSET_TEST(ctx->int_types, def->index) ||
4979 !BITSET_TEST(ctx->float_types, def->index))
4980 return dxil_module_get_int_type(&ctx->mod, def->bit_size);
4981 return dxil_module_get_float_type(&ctx->mod, def->bit_size);
4982 }
4983
4984 static bool
4985 emit_load_const(struct ntd_context *ctx, nir_load_const_instr *load_const)
4986 {
4987 for (uint32_t i = 0; i < load_const->def.num_components; ++i) {
4988 const struct dxil_type *type = dxil_type_for_const(ctx, &load_const->def);
4989 store_ssa_def(ctx, &load_const->def, i, get_value_for_const(&ctx->mod, &load_const->value[i], type));
4990 }
4991 return true;
4992 }
4993
4994 static bool
4995 emit_deref(struct ntd_context *ctx, nir_deref_instr *instr)
4996 {
4997 /* There are two possible reasons we might be walking through derefs:
4998 * 1. Computing an index to be used for a texture/sampler/image binding, which
4999 * can only do array indexing and should compute the indices along the way with
5000 * array-of-array sizes.
5001 * 2. Storing an index to be used in a GEP for access to a variable.
5002 */
5003 nir_variable *var = nir_deref_instr_get_variable(instr);
5004 assert(var);
5005
5006 bool is_aoa_size =
5007 glsl_type_is_sampler(glsl_without_array(var->type)) ||
5008 glsl_type_is_image(glsl_without_array(var->type)) ||
5009 glsl_type_is_texture(glsl_without_array(var->type));
5010
5011 if (!is_aoa_size) {
5012 /* Just store the values; we'll use them to build a GEP in the load or store */
5013 switch (instr->deref_type) {
5014 case nir_deref_type_var:
5015 store_def(ctx, &instr->def, 0, dxil_module_get_int_const(&ctx->mod, 0, instr->def.bit_size));
5016 return true;
5017 case nir_deref_type_array:
5018 store_def(ctx, &instr->def, 0, get_src(ctx, &instr->arr.index, 0, nir_type_int));
5019 return true;
5020 case nir_deref_type_struct:
5021 store_def(ctx, &instr->def, 0, dxil_module_get_int_const(&ctx->mod, instr->strct.index, 32));
5022 return true;
5023 default:
5024 unreachable("Other deref types not supported");
5025 }
5026 }
5027
5028 /* In the CL environment, there's nothing to emit here. Loads and stores that
5029 * reference these derefs emit the scratch/shared GEP addressing logic themselves.
5030 */
5031 if (ctx->opts->environment == DXIL_ENVIRONMENT_CL)
5032 return true;
5033
5034 const struct glsl_type *type = instr->type;
5035 const struct dxil_value *binding;
5036 unsigned binding_val = ctx->opts->environment == DXIL_ENVIRONMENT_GL ?
5037 var->data.driver_location : var->data.binding;
5038
5039 if (instr->deref_type == nir_deref_type_var) {
5040 binding = dxil_module_get_int32_const(&ctx->mod, binding_val);
5041 } else {
5042 const struct dxil_value *base = get_src(ctx, &instr->parent, 0, nir_type_uint32);
5043 const struct dxil_value *offset = get_src(ctx, &instr->arr.index, 0, nir_type_uint32);
5044 if (!base || !offset)
5045 return false;
5046
5047 if (glsl_type_is_array(instr->type)) {
5048 offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_MUL, offset,
5049 dxil_module_get_int32_const(&ctx->mod, glsl_get_aoa_size(instr->type)), 0);
5050 if (!offset)
5051 return false;
5052 }
5053 binding = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, base, offset, 0);
5054 }
5055
5056 if (!binding)
5057 return false;
5058
5059 /* Haven't finished chasing the deref chain yet, just store the value */
5060 if (glsl_type_is_array(type)) {
5061 store_def(ctx, &instr->def, 0, binding);
5062 return true;
5063 }
5064
5065 assert(glsl_type_is_sampler(type) || glsl_type_is_image(type) || glsl_type_is_texture(type));
5066 enum dxil_resource_class res_class;
5067 if (glsl_type_is_image(type))
5068 res_class = DXIL_RESOURCE_CLASS_UAV;
5069 else if (glsl_type_is_sampler(type))
5070 res_class = DXIL_RESOURCE_CLASS_SAMPLER;
5071 else
5072 res_class = DXIL_RESOURCE_CLASS_SRV;
5073
5074 unsigned descriptor_set = ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN ?
5075 var->data.descriptor_set : (glsl_type_is_image(type) ? 1 : 0);
5076 const struct dxil_value *handle = emit_createhandle_call_dynamic(ctx, res_class,
5077 descriptor_set, binding_val, binding, false);
5078 if (!handle)
5079 return false;
5080
5081 store_ssa_def(ctx, &instr->def, 0, handle);
5082 return true;
5083 }
5084
5085 static bool
5086 emit_cond_branch(struct ntd_context *ctx, const struct dxil_value *cond,
5087 int true_block, int false_block)
5088 {
5089 assert(cond);
5090 assert(true_block >= 0);
5091 assert(false_block >= 0);
5092 return dxil_emit_branch(&ctx->mod, cond, true_block, false_block);
5093 }
5094
5095 static bool
5096 emit_branch(struct ntd_context *ctx, int block)
5097 {
5098 assert(block >= 0);
5099 return dxil_emit_branch(&ctx->mod, NULL, block, -1);
5100 }
5101
5102 static bool
5103 emit_jump(struct ntd_context *ctx, nir_jump_instr *instr)
5104 {
5105 switch (instr->type) {
5106 case nir_jump_break:
5107 case nir_jump_continue:
5108 assert(instr->instr.block->successors[0]);
5109 assert(!instr->instr.block->successors[1]);
5110 return emit_branch(ctx, instr->instr.block->successors[0]->index);
5111
5112 default:
5113 unreachable("Unsupported jump type\n");
5114 }
5115 }
5116
5117 struct phi_block {
5118 unsigned num_components;
5119 struct dxil_instr *comp[NIR_MAX_VEC_COMPONENTS];
5120 };
5121
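/* Phis are emitted with empty incoming lists first and recorded in ctx->phis;
 * fixup_phi() adds the (value, predecessor-block) pairs once all blocks have
 * been emitted and every SSA def exists.
 */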
5122 static bool
5123 emit_phi(struct ntd_context *ctx, nir_phi_instr *instr)
5124 {
5125 const struct dxil_type *type = NULL;
5126 nir_foreach_phi_src(src, instr) {
5127 /* All sources have the same type, just use the first one */
5128 type = dxil_value_get_type(ctx->defs[src->src.ssa->index].chans[0]);
5129 break;
5130 }
5131
5132 struct phi_block *vphi = ralloc(ctx->phis, struct phi_block);
5133 vphi->num_components = instr->def.num_components;
5134
5135 for (unsigned i = 0; i < vphi->num_components; ++i) {
5136 struct dxil_instr *phi = vphi->comp[i] = dxil_emit_phi(&ctx->mod, type);
5137 if (!phi)
5138 return false;
5139 store_ssa_def(ctx, &instr->def, i, dxil_instr_get_return_value(phi));
5140 }
5141 _mesa_hash_table_insert(ctx->phis, instr, vphi);
5142 return true;
5143 }
5144
5145 static bool
5146 fixup_phi(struct ntd_context *ctx, nir_phi_instr *instr,
5147 struct phi_block *vphi)
5148 {
5149 const struct dxil_value *values[16];
5150 unsigned blocks[16];
5151 for (unsigned i = 0; i < vphi->num_components; ++i) {
5152 size_t num_incoming = 0;
5153 nir_foreach_phi_src(src, instr) {
5154 const struct dxil_value *val = get_src_ssa(ctx, src->src.ssa, i);
5155 values[num_incoming] = val;
5156 blocks[num_incoming] = src->pred->index;
5157 ++num_incoming;
5158 if (num_incoming == ARRAY_SIZE(values)) {
5159 if (!dxil_phi_add_incoming(vphi->comp[i], values, blocks,
5160 num_incoming))
5161 return false;
5162 num_incoming = 0;
5163 }
5164 }
5165 if (num_incoming > 0 && !dxil_phi_add_incoming(vphi->comp[i], values,
5166 blocks, num_incoming))
5167 return false;
5168 }
5169 return true;
5170 }
5171
5172 static unsigned
5173 get_n_src(struct ntd_context *ctx, const struct dxil_value **values,
5174 unsigned max_components, nir_tex_src *src, nir_alu_type type)
5175 {
5176 unsigned num_components = nir_src_num_components(src->src);
5177 unsigned i = 0;
5178
5179 assert(num_components <= max_components);
5180
5181 for (i = 0; i < num_components; ++i) {
5182 values[i] = get_src(ctx, &src->src, i, type);
5183 if (!values[i])
5184 return 0;
5185 }
5186
5187 return num_components;
5188 }
5189
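/* Pad the unused tail of a coordinate/offset array with undef, since the
 * DXIL texture intrinsics always take the full argument list.
 */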
5190 #define PAD_SRC(ctx, array, components, undef) \
5191 for (unsigned i = components; i < ARRAY_SIZE(array); ++i) { \
5192 array[i] = undef; \
5193 }
5194
5195 static const struct dxil_value *
5196 emit_sample(struct ntd_context *ctx, struct texop_parameters *params)
5197 {
5198 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sample", params->overload);
5199 if (!func)
5200 return NULL;
5201
5202 const struct dxil_value *args[11] = {
5203 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE),
5204 params->tex, params->sampler,
5205 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5206 params->offset[0], params->offset[1], params->offset[2],
5207 params->min_lod
5208 };
5209
5210 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5211 }
5212
5213 static const struct dxil_value *
5214 emit_sample_bias(struct ntd_context *ctx, struct texop_parameters *params)
5215 {
5216 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleBias", params->overload);
5217 if (!func)
5218 return NULL;
5219
5220 assert(params->bias != NULL);
5221
5222 const struct dxil_value *args[12] = {
5223 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_BIAS),
5224 params->tex, params->sampler,
5225 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5226 params->offset[0], params->offset[1], params->offset[2],
5227 params->bias, params->min_lod
5228 };
5229
5230 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5231 }
5232
5233 static const struct dxil_value *
5234 emit_sample_level(struct ntd_context *ctx, struct texop_parameters *params)
5235 {
5236 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleLevel", params->overload);
5237 if (!func)
5238 return NULL;
5239
5240 assert(params->lod_or_sample != NULL);
5241
5242 const struct dxil_value *args[11] = {
5243 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_LEVEL),
5244 params->tex, params->sampler,
5245 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5246 params->offset[0], params->offset[1], params->offset[2],
5247 params->lod_or_sample
5248 };
5249
5250 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5251 }
5252
5253 static const struct dxil_value *
5254 emit_sample_cmp(struct ntd_context *ctx, struct texop_parameters *params)
5255 {
5256 const struct dxil_func *func;
5257 enum dxil_intr opcode;
5258
5259 func = dxil_get_function(&ctx->mod, "dx.op.sampleCmp", DXIL_F32);
5260 opcode = DXIL_INTR_SAMPLE_CMP;
5261
5262 if (!func)
5263 return NULL;
5264
5265 const struct dxil_value *args[12] = {
5266 dxil_module_get_int32_const(&ctx->mod, opcode),
5267 params->tex, params->sampler,
5268 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5269 params->offset[0], params->offset[1], params->offset[2],
5270 params->cmp, params->min_lod
5271 };
5272
5273 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5274 }
5275
5276 static const struct dxil_value *
5277 emit_sample_cmp_level_zero(struct ntd_context *ctx, struct texop_parameters *params)
5278 {
5279 const struct dxil_func *func;
5280 enum dxil_intr opcode;
5281
5282 func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevelZero", DXIL_F32);
5283 opcode = DXIL_INTR_SAMPLE_CMP_LVL_ZERO;
5284
5285 if (!func)
5286 return NULL;
5287
5288 const struct dxil_value *args[11] = {
5289 dxil_module_get_int32_const(&ctx->mod, opcode),
5290 params->tex, params->sampler,
5291 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5292 params->offset[0], params->offset[1], params->offset[2],
5293 params->cmp
5294 };
5295
5296 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5297 }
5298
5299 static const struct dxil_value *
5300 emit_sample_cmp_level(struct ntd_context *ctx, struct texop_parameters *params)
5301 {
5302 ctx->mod.feats.advanced_texture_ops = true;
5303 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevel", params->overload);
5304 if (!func)
5305 return NULL;
5306
5307 assert(params->lod_or_sample != NULL);
5308
5309 const struct dxil_value *args[12] = {
5310 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_LEVEL),
5311 params->tex, params->sampler,
5312 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5313 params->offset[0], params->offset[1], params->offset[2],
5314 params->cmp, params->lod_or_sample
5315 };
5316
5317 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5318 }
5319
5320 static const struct dxil_value *
5321 emit_sample_cmp_bias(struct ntd_context *ctx, struct texop_parameters *params)
5322 {
5323 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpBias", params->overload);
5324 if (!func)
5325 return NULL;
5326
5327 assert(params->bias != NULL);
5328 ctx->mod.feats.sample_cmp_bias_gradient = 1;
5329
5330 const struct dxil_value *args[13] = {
5331 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_BIAS),
5332 params->tex, params->sampler,
5333 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5334 params->offset[0], params->offset[1], params->offset[2],
5335 params->cmp, params->bias, params->min_lod
5336 };
5337
5338 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5339 }
5340
5341 static const struct dxil_value *
5342 emit_sample_grad(struct ntd_context *ctx, struct texop_parameters *params)
5343 {
5344 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleGrad", params->overload);
5345 if (!func)
5346 return false;
5347
5348 const struct dxil_value *args[17] = {
5349 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_GRAD),
5350 params->tex, params->sampler,
5351 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5352 params->offset[0], params->offset[1], params->offset[2],
5353 params->dx[0], params->dx[1], params->dx[2],
5354 params->dy[0], params->dy[1], params->dy[2],
5355 params->min_lod
5356 };
5357
5358 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5359 }
5360
5361 static const struct dxil_value *
5362 emit_sample_cmp_grad(struct ntd_context *ctx, struct texop_parameters *params)
5363 {
5364 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpGrad", params->overload);
5365 if (!func)
5366 return false;
5367
5368 ctx->mod.feats.sample_cmp_bias_gradient = 1;
5369
5370 const struct dxil_value *args[18] = {
5371 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_GRAD),
5372 params->tex, params->sampler,
5373 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5374 params->offset[0], params->offset[1], params->offset[2],
5375 params->cmp,
5376 params->dx[0], params->dx[1], params->dx[2],
5377 params->dy[0], params->dy[1], params->dy[2],
5378 params->min_lod
5379 };
5380
5381 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5382 }
5383
5384 static const struct dxil_value *
5385 emit_texel_fetch(struct ntd_context *ctx, struct texop_parameters *params)
5386 {
5387 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", params->overload);
5388 if (!func)
5389 return false;
5390
5391 if (!params->lod_or_sample)
5392 params->lod_or_sample = dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32));
5393
5394 const struct dxil_value *args[] = {
5395 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOAD),
5396 params->tex,
5397 params->lod_or_sample, params->coord[0], params->coord[1], params->coord[2],
5398 params->offset[0], params->offset[1], params->offset[2]
5399 };
5400
5401 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5402 }
5403
5404 static const struct dxil_value *
5405 emit_texture_lod(struct ntd_context *ctx, struct texop_parameters *params, bool clamped)
5406 {
5407 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.calculateLOD", DXIL_F32);
5408 if (!func)
5409 return false;
5410
5411 const struct dxil_value *args[] = {
5412 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOD),
5413 params->tex,
5414 params->sampler,
5415 params->coord[0],
5416 params->coord[1],
5417 params->coord[2],
5418 dxil_module_get_int1_const(&ctx->mod, clamped ? 1 : 0)
5419 };
5420
5421 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5422 }
5423
5424 static const struct dxil_value *
5425 emit_texture_gather(struct ntd_context *ctx, struct texop_parameters *params, unsigned component)
5426 {
5427 const struct dxil_func *func = dxil_get_function(&ctx->mod,
5428 params->cmp ? "dx.op.textureGatherCmp" : "dx.op.textureGather", params->overload);
5429 if (!func)
5430 return false;
5431
5432 const struct dxil_value *args[] = {
5433 dxil_module_get_int32_const(&ctx->mod, params->cmp ?
5434 DXIL_INTR_TEXTURE_GATHER_CMP : DXIL_INTR_TEXTURE_GATHER),
5435 params->tex,
5436 params->sampler,
5437 params->coord[0],
5438 params->coord[1],
5439 params->coord[2],
5440 params->coord[3],
5441 params->offset[0],
5442 params->offset[1],
5443 dxil_module_get_int32_const(&ctx->mod, component),
5444 params->cmp
5445 };
5446
5447 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args) - (params->cmp ? 0 : 1));
5448 }
5449
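/* Translate a NIR texture instruction: gather all sources into
 * texop_parameters, pad unused components with undef, then dispatch on the
 * texop to the matching dx.op.* helper above.
 */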
5450 static bool
5451 emit_tex(struct ntd_context *ctx, nir_tex_instr *instr)
5452 {
5453 struct texop_parameters params;
5454 memset(&params, 0, sizeof(struct texop_parameters));
5455 if (ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN) {
5456 params.tex = ctx->srv_handles[instr->texture_index];
5457 params.sampler = ctx->sampler_handles[instr->sampler_index];
5458 }
5459
5460 const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
5461 const struct dxil_type *float_type = dxil_module_get_float_type(&ctx->mod, 32);
5462 const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);
5463 const struct dxil_value *float_undef = dxil_module_get_undef(&ctx->mod, float_type);
5464
5465 unsigned coord_components = 0, offset_components = 0, dx_components = 0, dy_components = 0;
5466 params.overload = get_overload(instr->dest_type, 32);
5467
5468 bool lod_is_zero = false;
5469 for (unsigned i = 0; i < instr->num_srcs; i++) {
5470 nir_alu_type type = nir_tex_instr_src_type(instr, i);
5471
5472 switch (instr->src[i].src_type) {
5473 case nir_tex_src_coord:
5474 coord_components = get_n_src(ctx, params.coord, ARRAY_SIZE(params.coord),
5475 &instr->src[i], type);
5476 if (!coord_components)
5477 return false;
5478 break;
5479
5480 case nir_tex_src_offset:
5481 offset_components = get_n_src(ctx, params.offset, ARRAY_SIZE(params.offset),
5482 &instr->src[i], nir_type_int);
5483 if (!offset_components)
5484 return false;
5485
5486 /* Dynamic offsets were only allowed with gather, until "advanced texture ops" in SM 6.7 */
5487 if (!nir_src_is_const(instr->src[i].src) && instr->op != nir_texop_tg4)
5488 ctx->mod.feats.advanced_texture_ops = true;
5489 break;
5490
5491 case nir_tex_src_bias:
5492 assert(instr->op == nir_texop_txb);
5493 assert(nir_src_num_components(instr->src[i].src) == 1);
5494 params.bias = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
5495 if (!params.bias)
5496 return false;
5497 break;
5498
5499 case nir_tex_src_lod:
5500 assert(nir_src_num_components(instr->src[i].src) == 1);
5501 if (instr->op == nir_texop_txf_ms) {
5502 assert(nir_src_as_int(instr->src[i].src) == 0);
5503 break;
5504 }
5505
5506 /* Buffers don't have a LOD */
5507 if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF)
5508 params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, type);
5509 else
5510 params.lod_or_sample = int_undef;
5511 if (!params.lod_or_sample)
5512 return false;
5513
5514 if (nir_src_is_const(instr->src[i].src) && nir_src_as_float(instr->src[i].src) == 0.0f)
5515 lod_is_zero = true;
5516 break;
5517
5518 case nir_tex_src_min_lod:
5519 assert(nir_src_num_components(instr->src[i].src) == 1);
5520 params.min_lod = get_src(ctx, &instr->src[i].src, 0, type);
5521 if (!params.min_lod)
5522 return false;
5523 break;
5524
5525 case nir_tex_src_comparator:
5526 assert(nir_src_num_components(instr->src[i].src) == 1);
5527 params.cmp = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
5528 if (!params.cmp)
5529 return false;
5530 break;
5531
5532 case nir_tex_src_ddx:
5533 dx_components = get_n_src(ctx, params.dx, ARRAY_SIZE(params.dx),
5534 &instr->src[i], nir_type_float);
5535 if (!dx_components)
5536 return false;
5537 break;
5538
5539 case nir_tex_src_ddy:
5540 dy_components = get_n_src(ctx, params.dy, ARRAY_SIZE(params.dy),
5541 &instr->src[i], nir_type_float);
5542 if (!dy_components)
5543 return false;
5544 break;
5545
5546 case nir_tex_src_ms_index:
5547 params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, nir_type_int);
5548 if (!params.lod_or_sample)
5549 return false;
5550 break;
5551
5552 case nir_tex_src_texture_deref:
5553 assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN);
5554 params.tex = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
5555 break;
5556
5557 case nir_tex_src_sampler_deref:
5558 assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN);
5559 params.sampler = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
5560 break;
5561
5562 case nir_tex_src_texture_offset:
5563 params.tex = emit_createhandle_call_dynamic(ctx, DXIL_RESOURCE_CLASS_SRV,
5564 0, instr->texture_index,
5565 dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
5566 get_src(ctx, &instr->src[i].src, 0, nir_type_uint),
5567 dxil_module_get_int32_const(&ctx->mod, instr->texture_index), 0),
5568 instr->texture_non_uniform);
5569 break;
5570
5571 case nir_tex_src_sampler_offset:
5572 if (nir_tex_instr_need_sampler(instr)) {
5573 params.sampler = emit_createhandle_call_dynamic(ctx, DXIL_RESOURCE_CLASS_SAMPLER,
5574 0, instr->sampler_index,
5575 dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
5576 get_src(ctx, &instr->src[i].src, 0, nir_type_uint),
5577 dxil_module_get_int32_const(&ctx->mod, instr->sampler_index), 0),
5578 instr->sampler_non_uniform);
5579 }
5580 break;
5581
5582 case nir_tex_src_texture_handle:
5583 params.tex = create_srv_handle(ctx, instr, &instr->src[i].src);
5584 break;
5585
5586 case nir_tex_src_sampler_handle:
5587 if (nir_tex_instr_need_sampler(instr))
5588 params.sampler = create_sampler_handle(ctx, instr->is_shadow, &instr->src[i].src);
5589 break;
5590
5591 case nir_tex_src_projector:
5592 unreachable("Texture projector should have been lowered");
5593
5594 default:
5595 fprintf(stderr, "texture source: %d\n", instr->src[i].src_type);
5596 unreachable("unknown texture source");
5597 }
5598 }
5599
5600 assert(params.tex != NULL);
5601 assert(instr->op == nir_texop_txf ||
5602 instr->op == nir_texop_txf_ms ||
5603 nir_tex_instr_is_query(instr) ||
5604 params.sampler != NULL);
5605
5606 PAD_SRC(ctx, params.coord, coord_components, float_undef);
5607 PAD_SRC(ctx, params.offset, offset_components, int_undef);
5608 if (!params.min_lod) params.min_lod = float_undef;
5609
5610 const struct dxil_value *sample = NULL;
5611 switch (instr->op) {
5612 case nir_texop_txb:
5613 if (params.cmp != NULL && ctx->mod.minor_version >= 8)
5614 sample = emit_sample_cmp_bias(ctx, &params);
5615 else
5616 sample = emit_sample_bias(ctx, &params);
5617 break;
5618
5619 case nir_texop_tex:
5620 if (params.cmp != NULL) {
5621 sample = emit_sample_cmp(ctx, &params);
5622 break;
5623 } else if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) {
5624 sample = emit_sample(ctx, &params);
5625 break;
5626 }
5627 params.lod_or_sample = dxil_module_get_float_const(&ctx->mod, 0);
5628 lod_is_zero = true;
5629 FALLTHROUGH;
5630 case nir_texop_txl:
5631 if (lod_is_zero && params.cmp != NULL && ctx->mod.minor_version < 7) {
5632 /* Prior to SM 6.7, if the level is constant 0.0, ignore the LOD argument,
5633 * so level-less DXIL instructions are used. This is needed to avoid emitting
5634 * dx.op.sampleCmpLevel, which would not be available.
5635 */
5636 sample = emit_sample_cmp_level_zero(ctx, &params);
5637 } else {
5638 if (params.cmp != NULL)
5639 sample = emit_sample_cmp_level(ctx, &params);
5640 else
5641 sample = emit_sample_level(ctx, &params);
5642 }
5643 break;
5644
5645 case nir_texop_txd:
5646 PAD_SRC(ctx, params.dx, dx_components, float_undef);
5647 PAD_SRC(ctx, params.dy, dy_components, float_undef);
5648 if (params.cmp != NULL && ctx->mod.minor_version >= 8)
5649 sample = emit_sample_cmp_grad(ctx, &params);
5650 else
5651 sample = emit_sample_grad(ctx, &params);
5652 break;
5653
5654 case nir_texop_txf:
5655 case nir_texop_txf_ms:
5656 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
5657 params.coord[1] = int_undef;
5658 sample = emit_bufferload_call(ctx, params.tex, params.coord, params.overload);
5659 } else {
5660 PAD_SRC(ctx, params.coord, coord_components, int_undef);
5661 sample = emit_texel_fetch(ctx, &params);
5662 }
5663 break;
5664
5665 case nir_texop_txs:
5666 sample = emit_texture_size(ctx, &params);
5667 break;
5668
5669 case nir_texop_tg4:
5670 sample = emit_texture_gather(ctx, &params, instr->component);
5671 break;
5672
5673 case nir_texop_lod:
5674 sample = emit_texture_lod(ctx, &params, true);
5675 store_def(ctx, &instr->def, 0, sample);
5676 sample = emit_texture_lod(ctx, &params, false);
5677 store_def(ctx, &instr->def, 1, sample);
5678 return true;
5679
5680 case nir_texop_query_levels: {
5681 params.lod_or_sample = dxil_module_get_int_const(&ctx->mod, 0, 32);
5682 sample = emit_texture_size(ctx, &params);
5683 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3);
5684 store_def(ctx, &instr->def, 0, retval);
5685 return true;
5686 }
5687
5688 case nir_texop_texture_samples: {
5689 params.lod_or_sample = int_undef;
5690 sample = emit_texture_size(ctx, &params);
5691 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3);
5692 store_def(ctx, &instr->def, 0, retval);
5693 return true;
5694 }
5695
5696 default:
5697 fprintf(stderr, "texture op: %d\n", instr->op);
5698 unreachable("unknown texture op");
5699 }
5700
5701 if (!sample)
5702 return false;
5703
5704 for (unsigned i = 0; i < instr->def.num_components; ++i) {
5705 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, i);
5706 store_def(ctx, &instr->def, i, retval);
5707 }
5708
5709 return true;
5710 }
5711
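/* NIR undefs have no direct DXIL counterpart here; a constant 0 is as good a
 * value as any for a don't-care result.
 */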
5712 static bool
5713 emit_undefined(struct ntd_context *ctx, nir_undef_instr *undef)
5714 {
5715 for (unsigned i = 0; i < undef->def.num_components; ++i)
5716 store_ssa_def(ctx, &undef->def, i, dxil_module_get_int32_const(&ctx->mod, 0));
5717 return true;
5718 }
5719
5720 static bool emit_instr(struct ntd_context *ctx, struct nir_instr *instr)
5721 {
5722 switch (instr->type) {
5723 case nir_instr_type_alu:
5724 return emit_alu(ctx, nir_instr_as_alu(instr));
5725 case nir_instr_type_intrinsic:
5726 return emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
5727 case nir_instr_type_load_const:
5728 return emit_load_const(ctx, nir_instr_as_load_const(instr));
5729 case nir_instr_type_deref:
5730 return emit_deref(ctx, nir_instr_as_deref(instr));
5731 case nir_instr_type_jump:
5732 return emit_jump(ctx, nir_instr_as_jump(instr));
5733 case nir_instr_type_phi:
5734 return emit_phi(ctx, nir_instr_as_phi(instr));
5735 case nir_instr_type_tex:
5736 return emit_tex(ctx, nir_instr_as_tex(instr));
5737 case nir_instr_type_undef:
5738 return emit_undefined(ctx, nir_instr_as_undef(instr));
5739 default:
5740 log_nir_instr_unsupported(ctx->logger, "Unimplemented instruction type",
5741 instr);
5742 return false;
5743 }
5744 }
5745
5746
5747 static bool
5748 emit_block(struct ntd_context *ctx, struct nir_block *block)
5749 {
5750 assert(block->index < ctx->mod.cur_emitting_func->num_basic_block_ids);
5751 ctx->mod.cur_emitting_func->basic_block_ids[block->index] = ctx->mod.cur_emitting_func->curr_block;
5752
5753 nir_foreach_instr(instr, block) {
5754 TRACE_CONVERSION(instr);
5755
5756 if (!emit_instr(ctx, instr)) {
5757 return false;
5758 }
5759 }
5760 return true;
5761 }
5762
5763 static bool
5764 emit_cf_list(struct ntd_context *ctx, struct exec_list *list);
5765
5766 static bool
5767 emit_if(struct ntd_context *ctx, struct nir_if *if_stmt)
5768 {
5769 assert(nir_src_num_components(if_stmt->condition) == 1);
5770 const struct dxil_value *cond = get_src(ctx, &if_stmt->condition, 0,
5771 nir_type_bool);
5772 if (!cond)
5773 return false;
5774
5775 /* prepare blocks */
5776 nir_block *then_block = nir_if_first_then_block(if_stmt);
5777 assert(nir_if_last_then_block(if_stmt)->successors[0]);
5778 assert(!nir_if_last_then_block(if_stmt)->successors[1]);
5779 int then_succ = nir_if_last_then_block(if_stmt)->successors[0]->index;
5780
5781 nir_block *else_block = NULL;
5782 int else_succ = -1;
5783 if (!exec_list_is_empty(&if_stmt->else_list)) {
5784 else_block = nir_if_first_else_block(if_stmt);
5785 assert(nir_if_last_else_block(if_stmt)->successors[0]);
5786 assert(!nir_if_last_else_block(if_stmt)->successors[1]);
5787 else_succ = nir_if_last_else_block(if_stmt)->successors[0]->index;
5788 }
5789
5790 if (!emit_cond_branch(ctx, cond, then_block->index,
5791 else_block ? else_block->index : then_succ))
5792 return false;
5793
5794 /* handle then-block */
5795 if (!emit_cf_list(ctx, &if_stmt->then_list) ||
5796 (!nir_block_ends_in_jump(nir_if_last_then_block(if_stmt)) &&
5797 !emit_branch(ctx, then_succ)))
5798 return false;
5799
5800 if (else_block) {
5801 /* handle else-block */
5802 if (!emit_cf_list(ctx, &if_stmt->else_list) ||
5803 (!nir_block_ends_in_jump(nir_if_last_else_block(if_stmt)) &&
5804 !emit_branch(ctx, else_succ)))
5805 return false;
5806 }
5807
5808 return true;
5809 }
5810
5811 static bool
5812 emit_loop(struct ntd_context *ctx, nir_loop *loop)
5813 {
5814 assert(!nir_loop_has_continue_construct(loop));
5815 nir_block *first_block = nir_loop_first_block(loop);
5816 nir_block *last_block = nir_loop_last_block(loop);
5817
5818 assert(last_block->successors[0]);
5819 assert(!last_block->successors[1]);
5820
5821 if (!emit_branch(ctx, first_block->index))
5822 return false;
5823
5824 if (!emit_cf_list(ctx, &loop->body))
5825 return false;
5826
5827 /* If the loop's last block doesn't explicitly jump somewhere, then there's
5828 * an implicit continue that should take it back to the first loop block
5829 */
5830 nir_instr *last_instr = nir_block_last_instr(last_block);
5831 if ((!last_instr || last_instr->type != nir_instr_type_jump) &&
5832 !emit_branch(ctx, first_block->index))
5833 return false;
5834
5835 return true;
5836 }
5837
5838 static bool
5839 emit_cf_list(struct ntd_context *ctx, struct exec_list *list)
5840 {
5841 foreach_list_typed(nir_cf_node, node, node, list) {
5842 switch (node->type) {
5843 case nir_cf_node_block:
5844 if (!emit_block(ctx, nir_cf_node_as_block(node)))
5845 return false;
5846 break;
5847
5848 case nir_cf_node_if:
5849 if (!emit_if(ctx, nir_cf_node_as_if(node)))
5850 return false;
5851 break;
5852
5853 case nir_cf_node_loop:
5854 if (!emit_loop(ctx, nir_cf_node_as_loop(node)))
5855 return false;
5856 break;
5857
5858 default:
5859 unreachable("unsupported cf-list node");
5860 break;
5861 }
5862 }
5863 return true;
5864 }
5865
5866 static void
5867 insert_sorted_by_binding(struct exec_list *var_list, nir_variable *new_var)
5868 {
5869 nir_foreach_variable_in_list(var, var_list) {
5870 if (var->data.binding > new_var->data.binding) {
5871 exec_node_insert_node_before(&var->node, &new_var->node);
5872 return;
5873 }
5874 }
5875 exec_list_push_tail(var_list, &new_var->node);
5876 }
5877
5878
5879 static void
5880 sort_uniforms_by_binding_and_remove_structs(nir_shader *s)
5881 {
5882 struct exec_list new_list;
5883 exec_list_make_empty(&new_list);
5884
5885 nir_foreach_variable_with_modes_safe(var, s, nir_var_uniform) {
5886 exec_node_remove(&var->node);
5887 const struct glsl_type *type = glsl_without_array(var->type);
5888 if (!glsl_type_is_struct(type))
5889 insert_sorted_by_binding(&new_list, var);
5890 }
5891 exec_list_append(&s->variables, &new_list);
5892 }
5893
5894 static bool
5895 emit_cbvs(struct ntd_context *ctx)
5896 {
5897 if (ctx->opts->environment != DXIL_ENVIRONMENT_GL) {
5898 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ubo) {
5899 if (!emit_ubo_var(ctx, var))
5900 return false;
5901 }
5902 } else {
5903 if (ctx->shader->info.num_ubos) {
5904 const unsigned ubo_size = 16384 /*4096 vec4's*/;
5905 bool has_ubo0 = !ctx->opts->no_ubo0;
5906 bool has_state_vars = ctx->opts->last_ubo_is_not_arrayed;
5907 unsigned ubo1_array_size = ctx->shader->info.num_ubos -
5908 (has_state_vars ? 2 : 1);
5909
5910 if (has_ubo0 &&
5911 !emit_cbv(ctx, 0, 0, ubo_size, 1, "__ubo_uniforms"))
5912 return false;
5913 if (ubo1_array_size &&
5914 !emit_cbv(ctx, 1, 0, ubo_size, ubo1_array_size, "__ubos"))
5915 return false;
5916 if (has_state_vars &&
5917 !emit_cbv(ctx, ctx->shader->info.num_ubos - 1, 0, ubo_size, 1, "__ubo_state_vars"))
5918 return false;
5919 }
5920 }
5921
5922 return true;
5923 }
5924
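/* Give every function-temp variable its own alloca and remember the pointer
 * in ctx->scratchvars, indexed by driver_location, for later deref access.
 */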
5925 static bool
5926 emit_scratch(struct ntd_context *ctx, nir_function_impl *impl)
5927 {
5928 uint32_t index = 0;
5929 nir_foreach_function_temp_variable(var, impl)
5930 var->data.driver_location = index++;
5931
5932 if (ctx->scratchvars)
5933 ralloc_free((void *)ctx->scratchvars);
5934
5935 ctx->scratchvars = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index);
5936
5937 nir_foreach_function_temp_variable(var, impl) {
5938 const struct dxil_type *type = get_type_for_glsl_type(&ctx->mod, var->type);
5939 const struct dxil_value *length = dxil_module_get_int32_const(&ctx->mod, 1);
5940 const struct dxil_value *ptr = dxil_emit_alloca(&ctx->mod, type, length, 16);
5941 if (!ptr)
5942 return false;
5943
5944 ctx->scratchvars[var->data.driver_location] = ptr;
5945 }
5946
5947 return true;
5948 }
5949
5950 static bool
5951 emit_function(struct ntd_context *ctx, nir_function *func, nir_function_impl *impl)
5952 {
5953 assert(func->num_params == 0);
5954 nir_metadata_require(impl, nir_metadata_block_index);
5955
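/* Map the NIR float-controls execution mode to the DXIL "fp32-denorm-mode"
 * function attribute: flush-to-zero becomes "ftz", denorm-preserve becomes
 * "preserve".
 */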
5956 const char *attr_keys[2] = { NULL };
5957 const char *attr_values[2] = { NULL };
5958 if (ctx->shader->info.float_controls_execution_mode &
5959 (FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32 | FLOAT_CONTROLS_DENORM_PRESERVE_FP32))
5960 attr_keys[0] = "fp32-denorm-mode";
5961 if (ctx->shader->info.float_controls_execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32)
5962 attr_values[0] = "ftz";
5963 else if (ctx->shader->info.float_controls_execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP32)
5964 attr_values[0] = "preserve";
5965
5966 const struct dxil_type *void_type = dxil_module_get_void_type(&ctx->mod);
5967 const struct dxil_type *func_type = dxil_module_add_function_type(&ctx->mod, void_type, NULL, 0);
5968 struct dxil_func_def *func_def = dxil_add_function_def(&ctx->mod, func->name, func_type, impl->num_blocks, attr_keys, attr_values);
5969 if (!func_def)
5970 return false;
5971
5972 if (func->is_entrypoint)
5973 ctx->main_func_def = func_def;
5974 else if (func == ctx->tess_ctrl_patch_constant_func)
5975 ctx->tess_ctrl_patch_constant_func_def = func_def;
5976
5977 ctx->defs = rzalloc_array(ctx->ralloc_ctx, struct dxil_def, impl->ssa_alloc);
5978 ctx->float_types = rzalloc_array(ctx->ralloc_ctx, BITSET_WORD, BITSET_WORDS(impl->ssa_alloc));
5979 ctx->int_types = rzalloc_array(ctx->ralloc_ctx, BITSET_WORD, BITSET_WORDS(impl->ssa_alloc));
5980 if (!ctx->defs || !ctx->float_types || !ctx->int_types)
5981 return false;
5982 ctx->num_defs = impl->ssa_alloc;
5983
5984 ctx->phis = _mesa_pointer_hash_table_create(ctx->ralloc_ctx);
5985 if (!ctx->phis)
5986 return false;
5987
5988 nir_gather_types(impl, ctx->float_types, ctx->int_types);
5989
5990 if (!emit_scratch(ctx, impl))
5991 return false;
5992
5993 if (!emit_static_indexing_handles(ctx))
5994 return false;
5995
5996 if (!emit_cf_list(ctx, &impl->body))
5997 return false;
5998
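/* Phis may reference SSA values that are only emitted after the phi itself,
 * so incoming values are recorded while emitting blocks and patched in here
 * once every block has been emitted.
 */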
5999 hash_table_foreach(ctx->phis, entry) {
6000 if (!fixup_phi(ctx, (nir_phi_instr *)entry->key,
6001 (struct phi_block *)entry->data))
6002 return false;
6003 }
6004
6005 if (!dxil_emit_ret_void(&ctx->mod))
6006 return false;
6007
6008 ralloc_free(ctx->defs);
6009 ctx->defs = NULL;
6010 _mesa_hash_table_destroy(ctx->phis, NULL);
6011 return true;
6012 }
6013
6014 static bool
6015 emit_module(struct ntd_context *ctx, const struct nir_to_dxil_options *opts)
6016 {
6017 /* The validator forces us to emit resources in a specific order:
6018 * CBVs, Samplers, SRVs, UAVs. While we are at it, also remove stale
6019 * struct uniforms; they have been lowered but might not have been removed. */
6020 sort_uniforms_by_binding_and_remove_structs(ctx->shader);
6021
6022 /* CBVs */
6023 if (!emit_cbvs(ctx))
6024 return false;
6025
6026 /* Samplers */
6027 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
6028 unsigned count = glsl_type_get_sampler_count(var->type);
6029 assert(count == 0 || glsl_type_is_bare_sampler(glsl_without_array(var->type)));
6030 if (count > 0 && !emit_sampler(ctx, var, count))
6031 return false;
6032 }
6033
6034 /* SRVs */
6035 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
6036 unsigned count = glsl_type_get_texture_count(var->type);
6037 assert(count == 0 || glsl_type_is_texture(glsl_without_array(var->type)));
6038 if (count > 0 && !emit_srv(ctx, var, count))
6039 return false;
6040 }
6041
6042 /* Handle read-only SSBOs as SRVs */
6043 if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
6044 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
6045 if ((var->data.access & ACCESS_NON_WRITEABLE) != 0) {
6046 unsigned count = 1;
6047 if (glsl_type_is_array(var->type))
6048 count = glsl_get_length(var->type);
6049 if (!emit_srv(ctx, var, count))
6050 return false;
6051 }
6052 }
6053 }
6054
6055 if (!emit_shared_vars(ctx))
6056 return false;
6057 if (!emit_global_consts(ctx))
6058 return false;
6059
6060 /* UAVs */
6061 if (ctx->shader->info.stage == MESA_SHADER_KERNEL) {
6062 if (!emit_globals(ctx, opts->num_kernel_globals))
6063 return false;
6064
6065 } else if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
6066 /* Handle read/write SSBOs as UAVs */
6067 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
6068 if ((var->data.access & ACCESS_NON_WRITEABLE) == 0) {
6069 unsigned count = 1;
6070 if (glsl_type_is_array(var->type))
6071 count = glsl_get_length(var->type);
6072 if (!emit_uav(ctx, var->data.binding, var->data.descriptor_set,
6073 count, DXIL_COMP_TYPE_INVALID, 1,
6074 DXIL_RESOURCE_KIND_RAW_BUFFER, var->data.access, var->name))
6075 return false;
6076
6077 }
6078 }
6079 } else {
6080 for (unsigned i = 0; i < ctx->shader->info.num_ssbos; ++i) {
6081 char name[64];
6082 snprintf(name, sizeof(name), "__ssbo%d", i);
6083 if (!emit_uav(ctx, i, 0, 1, DXIL_COMP_TYPE_INVALID, 1,
6084 DXIL_RESOURCE_KIND_RAW_BUFFER, 0, name))
6085 return false;
6086 }
6087 /* To work around a WARP bug, bind these descriptors a second time in descriptor
6088 * space 2. Space 0 will be used for static indexing, while space 2 will be used
6089 * for dynamic indexing. Space 0 will be individual SSBOs in the DXIL shader, while
6090 * space 2 will be a single array.
6091 */
6092 if (ctx->shader->info.num_ssbos &&
6093 !emit_uav(ctx, 0, 2, ctx->shader->info.num_ssbos, DXIL_COMP_TYPE_INVALID, 1,
6094 DXIL_RESOURCE_KIND_RAW_BUFFER, 0, "__ssbo_dynamic"))
6095 return false;
6096 }
6097
6098 nir_foreach_image_variable(var, ctx->shader) {
6099 if (!emit_uav_var(ctx, var, glsl_type_get_image_count(var->type)))
6100 return false;
6101 }
6102
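/* Record whether the pixel shader runs at sample frequency: that is the
 * case when it reads SV_SampleIndex, uses sample shading, or declares any
 * sample-qualified input.
 */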
6103 ctx->mod.info.has_per_sample_input =
6104 BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
6105 ctx->shader->info.fs.uses_sample_shading ||
6106 ctx->shader->info.fs.uses_sample_qualifier;
6107 if (!ctx->mod.info.has_per_sample_input && ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
6108 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in | nir_var_system_value) {
6109 if (var->data.sample) {
6110 ctx->mod.info.has_per_sample_input = true;
6111 break;
6112 }
6113 }
6114 }
6115
6116 /* From the Vulkan spec 1.3.238, section 15.8:
6117 * When Sample Shading is enabled, the x and y components of FragCoord reflect the location
6118 * of one of the samples corresponding to the shader invocation.
6119 *
6120 * In other words, if the fragment shader is executing per-sample, then the position variable
6121 * should always be per-sample.
6122 *
6123 * Also:
6124 * The Centroid interpolation decoration is ignored, but allowed, on FragCoord.
6125 */
6126 if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
6127 nir_variable *pos_var = nir_find_variable_with_location(ctx->shader, nir_var_shader_in, VARYING_SLOT_POS);
6128 if (pos_var) {
6129 if (ctx->mod.info.has_per_sample_input)
6130 pos_var->data.sample = true;
6131 pos_var->data.centroid = false;
6132 }
6133 }
6134
6135 unsigned input_clip_size = ctx->mod.shader_kind == DXIL_PIXEL_SHADER ?
6136 ctx->shader->info.clip_distance_array_size : ctx->opts->input_clip_size;
6137 preprocess_signatures(&ctx->mod, ctx->shader, input_clip_size);
6138
6139 nir_foreach_function_with_impl(func, impl, ctx->shader) {
6140 if (!emit_function(ctx, func, impl))
6141 return false;
6142 }
6143
6144 if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
6145 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_out) {
6146 if (var->data.location == FRAG_RESULT_STENCIL) {
6147 ctx->mod.feats.stencil_ref = true;
6148 }
6149 }
6150 } else if (ctx->shader->info.stage == MESA_SHADER_VERTEX ||
6151 ctx->shader->info.stage == MESA_SHADER_TESS_EVAL) {
6152 if (ctx->shader->info.outputs_written &
6153 (VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER))
6154 ctx->mod.feats.array_layer_from_vs_or_ds = true;
6155 } else if (ctx->shader->info.stage == MESA_SHADER_GEOMETRY ||
6156 ctx->shader->info.stage == MESA_SHADER_TESS_CTRL) {
6157 if (ctx->shader->info.inputs_read &
6158 (VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER))
6159 ctx->mod.feats.array_layer_from_vs_or_ds = true;
6160 }
6161
6162 if (ctx->mod.feats.native_low_precision && ctx->mod.minor_version < 2) {
6163 ctx->logger->log(ctx->logger->priv,
6164 "Shader uses 16-bit types, which require shader model 6.2, but 6.2 is unsupported\n");
6165 return false;
6166 }
6167
6168 return emit_metadata(ctx) &&
6169 dxil_emit_module(&ctx->mod);
6170 }
6171
6172 static unsigned int
6173 get_dxil_shader_kind(struct nir_shader *s)
6174 {
6175 switch (s->info.stage) {
6176 case MESA_SHADER_VERTEX:
6177 return DXIL_VERTEX_SHADER;
6178 case MESA_SHADER_TESS_CTRL:
6179 return DXIL_HULL_SHADER;
6180 case MESA_SHADER_TESS_EVAL:
6181 return DXIL_DOMAIN_SHADER;
6182 case MESA_SHADER_GEOMETRY:
6183 return DXIL_GEOMETRY_SHADER;
6184 case MESA_SHADER_FRAGMENT:
6185 return DXIL_PIXEL_SHADER;
6186 case MESA_SHADER_KERNEL:
6187 case MESA_SHADER_COMPUTE:
6188 return DXIL_COMPUTE_SHADER;
6189 default:
6190 unreachable("unknown shader stage in nir_to_dxil");
6191 return DXIL_COMPUTE_SHADER;
6192 }
6193 }
6194
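/* Callback for nir_lower_bit_size: return 0 to leave an ALU instruction
 * alone, or the bit size it should be rewritten at. Anything narrower than
 * the minimum supported size (16 bits, or 32 when lower_int16 is set) is
 * widened, except for conversions, vecs/movs, and boolean (1-bit) sources.
 * For example, an iadd on 8-bit sources returns 16 here.
 */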
6195 static unsigned
6196 lower_bit_size_callback(const nir_instr* instr, void *data)
6197 {
6198 if (instr->type != nir_instr_type_alu)
6199 return 0;
6200 nir_alu_instr *alu = nir_instr_as_alu(instr);
6201
6202 if (nir_op_infos[alu->op].is_conversion)
6203 return 0;
6204
6205 if (nir_op_is_vec_or_mov(alu->op))
6206 return 0;
6207
6208 unsigned num_inputs = nir_op_infos[alu->op].num_inputs;
6209 const struct nir_to_dxil_options *opts = (const struct nir_to_dxil_options*)data;
6210 unsigned min_bit_size = opts->lower_int16 ? 32 : 16;
6211
6212 unsigned ret = 0;
6213 for (unsigned i = 0; i < num_inputs; i++) {
6214 unsigned bit_size = nir_src_bit_size(alu->src[i].src);
6215 if (bit_size != 1 && bit_size < min_bit_size)
6216 ret = min_bit_size;
6217 }
6218
6219 return ret;
6220 }
6221
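/* Only let the vectorizer produce accesses with a power-of-two number of
 * components.
 */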
6222 static bool
6223 vectorize_filter(
6224 unsigned align_mul,
6225 unsigned align_offset,
6226 unsigned bit_size,
6227 unsigned num_components,
6228 nir_intrinsic_instr *low, nir_intrinsic_instr *high,
6229 void *data)
6230 {
6231 return util_is_power_of_two_nonzero(num_components);
6232 }
6233
6234 struct lower_mem_bit_sizes_data {
6235 const nir_shader_compiler_options *nir_options;
6236 const struct nir_to_dxil_options *dxil_options;
6237 };
6238
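/* Callback for nir_lower_mem_access_bit_sizes: pick a supported bit size
 * (16 or 32, or 32 only when lower_int16 is set) and at most 4 components
 * per access. For example, a 12-byte SSBO load with 4-byte alignment comes
 * back as 3x32-bit, while a 2-byte load with 2-byte alignment becomes
 * 1x16-bit when 16-bit types are supported.
 */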
6239 static nir_mem_access_size_align
6240 lower_mem_access_bit_sizes_cb(nir_intrinsic_op intrin,
6241 uint8_t bytes,
6242 uint8_t bit_size_in,
6243 uint32_t align_mul,
6244 uint32_t align_offset,
6245 bool offset_is_const,
6246 const void *cb_data)
6247 {
6248 const struct lower_mem_bit_sizes_data *data = cb_data;
6249 unsigned max_bit_size = 32;
6250 unsigned min_bit_size = data->dxil_options->lower_int16 ? 32 : 16;
6251 unsigned closest_bit_size = MAX2(min_bit_size, MIN2(max_bit_size, bit_size_in));
6252 if (intrin == nir_intrinsic_load_ubo) {
6253 /* UBO loads can be done at whatever (supported) bit size, but require 16-byte
6254 * alignment and can load up to 16 bytes per instruction. However, this pass would
6255 * need to load a full 16 bytes to guarantee 16-byte alignment. We're going to run
6256 * lower_ubo_vec4, which can deal with unaligned vec4s, so for this pass let's just
6257 * deal with bit size and total size restrictions. */
6258 return (nir_mem_access_size_align) {
6259 .align = closest_bit_size / 8,
6260 .bit_size = closest_bit_size,
6261 .num_components = DIV_ROUND_UP(MIN2(bytes, 16) * 8, closest_bit_size),
6262 };
6263 }
6264
6265 assert(intrin == nir_intrinsic_load_ssbo || intrin == nir_intrinsic_store_ssbo);
6266 uint32_t align = nir_combined_align(align_mul, align_offset);
6267 if (align < min_bit_size / 8) {
6268 /* Unaligned load/store, use the minimum bit size, up to 4 components */
6269 unsigned ideal_num_components = intrin == nir_intrinsic_load_ssbo ?
6270 DIV_ROUND_UP(bytes * 8, min_bit_size) :
6271 (32 / min_bit_size);
6272 return (nir_mem_access_size_align) {
6273 .align = min_bit_size / 8,
6274 .bit_size = min_bit_size,
6275 .num_components = MIN2(4, ideal_num_components),
6276 };
6277 }
6278
6279 /* Increase/decrease bit size to try to get closer to the requested byte size/align */
6280 unsigned bit_size = closest_bit_size;
6281 unsigned target = MIN2(bytes, align);
6282 while (target < bit_size / 8 && bit_size > min_bit_size)
6283 bit_size /= 2;
6284 while (target > bit_size / 8 * 4 && bit_size < max_bit_size)
6285 bit_size *= 2;
6286
6287 /* This is the best we can do */
6288 unsigned num_components = intrin == nir_intrinsic_load_ssbo ?
6289 DIV_ROUND_UP(bytes * 8, bit_size) :
6290 MAX2(1, (bytes * 8 / bit_size));
6291 return (nir_mem_access_size_align) {
6292 .align = bit_size / 8,
6293 .bit_size = bit_size,
6294 .num_components = MIN2(4, num_components),
6295 };
6296 }
6297
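/* The shared optimization loop: re-run the whole pass list until no pass
 * reports progress, then run the late algebraic optimizations to a fixed
 * point as well.
 */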
6298 static void
6299 optimize_nir(struct nir_shader *s, const struct nir_to_dxil_options *opts)
6300 {
6301 bool progress;
6302 do {
6303 progress = false;
6304 NIR_PASS_V(s, nir_lower_vars_to_ssa);
6305 NIR_PASS(progress, s, nir_lower_indirect_derefs, nir_var_function_temp, 4);
6306 NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
6307 NIR_PASS(progress, s, nir_copy_prop);
6308 NIR_PASS(progress, s, nir_opt_copy_prop_vars);
6309 NIR_PASS(progress, s, nir_lower_bit_size, lower_bit_size_callback, (void*)opts);
6310 NIR_PASS(progress, s, dxil_nir_lower_8bit_conv);
6311 if (opts->lower_int16)
6312 NIR_PASS(progress, s, dxil_nir_lower_16bit_conv);
6313 NIR_PASS(progress, s, nir_opt_remove_phis);
6314 NIR_PASS(progress, s, nir_opt_dce);
6315 NIR_PASS(progress, s, nir_opt_if,
6316 nir_opt_if_optimize_phi_true_false | nir_opt_if_avoid_64bit_phis);
6317 NIR_PASS(progress, s, nir_opt_dead_cf);
6318 NIR_PASS(progress, s, nir_opt_cse);
6319 NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
6320 NIR_PASS(progress, s, nir_opt_algebraic);
6321 NIR_PASS(progress, s, dxil_nir_algebraic);
6322 if (s->options->lower_int64_options)
6323 NIR_PASS(progress, s, nir_lower_int64);
6324 NIR_PASS(progress, s, nir_lower_alu);
6325 NIR_PASS(progress, s, nir_opt_constant_folding);
6326 NIR_PASS(progress, s, nir_opt_undef);
6327 NIR_PASS(progress, s, nir_lower_undef_to_zero);
6328 NIR_PASS(progress, s, nir_opt_deref);
6329 NIR_PASS(progress, s, dxil_nir_lower_upcast_phis, opts->lower_int16 ? 32 : 16);
6330 NIR_PASS(progress, s, nir_lower_64bit_phis);
6331 NIR_PASS(progress, s, nir_lower_phis_to_scalar, true);
6332 NIR_PASS(progress, s, nir_opt_loop_unroll);
6333 NIR_PASS(progress, s, nir_lower_pack);
6334 NIR_PASS_V(s, nir_lower_system_values);
6335 } while (progress);
6336
6337 do {
6338 progress = false;
6339 NIR_PASS(progress, s, nir_opt_algebraic_late);
6340 } while (progress);
6341 }
6342
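/* Fill in the state-validation (PSV) data that accompanies the module in
 * the container: resource counts, signature element counts, and per-stage
 * properties such as the compute thread-group size or the GS output
 * topology.
 */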
6343 static void
6344 dxil_fill_validation_state(struct ntd_context *ctx,
6345 struct dxil_validation_state *state)
6346 {
6347 unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
6348 sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
6349 state->num_resources = ctx->resources.size / resource_element_size;
6350 state->resources.v0 = (struct dxil_resource_v0*)ctx->resources.data;
6351 if (ctx->shader->info.subgroup_size >= SUBGROUP_SIZE_REQUIRE_8) {
6352 state->state.psv1.psv0.max_expected_wave_lane_count = ctx->shader->info.subgroup_size;
6353 state->state.psv1.psv0.min_expected_wave_lane_count = ctx->shader->info.subgroup_size;
6354 } else {
6355 state->state.psv1.psv0.max_expected_wave_lane_count = UINT_MAX;
6356 }
6357 state->state.psv1.shader_stage = (uint8_t)ctx->mod.shader_kind;
6358 state->state.psv1.uses_view_id = (uint8_t)ctx->mod.feats.view_id;
6359 state->state.psv1.sig_input_elements = (uint8_t)ctx->mod.num_sig_inputs;
6360 state->state.psv1.sig_output_elements = (uint8_t)ctx->mod.num_sig_outputs;
6361 state->state.psv1.sig_patch_const_or_prim_elements = (uint8_t)ctx->mod.num_sig_patch_consts;
6362
6363 switch (ctx->mod.shader_kind) {
6364 case DXIL_VERTEX_SHADER:
6365 state->state.psv1.psv0.vs.output_position_present = ctx->mod.info.has_out_position;
6366 break;
6367 case DXIL_PIXEL_SHADER:
6368 /* TODO: handle depth outputs */
6369 state->state.psv1.psv0.ps.depth_output = ctx->mod.info.has_out_depth;
6370 state->state.psv1.psv0.ps.sample_frequency =
6371 ctx->mod.info.has_per_sample_input;
6372 break;
6373 case DXIL_COMPUTE_SHADER:
6374 state->state.num_threads_x = MAX2(ctx->shader->info.workgroup_size[0], 1);
6375 state->state.num_threads_y = MAX2(ctx->shader->info.workgroup_size[1], 1);
6376 state->state.num_threads_z = MAX2(ctx->shader->info.workgroup_size[2], 1);
6377 break;
6378 case DXIL_GEOMETRY_SHADER:
6379 state->state.psv1.max_vertex_count = ctx->shader->info.gs.vertices_out;
6380 state->state.psv1.psv0.gs.input_primitive = dxil_get_input_primitive(ctx->shader->info.gs.input_primitive);
6381 state->state.psv1.psv0.gs.output_toplology = dxil_get_primitive_topology(ctx->shader->info.gs.output_primitive);
6382 state->state.psv1.psv0.gs.output_stream_mask = MAX2(ctx->shader->info.gs.active_stream_mask, 1);
6383 state->state.psv1.psv0.gs.output_position_present = ctx->mod.info.has_out_position;
6384 break;
6385 case DXIL_HULL_SHADER:
6386 state->state.psv1.psv0.hs.input_control_point_count = ctx->tess_input_control_point_count;
6387 state->state.psv1.psv0.hs.output_control_point_count = ctx->shader->info.tess.tcs_vertices_out;
6388 state->state.psv1.psv0.hs.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode);
6389 state->state.psv1.psv0.hs.tessellator_output_primitive = get_tessellator_output_primitive(&ctx->shader->info);
6390 state->state.psv1.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts;
6391 break;
6392 case DXIL_DOMAIN_SHADER:
6393 state->state.psv1.psv0.ds.input_control_point_count = ctx->shader->info.tess.tcs_vertices_out;
6394 state->state.psv1.psv0.ds.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode);
6395 state->state.psv1.psv0.ds.output_position_present = ctx->mod.info.has_out_position;
6396 state->state.psv1.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts;
6397 break;
6398 default:
6399 assert(0 && "Shader type not (yet) supported");
6400 }
6401 }
6402
6403 static nir_variable *
6404 add_sysvalue(struct ntd_context *ctx,
6405 uint8_t value, char *name,
6406 int driver_location)
6407 {
6408
6409 nir_variable *var = rzalloc(ctx->shader, nir_variable);
6410 if (!var)
6411 return NULL;
6412 var->data.driver_location = driver_location;
6413 var->data.location = value;
6414 var->type = glsl_uint_type();
6415 var->name = name;
6416 var->data.mode = nir_var_system_value;
6417 var->data.interpolation = INTERP_MODE_FLAT;
6418 return var;
6419 }
6420
6421 static bool
6422 append_input_or_sysvalue(struct ntd_context *ctx,
6423 int input_loc, int sv_slot,
6424 char *name, int driver_location)
6425 {
6426 if (input_loc >= 0) {
6427 /* Check the inputs for a variable that corresponds
6428 * to the sysvalue */
6429 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
6430 if (var->data.location == input_loc) {
6431 ctx->system_value[sv_slot] = var;
6432 return true;
6433 }
6434 }
6435 }
6436
6437 ctx->system_value[sv_slot] = add_sysvalue(ctx, sv_slot, name, driver_location);
6438 if (!ctx->system_value[sv_slot])
6439 return false;
6440
6441 nir_shader_add_variable(ctx->shader, ctx->system_value[sv_slot]);
6442 return true;
6443 }
6444
6445 struct sysvalue_name {
6446 gl_system_value value;
6447 int slot;
6448 char *name;
6449 gl_shader_stage only_in_shader;
6450 } possible_sysvalues[] = {
6451 {SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, -1, "SV_VertexID", MESA_SHADER_NONE},
6452 {SYSTEM_VALUE_INSTANCE_ID, -1, "SV_InstanceID", MESA_SHADER_NONE},
6453 {SYSTEM_VALUE_FRONT_FACE, VARYING_SLOT_FACE, "SV_IsFrontFace", MESA_SHADER_NONE},
6454 {SYSTEM_VALUE_PRIMITIVE_ID, VARYING_SLOT_PRIMITIVE_ID, "SV_PrimitiveID", MESA_SHADER_GEOMETRY},
6455 {SYSTEM_VALUE_SAMPLE_ID, -1, "SV_SampleIndex", MESA_SHADER_NONE},
6456 };
6457
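/* For each system value the shader reads, either reuse a matching input
 * variable (e.g. front face or primitive ID arriving as a varying) or
 * synthesize a flat uint system-value variable at the next unused
 * driver_location.
 */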
6458 static bool
6459 allocate_sysvalues(struct ntd_context *ctx)
6460 {
6461 unsigned driver_location = 0;
6462 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in)
6463 driver_location = MAX2(driver_location, var->data.driver_location + 1);
6464 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_system_value)
6465 driver_location = MAX2(driver_location, var->data.driver_location + 1);
6466
6467 if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT &&
6468 !BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID)) {
6469 bool need_sample_id = ctx->shader->info.fs.uses_sample_shading;
6470
6471 /* "var->data.sample = true" sometimes just means "I want per-sample
6472 * shading", which explains why we can end up with vars having flat
6473 * interpolation with the per-sample bit set. If there's only such
6474 * type of variables, we need to tell DXIL that we read SV_SampleIndex
6475 * to make DXIL validation happy.
6476 */
6477 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
6478 bool var_can_be_sample_rate = !var->data.centroid && var->data.interpolation != INTERP_MODE_FLAT;
6479 /* If there's an input that will actually force sample-rate shading, then we don't
6480 * need SV_SampleIndex. */
6481 if (var->data.sample && var_can_be_sample_rate) {
6482 need_sample_id = false;
6483 break;
6484 }
6485 /* If there's an input that wants to be sample-rate, but can't be, then we might
6486 * need SV_SampleIndex. */
6487 if (var->data.sample && !var_can_be_sample_rate)
6488 need_sample_id = true;
6489 }
6490
6491 if (need_sample_id)
6492 BITSET_SET(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
6493 }
6494
6495 for (unsigned i = 0; i < ARRAY_SIZE(possible_sysvalues); ++i) {
6496 struct sysvalue_name *info = &possible_sysvalues[i];
6497 if (info->only_in_shader != MESA_SHADER_NONE &&
6498 info->only_in_shader != ctx->shader->info.stage)
6499 continue;
6500 if (BITSET_TEST(ctx->shader->info.system_values_read, info->value)) {
6501 if (!append_input_or_sysvalue(ctx, info->slot,
6502 info->value, info->name,
6503 driver_location++))
6504 return false;
6505 }
6506 }
6507 return true;
6508 }
6509
6510 static int
6511 type_size_vec4(const struct glsl_type *type, bool bindless)
6512 {
6513 return glsl_count_attribute_slots(type, false);
6514 }
6515
6516 static const unsigned dxil_validator_min_capable_version = DXIL_VALIDATOR_1_4;
6517 static const unsigned dxil_validator_max_capable_version = DXIL_VALIDATOR_1_8;
6518 static const unsigned dxil_min_shader_model = SHADER_MODEL_6_0;
6519 static const unsigned dxil_max_shader_model = SHADER_MODEL_6_8;
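/* Both shader-model and validator versions are packed as
 * (major << 16) | minor, so e.g. SHADER_MODEL_6_2 compares greater than
 * SHADER_MODEL_6_1 as a plain integer.
 */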
6520
6521 bool
6522 nir_to_dxil(struct nir_shader *s, const struct nir_to_dxil_options *opts,
6523 const struct dxil_logger *logger, struct blob *blob)
6524 {
6525 assert(opts);
6526 bool retval = true;
6527 debug_dxil = (int)debug_get_option_debug_dxil();
6528 blob_init(blob);
6529
6530 if (opts->shader_model_max < dxil_min_shader_model) {
6531 debug_printf("D3D12: cannot support emitting shader models lower than %d.%d\n",
6532 dxil_min_shader_model >> 16,
6533 dxil_min_shader_model & 0xffff);
6534 return false;
6535 }
6536
6537 if (opts->shader_model_max > dxil_max_shader_model) {
6538 debug_printf("D3D12: cannot support emitting shader models higher than %d.%d\n",
6539 dxil_max_shader_model >> 16,
6540 dxil_max_shader_model & 0xffff);
6541 return false;
6542 }
6543
6544 if (opts->validator_version_max != NO_DXIL_VALIDATION &&
6545 opts->validator_version_max < dxil_validator_min_capable_version) {
6546 debug_printf("D3D12: Invalid validator version %d.%d, must be 1.4 or greater\n",
6547 opts->validator_version_max >> 16,
6548 opts->validator_version_max & 0xffff);
6549 return false;
6550 }
6551
6552 /* If validation is disabled, write the blob as if it were going to be validated by the newest
6553 * validator we understand. Do the same if the validator is newer than the ones we know how to write for.
6554 */
6555 uint32_t validator_version =
6556 opts->validator_version_max == NO_DXIL_VALIDATION ||
6557 opts->validator_version_max > dxil_validator_max_capable_version ?
6558 dxil_validator_max_capable_version : opts->validator_version_max;
6559
6560 struct ntd_context *ctx = calloc(1, sizeof(*ctx));
6561 if (!ctx)
6562 return false;
6563
6564 ctx->opts = opts;
6565 ctx->shader = s;
6566 ctx->logger = logger ? logger : &default_logger;
6567
6568 ctx->ralloc_ctx = ralloc_context(NULL);
6569 if (!ctx->ralloc_ctx) {
6570 retval = false;
6571 goto out;
6572 }
6573
6574 util_dynarray_init(&ctx->srv_metadata_nodes, ctx->ralloc_ctx);
6575 util_dynarray_init(&ctx->uav_metadata_nodes, ctx->ralloc_ctx);
6576 util_dynarray_init(&ctx->cbv_metadata_nodes, ctx->ralloc_ctx);
6577 util_dynarray_init(&ctx->sampler_metadata_nodes, ctx->ralloc_ctx);
6578 util_dynarray_init(&ctx->resources, ctx->ralloc_ctx);
6579 dxil_module_init(&ctx->mod, ctx->ralloc_ctx);
6580 ctx->mod.shader_kind = get_dxil_shader_kind(s);
6581 ctx->mod.major_version = 6;
6582 /* Use the highest shader model that's supported and can be validated */
6583 ctx->mod.minor_version =
6584 MIN2(opts->shader_model_max & 0xffff, validator_version & 0xffff);
6585 ctx->mod.major_validator = validator_version >> 16;
6586 ctx->mod.minor_validator = validator_version & 0xffff;
6587
6588 if (s->info.stage <= MESA_SHADER_FRAGMENT) {
6589 uint64_t in_mask =
6590 s->info.stage == MESA_SHADER_VERTEX ?
6591 0 : (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER);
6592 uint64_t out_mask =
6593 s->info.stage == MESA_SHADER_FRAGMENT ?
6594 ((1ull << FRAG_RESULT_STENCIL) | (1ull << FRAG_RESULT_SAMPLE_MASK)) :
6595 (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER);
6596
6597 NIR_PASS_V(s, dxil_nir_fix_io_uint_type, in_mask, out_mask);
6598 }
6599
6600 NIR_PASS_V(s, dxil_nir_lower_fquantize2f16);
6601 NIR_PASS_V(s, nir_lower_frexp);
6602 NIR_PASS_V(s, nir_lower_flrp, 16 | 32 | 64, true);
6603 NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4, nir_lower_io_lower_64bit_to_32);
6604 NIR_PASS_V(s, dxil_nir_ensure_position_writes);
6605 NIR_PASS_V(s, dxil_nir_lower_system_values);
6606 NIR_PASS_V(s, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_system_value | nir_var_shader_out, NULL, NULL);
6607
6608 /* Do a round of optimization to try to vectorize loads/stores. Otherwise the addresses used for loads
6609 * might be too opaque for the pass to see that they're next to each other. */
6610 optimize_nir(s, opts);
6611
6612 /* Vectorize UBO/SSBO accesses aggressively. This can help increase alignment to enable us to do better
6613 * chunking of loads and stores after lowering bit sizes. Ignore load/store size limitations here, we'll
6614 * address them with lower_mem_access_bit_sizes */
6615 nir_load_store_vectorize_options vectorize_opts = {
6616 .callback = vectorize_filter,
6617 .modes = nir_var_mem_ubo | nir_var_mem_ssbo,
6618 };
6619 NIR_PASS_V(s, nir_opt_load_store_vectorize, &vectorize_opts);
6620
6621 /* Now that they're bloated to the max, address bit size restrictions and overall size limitations for
6622 * a single load/store op. */
6623 struct lower_mem_bit_sizes_data mem_size_data = { s->options, opts };
6624 nir_lower_mem_access_bit_sizes_options mem_size_options = {
6625 .modes = nir_var_mem_ubo | nir_var_mem_ssbo,
6626 .callback = lower_mem_access_bit_sizes_cb,
6627 .may_lower_unaligned_stores_to_atomics = true,
6628 .cb_data = &mem_size_data
6629 };
6630 NIR_PASS_V(s, nir_lower_mem_access_bit_sizes, &mem_size_options);
6631
6632 /* Lastly, convert byte-addressed UBO loads to vec4-addressed ones. This pass can also deal with
6633 * selecting sub-components from the load and with loads that straddle vec4 boundaries. */
6634 NIR_PASS_V(s, nir_lower_ubo_vec4);
6635
6636 if (opts->shader_model_max < SHADER_MODEL_6_6) {
6637 /* In a later pass, load_helper_invocation will be lowered to a sample-mask-based fallback,
6638 * so both load_helper_invocation and is_helper_invocation end up being emulated.
6639 */
6640 NIR_PASS_V(s, nir_lower_is_helper_invocation);
6641 }
6642
6643 if (ctx->mod.shader_kind == DXIL_HULL_SHADER)
6644 NIR_PASS_V(s, dxil_nir_split_tess_ctrl, &ctx->tess_ctrl_patch_constant_func);
6645
6646 if (ctx->mod.shader_kind == DXIL_HULL_SHADER ||
6647 ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) {
6648 /* Make sure any derefs are gone after lower_io before updating tess level vars */
6649 NIR_PASS_V(s, nir_opt_dce);
6650 NIR_PASS_V(s, dxil_nir_fixup_tess_level_for_domain);
6651 }
6652
6653 optimize_nir(s, opts);
6654
6655 NIR_PASS_V(s, nir_remove_dead_variables,
6656 nir_var_function_temp | nir_var_mem_constant | nir_var_mem_shared, NULL);
6657
6658 if (!allocate_sysvalues(ctx))
6659 return false;
6660
6661 NIR_PASS_V(s, dxil_nir_lower_sysval_to_load_input, ctx->system_value);
6662 NIR_PASS_V(s, nir_opt_dce);
6663
6664 /* This needs to be after any copy prop is done to prevent these movs from being erased */
6665 NIR_PASS_V(s, dxil_nir_move_consts);
6666 NIR_PASS_V(s, nir_opt_dce);
6667
6668 NIR_PASS_V(s, dxil_nir_guess_image_formats);
6669
6670 if (debug_dxil & DXIL_DEBUG_VERBOSE)
6671 nir_print_shader(s, stderr);
6672
6673 if (!emit_module(ctx, opts)) {
6674 debug_printf("D3D12: emit_module failed\n");
6675 retval = false;
6676 goto out;
6677 }
6678
6679 if (debug_dxil & DXIL_DEBUG_DUMP_MODULE) {
6680 struct dxil_dumper *dumper = dxil_dump_create();
6681 dxil_dump_module(dumper, &ctx->mod);
6682 fprintf(stderr, "\n");
6683 dxil_dump_buf_to_file(dumper, stderr);
6684 fprintf(stderr, "\n\n");
6685 dxil_dump_free(dumper);
6686 }
6687
6688 struct dxil_container container;
6689 dxil_container_init(&container);
6690 /* Native low precision disables min-precision */
6691 if (ctx->mod.feats.native_low_precision)
6692 ctx->mod.feats.min_precision = false;
6693 if (!dxil_container_add_features(&container, &ctx->mod.feats)) {
6694 debug_printf("D3D12: dxil_container_add_features failed\n");
6695 retval = false;
6696 goto out;
6697 }
6698
6699 if (!dxil_container_add_io_signature(&container,
6700 DXIL_ISG1,
6701 ctx->mod.num_sig_inputs,
6702 ctx->mod.inputs,
6703 ctx->mod.minor_validator >= 7)) {
6704 debug_printf("D3D12: failed to write input signature\n");
6705 retval = false;
6706 goto out;
6707 }
6708
6709 if (!dxil_container_add_io_signature(&container,
6710 DXIL_OSG1,
6711 ctx->mod.num_sig_outputs,
6712 ctx->mod.outputs,
6713 ctx->mod.minor_validator >= 7)) {
6714 debug_printf("D3D12: failed to write output signature\n");
6715 retval = false;
6716 goto out;
6717 }
6718
6719 if ((ctx->mod.shader_kind == DXIL_HULL_SHADER ||
6720 ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) &&
6721 !dxil_container_add_io_signature(&container,
6722 DXIL_PSG1,
6723 ctx->mod.num_sig_patch_consts,
6724 ctx->mod.patch_consts,
6725 ctx->mod.minor_validator >= 7)) {
6726 debug_printf("D3D12: failed to write patch constant signature\n");
6727 retval = false;
6728 goto out;
6729 }
6730
6731 struct dxil_validation_state validation_state;
6732 memset(&validation_state, 0, sizeof(validation_state));
6733 dxil_fill_validation_state(ctx, &validation_state);
6734
6735 if (!dxil_container_add_state_validation(&container, &ctx->mod,
6736 &validation_state)) {
6737 debug_printf("D3D12: failed to write state-validation\n");
6738 retval = false;
6739 goto out;
6740 }
6741
6742 if (!dxil_container_add_module(&container, &ctx->mod)) {
6743 debug_printf("D3D12: failed to write module\n");
6744 retval = false;
6745 goto out;
6746 }
6747
6748 if (!dxil_container_write(&container, blob)) {
6749 debug_printf("D3D12: dxil_container_write failed\n");
6750 retval = false;
6751 goto out;
6752 }
6753 dxil_container_finish(&container);
6754
6755 if (debug_dxil & DXIL_DEBUG_DUMP_BLOB) {
6756 static int shader_id = 0;
6757 char buffer[64];
6758 snprintf(buffer, sizeof(buffer), "shader_%s_%d.blob",
6759 get_shader_kind_str(ctx->mod.shader_kind), shader_id++);
6760 debug_printf("Trying to write blob to %s\n", buffer);
6761 FILE *f = fopen(buffer, "wb");
6762 if (f) {
6763 fwrite(blob->data, 1, blob->size, f);
6764 fclose(f);
6765 }
6766 }
6767
6768 out:
6769 dxil_module_release(&ctx->mod);
6770 ralloc_free(ctx->ralloc_ctx);
6771 free(ctx);
6772 return retval;
6773 }
6774