• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir_to_dxil.h"
25 
26 #include "dxil_container.h"
27 #include "dxil_dump.h"
28 #include "dxil_enums.h"
29 #include "dxil_function.h"
30 #include "dxil_module.h"
31 #include "dxil_nir.h"
32 #include "dxil_signature.h"
33 
34 #include "nir/nir_builder.h"
35 #include "nir_deref.h"
36 #include "util/ralloc.h"
37 #include "util/u_debug.h"
38 #include "util/u_dynarray.h"
39 #include "util/u_math.h"
40 
41 #include "git_sha1.h"
42 
43 #include "vulkan/vulkan_core.h"
44 
45 #include <stdint.h>
46 
/* Bitmask of DXIL_DEBUG_* flags, populated once from the DXIL_DEBUG
 * environment variable via debug_get_option_debug_dxil(). */
int debug_dxil = 0;

/* Maps DXIL_DEBUG environment-variable tokens to debug flag bits. */
static const struct debug_named_value
dxil_debug_options[] = {
   { "verbose", DXIL_DEBUG_VERBOSE, NULL },
   { "dump_blob",  DXIL_DEBUG_DUMP_BLOB , "Write shader blobs" },
   { "trace",  DXIL_DEBUG_TRACE , "Trace instruction conversion" },
   { "dump_module", DXIL_DEBUG_DUMP_MODULE, "dump module tree to stderr"},
   DEBUG_NAMED_VALUE_END
};

/* Generates debug_get_option_debug_dxil(), which parses DXIL_DEBUG once. */
DEBUG_GET_ONCE_FLAGS_OPTION(debug_dxil, "DXIL_DEBUG", dxil_debug_options, 0)
59 
60 static void
log_nir_instr_unsupported(const struct dxil_logger * logger,const char * message_prefix,const nir_instr * instr)61 log_nir_instr_unsupported(const struct dxil_logger *logger,
62                           const char *message_prefix, const nir_instr *instr)
63 {
64    char *msg = NULL;
65    char *instr_str = nir_instr_as_str(instr, NULL);
66    asprintf(&msg, "%s: %s\n", message_prefix, instr_str);
67    ralloc_free(instr_str);
68    assert(msg);
69    logger->log(logger->priv, msg);
70    free(msg);
71 }
72 
73 static void
default_logger_func(void * priv,const char * msg)74 default_logger_func(void *priv, const char *msg)
75 {
76    fprintf(stderr, "%s", msg);
77    unreachable("Unhandled error");
78 }
79 
80 static const struct dxil_logger default_logger = { .priv = NULL, .log = default_logger_func };
81 
/* Print the NIR instruction about to be converted when tracing is enabled.
 *
 * Fix over the previous version: the `if` sat *outside* the do/while, so the
 * macro expanded to a bare `if` statement — a dangling-else hazard that could
 * silently re-bind an `else` in code like
 * `if (c) TRACE_CONVERSION(i); else ...`. The conditional now lives inside
 * the canonical do { } while (0) wrapper. */
#define TRACE_CONVERSION(instr) \
   do { \
      if (debug_dxil & DXIL_DEBUG_TRACE) { \
         fprintf(stderr, "Convert '"); \
         nir_print_instr(instr, stderr); \
         fprintf(stderr, "'\n"); \
      } \
   } while (0)
89 
/* Baseline NIR compiler options for the DXIL backend: every ALU/IO lowering
 * that DXIL has no native encoding for is requested here. Callers clone and
 * specialize this table via dxil_get_nir_compiler_options(). */
static const nir_shader_compiler_options
nir_options = {
   .compact_arrays = true,
   /* ALU ops DXIL expresses via other opcodes — lower them in NIR. */
   .lower_ineg = true,
   .lower_fneg = true,
   .lower_ffma16 = true,
   .lower_ffma32 = true,
   .lower_isign = true,
   .lower_fsign = true,
   .lower_iabs = true,
   .lower_fmod = true,
   .lower_fpow = true,
   .lower_scmp = true,
   .lower_ldexp = true,
   .lower_flrp16 = true,
   .lower_flrp32 = true,
   .lower_flrp64 = true,
   .lower_bitfield_extract = true,
   .lower_ifind_msb = true,
   .lower_ufind_msb = true,
   .lower_extract_word = true,
   .lower_extract_byte = true,
   .lower_insert_word = true,
   .lower_insert_byte = true,
   .lower_all_io_to_elements = true,
   .lower_hadd = true,
   .lower_uadd_sat = true,
   .lower_usub_sat = true,
   .lower_iadd_sat = true,
   .lower_uadd_carry = true,
   .lower_usub_borrow = true,
   .lower_mul_high = true,
   /* Pack/unpack conversions are emitted as shifts/masks instead. */
   .lower_pack_half_2x16 = true,
   .lower_pack_unorm_4x8 = true,
   .lower_pack_snorm_4x8 = true,
   .lower_pack_snorm_2x16 = true,
   .lower_pack_unorm_2x16 = true,
   .lower_pack_64_2x32_split = true,
   .lower_pack_32_2x16_split = true,
   .lower_pack_64_4x16 = true,
   .lower_unpack_64_2x32_split = true,
   .lower_unpack_32_2x16_split = true,
   .lower_unpack_half_2x16 = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_interpolate_at = true,
   /* Native DXIL capabilities NIR can target directly. */
   .has_fsub = true,
   .has_isub = true,
   .has_bfe = true,
   .has_find_msb_rev = true,
   .vertex_id_zero_based = true,
   .lower_base_vertex = true,
   .lower_helper_invocation = true,
   .has_cs_global_id = true,
   .lower_mul_2x32_64 = true,
   /* Double ops DXIL lacks; lowered to sequences of supported ops. */
   .lower_doubles_options =
      nir_lower_drcp |
      nir_lower_dsqrt |
      nir_lower_drsq |
      nir_lower_dfract |
      nir_lower_dtrunc |
      nir_lower_dfloor |
      nir_lower_dceil |
      nir_lower_dround_even,
   .lower_uniforms_to_ubo = true,
   .max_unroll_iterations = 32, /* arbitrary */
   .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out),
   .lower_device_index_to_zero = true,
   .support_16bit_alu = true,
   .preserve_mediump = true,
   .discard_is_demote = true,
   .scalarize_ddx = true,
   .io_options = nir_io_dont_use_pos_for_non_fs_varyings | nir_io_mediump_is_32bit,
};
166 
/* Return the immutable baseline option table; callers that need a
 * capability-specialized copy should use dxil_get_nir_compiler_options(). */
const nir_shader_compiler_options*
dxil_get_base_nir_compiler_options(void)
{
   return &nir_options;
}
172 
173 void
dxil_get_nir_compiler_options(nir_shader_compiler_options * options,enum dxil_shader_model shader_model_max,unsigned supported_int_sizes,unsigned supported_float_sizes)174 dxil_get_nir_compiler_options(nir_shader_compiler_options *options,
175                               enum dxil_shader_model shader_model_max,
176                               unsigned supported_int_sizes,
177                               unsigned supported_float_sizes)
178 {
179    *options = nir_options;
180    if (!(supported_int_sizes & 64)) {
181       options->lower_pack_64_2x32_split = false;
182       options->lower_unpack_64_2x32_split = false;
183       options->lower_int64_options = ~0;
184    }
185    if (!(supported_float_sizes & 64))
186       options->lower_doubles_options = ~0;
187    if (shader_model_max >= SHADER_MODEL_6_4) {
188       options->has_sdot_4x8 = true;
189       options->has_udot_4x8 = true;
190    }
191 }
192 
193 static bool
emit_llvm_ident(struct dxil_module * m)194 emit_llvm_ident(struct dxil_module *m)
195 {
196    const struct dxil_mdnode *compiler = dxil_get_metadata_string(m, "Mesa version " PACKAGE_VERSION MESA_GIT_SHA1);
197    if (!compiler)
198       return false;
199 
200    const struct dxil_mdnode *llvm_ident = dxil_get_metadata_node(m, &compiler, 1);
201    return llvm_ident &&
202           dxil_add_metadata_named_node(m, "llvm.ident", &llvm_ident, 1);
203 }
204 
205 static bool
emit_named_version(struct dxil_module * m,const char * name,int major,int minor)206 emit_named_version(struct dxil_module *m, const char *name,
207                    int major, int minor)
208 {
209    const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, major);
210    const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, minor);
211    const struct dxil_mdnode *version_nodes[] = { major_node, minor_node };
212    const struct dxil_mdnode *version = dxil_get_metadata_node(m, version_nodes,
213                                                      ARRAY_SIZE(version_nodes));
214    return dxil_add_metadata_named_node(m, name, &version, 1);
215 }
216 
217 static const char *
get_shader_kind_str(enum dxil_shader_kind kind)218 get_shader_kind_str(enum dxil_shader_kind kind)
219 {
220    switch (kind) {
221    case DXIL_PIXEL_SHADER:
222       return "ps";
223    case DXIL_VERTEX_SHADER:
224       return "vs";
225    case DXIL_GEOMETRY_SHADER:
226       return "gs";
227    case DXIL_HULL_SHADER:
228       return "hs";
229    case DXIL_DOMAIN_SHADER:
230       return "ds";
231    case DXIL_COMPUTE_SHADER:
232       return "cs";
233    default:
234       unreachable("invalid shader kind");
235    }
236 }
237 
238 static bool
emit_dx_shader_model(struct dxil_module * m)239 emit_dx_shader_model(struct dxil_module *m)
240 {
241    const struct dxil_mdnode *type_node = dxil_get_metadata_string(m, get_shader_kind_str(m->shader_kind));
242    const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, m->major_version);
243    const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, m->minor_version);
244    const struct dxil_mdnode *shader_model[] = { type_node, major_node,
245                                                 minor_node };
246    const struct dxil_mdnode *dx_shader_model = dxil_get_metadata_node(m, shader_model, ARRAY_SIZE(shader_model));
247 
248    return dxil_add_metadata_named_node(m, "dx.shaderModel",
249                                        &dx_shader_model, 1);
250 }
251 
/* Tag values used in the per-resource extended-property metadata list. */
enum {
   DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG = 0,
   DXIL_STRUCTURED_BUFFER_ELEMENT_STRIDE_TAG = 1
};
256 
/* DXIL intrinsic opcodes (the integer passed as the first argument of every
 * dx.op.* call). Values must match the DXIL specification exactly; gaps in
 * the numbering correspond to opcodes this backend does not emit. */
enum dxil_intr {
   DXIL_INTR_LOAD_INPUT = 4,
   DXIL_INTR_STORE_OUTPUT = 5,
   DXIL_INTR_FABS = 6,
   DXIL_INTR_SATURATE = 7,

   DXIL_INTR_ISFINITE = 10,
   DXIL_INTR_ISNORMAL = 11,

   DXIL_INTR_FCOS = 12,
   DXIL_INTR_FSIN = 13,

   DXIL_INTR_FEXP2 = 21,
   DXIL_INTR_FRC = 22,
   DXIL_INTR_FLOG2 = 23,

   DXIL_INTR_SQRT = 24,
   DXIL_INTR_RSQRT = 25,
   DXIL_INTR_ROUND_NE = 26,
   DXIL_INTR_ROUND_NI = 27,
   DXIL_INTR_ROUND_PI = 28,
   DXIL_INTR_ROUND_Z = 29,

   DXIL_INTR_BFREV = 30,
   DXIL_INTR_COUNTBITS = 31,
   DXIL_INTR_FIRSTBIT_LO = 32,
   DXIL_INTR_FIRSTBIT_HI = 33,
   DXIL_INTR_FIRSTBIT_SHI = 34,

   DXIL_INTR_FMAX = 35,
   DXIL_INTR_FMIN = 36,
   DXIL_INTR_IMAX = 37,
   DXIL_INTR_IMIN = 38,
   DXIL_INTR_UMAX = 39,
   DXIL_INTR_UMIN = 40,

   DXIL_INTR_FMA = 47,

   DXIL_INTR_IBFE = 51,
   DXIL_INTR_UBFE = 52,
   DXIL_INTR_BFI = 53,

   /* Resource access. */
   DXIL_INTR_CREATE_HANDLE = 57,
   DXIL_INTR_CBUFFER_LOAD_LEGACY = 59,

   DXIL_INTR_SAMPLE = 60,
   DXIL_INTR_SAMPLE_BIAS = 61,
   DXIL_INTR_SAMPLE_LEVEL = 62,
   DXIL_INTR_SAMPLE_GRAD = 63,
   DXIL_INTR_SAMPLE_CMP = 64,
   DXIL_INTR_SAMPLE_CMP_LVL_ZERO = 65,

   DXIL_INTR_TEXTURE_LOAD = 66,
   DXIL_INTR_TEXTURE_STORE = 67,

   DXIL_INTR_BUFFER_LOAD = 68,
   DXIL_INTR_BUFFER_STORE = 69,

   DXIL_INTR_TEXTURE_SIZE = 72,
   DXIL_INTR_TEXTURE_GATHER = 73,
   DXIL_INTR_TEXTURE_GATHER_CMP = 74,

   DXIL_INTR_TEXTURE2DMS_GET_SAMPLE_POSITION = 75,
   DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION = 76,
   DXIL_INTR_RENDER_TARGET_GET_SAMPLE_COUNT = 77,

   DXIL_INTR_ATOMIC_BINOP = 78,
   DXIL_INTR_ATOMIC_CMPXCHG = 79,
   DXIL_INTR_BARRIER = 80,
   DXIL_INTR_TEXTURE_LOD = 81,

   /* Pixel-shader specifics. */
   DXIL_INTR_DISCARD = 82,
   DXIL_INTR_DDX_COARSE = 83,
   DXIL_INTR_DDY_COARSE = 84,
   DXIL_INTR_DDX_FINE = 85,
   DXIL_INTR_DDY_FINE = 86,

   DXIL_INTR_EVAL_SNAPPED = 87,
   DXIL_INTR_EVAL_SAMPLE_INDEX = 88,
   DXIL_INTR_EVAL_CENTROID = 89,

   DXIL_INTR_SAMPLE_INDEX = 90,
   DXIL_INTR_COVERAGE = 91,

   /* Compute-shader thread identifiers. */
   DXIL_INTR_THREAD_ID = 93,
   DXIL_INTR_GROUP_ID = 94,
   DXIL_INTR_THREAD_ID_IN_GROUP = 95,
   DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP = 96,

   /* Geometry-shader stream output. */
   DXIL_INTR_EMIT_STREAM = 97,
   DXIL_INTR_CUT_STREAM = 98,

   DXIL_INTR_GS_INSTANCE_ID = 100,

   DXIL_INTR_MAKE_DOUBLE = 101,
   DXIL_INTR_SPLIT_DOUBLE = 102,

   /* Tessellation. */
   DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT = 103,
   DXIL_INTR_LOAD_PATCH_CONSTANT = 104,
   DXIL_INTR_DOMAIN_LOCATION = 105,
   DXIL_INTR_STORE_PATCH_CONSTANT = 106,
   DXIL_INTR_OUTPUT_CONTROL_POINT_ID = 107,
   DXIL_INTR_PRIMITIVE_ID = 108,

   /* Wave (subgroup) operations. */
   DXIL_INTR_WAVE_IS_FIRST_LANE = 110,
   DXIL_INTR_WAVE_GET_LANE_INDEX = 111,
   DXIL_INTR_WAVE_GET_LANE_COUNT = 112,
   DXIL_INTR_WAVE_ANY_TRUE = 113,
   DXIL_INTR_WAVE_ALL_TRUE = 114,
   DXIL_INTR_WAVE_ACTIVE_ALL_EQUAL = 115,
   DXIL_INTR_WAVE_ACTIVE_BALLOT = 116,
   DXIL_INTR_WAVE_READ_LANE_AT = 117,
   DXIL_INTR_WAVE_READ_LANE_FIRST = 118,
   DXIL_INTR_WAVE_ACTIVE_OP = 119,
   DXIL_INTR_WAVE_ACTIVE_BIT = 120,
   DXIL_INTR_WAVE_PREFIX_OP = 121,
   DXIL_INTR_QUAD_READ_LANE_AT = 122,
   DXIL_INTR_QUAD_OP = 123,

   DXIL_INTR_LEGACY_F32TOF16 = 130,
   DXIL_INTR_LEGACY_F16TOF32 = 131,

   DXIL_INTR_ATTRIBUTE_AT_VERTEX = 137,
   DXIL_INTR_VIEW_ID = 138,

   DXIL_INTR_RAW_BUFFER_LOAD = 139,
   DXIL_INTR_RAW_BUFFER_STORE = 140,

   DXIL_INTR_DOT4_ADD_I8_PACKED = 163,
   DXIL_INTR_DOT4_ADD_U8_PACKED = 164,

   /* SM 6.6 dynamic-resource handles. */
   DXIL_INTR_ANNOTATE_HANDLE = 216,
   DXIL_INTR_CREATE_HANDLE_FROM_BINDING = 217,
   DXIL_INTR_CREATE_HANDLE_FROM_HEAP = 218,

   DXIL_INTR_IS_HELPER_LANE = 221,
   DXIL_INTR_SAMPLE_CMP_LEVEL = 224,
   DXIL_INTR_SAMPLE_CMP_GRAD = 254,
   DXIL_INTR_SAMPLE_CMP_BIAS = 255,

   DXIL_INTR_START_VERTEX_LOCATION = 256,
   DXIL_INTR_START_INSTANCE_LOCATION = 257,
};
400 
/* Binop selector values for the DXIL atomicBinOp intrinsic
 * (DXIL_INTR_ATOMIC_BINOP); must match the DXIL spec. */
enum dxil_atomic_op {
   DXIL_ATOMIC_ADD = 0,
   DXIL_ATOMIC_AND = 1,
   DXIL_ATOMIC_OR = 2,
   DXIL_ATOMIC_XOR = 3,
   DXIL_ATOMIC_IMIN = 4,
   DXIL_ATOMIC_IMAX = 5,
   DXIL_ATOMIC_UMIN = 6,
   DXIL_ATOMIC_UMAX = 7,
   DXIL_ATOMIC_EXCHANGE = 8,
};
412 
413 static enum dxil_atomic_op
nir_atomic_to_dxil_atomic(nir_atomic_op op)414 nir_atomic_to_dxil_atomic(nir_atomic_op op)
415 {
416    switch (op) {
417    case nir_atomic_op_iadd: return DXIL_ATOMIC_ADD;
418    case nir_atomic_op_iand: return DXIL_ATOMIC_AND;
419    case nir_atomic_op_ior: return DXIL_ATOMIC_OR;
420    case nir_atomic_op_ixor: return DXIL_ATOMIC_XOR;
421    case nir_atomic_op_imin: return DXIL_ATOMIC_IMIN;
422    case nir_atomic_op_imax: return DXIL_ATOMIC_IMAX;
423    case nir_atomic_op_umin: return DXIL_ATOMIC_UMIN;
424    case nir_atomic_op_umax: return DXIL_ATOMIC_UMAX;
425    case nir_atomic_op_xchg: return DXIL_ATOMIC_EXCHANGE;
426    default: unreachable("Unsupported atomic op");
427    }
428 }
429 
430 static enum dxil_rmw_op
nir_atomic_to_dxil_rmw(nir_atomic_op op)431 nir_atomic_to_dxil_rmw(nir_atomic_op op)
432 {
433    switch (op) {
434    case nir_atomic_op_iadd: return DXIL_RMWOP_ADD;
435    case nir_atomic_op_iand: return DXIL_RMWOP_AND;
436    case nir_atomic_op_ior: return DXIL_RMWOP_OR;
437    case nir_atomic_op_ixor: return DXIL_RMWOP_XOR;
438    case nir_atomic_op_imin: return DXIL_RMWOP_MIN;
439    case nir_atomic_op_imax: return DXIL_RMWOP_MAX;
440    case nir_atomic_op_umin: return DXIL_RMWOP_UMIN;
441    case nir_atomic_op_umax: return DXIL_RMWOP_UMAX;
442    case nir_atomic_op_xchg: return DXIL_RMWOP_XCHG;
443    default: unreachable("Unsupported atomic op");
444    }
445 }
446 
/* Register-space placement of a (possibly arrayed) shader resource. */
typedef struct {
   unsigned id;       /* resource record ID within its class */
   unsigned binding;  /* lower bound of the binding range */
   unsigned size;     /* number of consecutive bindings covered */
   unsigned space;    /* register space */
} resource_array_layout;
453 
454 static void
fill_resource_metadata(struct dxil_module * m,const struct dxil_mdnode ** fields,const struct dxil_type * struct_type,const char * name,const resource_array_layout * layout)455 fill_resource_metadata(struct dxil_module *m, const struct dxil_mdnode **fields,
456                        const struct dxil_type *struct_type,
457                        const char *name, const resource_array_layout *layout)
458 {
459    const struct dxil_type *pointer_type = dxil_module_get_pointer_type(m, struct_type);
460    const struct dxil_value *pointer_undef = dxil_module_get_undef(m, pointer_type);
461 
462    fields[0] = dxil_get_metadata_int32(m, layout->id); // resource ID
463    fields[1] = dxil_get_metadata_value(m, pointer_type, pointer_undef); // global constant symbol
464    fields[2] = dxil_get_metadata_string(m, name ? name : ""); // name
465    fields[3] = dxil_get_metadata_int32(m, layout->space); // space ID
466    fields[4] = dxil_get_metadata_int32(m, layout->binding); // lower bound
467    fields[5] = dxil_get_metadata_int32(m, layout->size); // range size
468 }
469 
470 static const struct dxil_mdnode *
emit_srv_metadata(struct dxil_module * m,const struct dxil_type * elem_type,const char * name,const resource_array_layout * layout,enum dxil_component_type comp_type,enum dxil_resource_kind res_kind)471 emit_srv_metadata(struct dxil_module *m, const struct dxil_type *elem_type,
472                   const char *name, const resource_array_layout *layout,
473                   enum dxil_component_type comp_type,
474                   enum dxil_resource_kind res_kind)
475 {
476    const struct dxil_mdnode *fields[9];
477 
478    const struct dxil_mdnode *metadata_tag_nodes[2];
479 
480    fill_resource_metadata(m, fields, elem_type, name, layout);
481    fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
482    fields[7] = dxil_get_metadata_int1(m, 0); // sample count
483    if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
484        res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
485       metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
486       metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
487       fields[8] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
488    } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
489       fields[8] = NULL;
490    else
491       unreachable("Structured buffers not supported yet");
492 
493    return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
494 }
495 
496 static const struct dxil_mdnode *
emit_uav_metadata(struct dxil_module * m,const struct dxil_type * struct_type,const char * name,const resource_array_layout * layout,enum dxil_component_type comp_type,enum dxil_resource_kind res_kind,enum gl_access_qualifier access)497 emit_uav_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
498                   const char *name, const resource_array_layout *layout,
499                   enum dxil_component_type comp_type,
500                   enum dxil_resource_kind res_kind,
501                   enum gl_access_qualifier access)
502 {
503    const struct dxil_mdnode *fields[11];
504 
505    const struct dxil_mdnode *metadata_tag_nodes[2];
506 
507    fill_resource_metadata(m, fields, struct_type, name, layout);
508    fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
509    fields[7] = dxil_get_metadata_int1(m, (access & ACCESS_COHERENT) != 0); // globally-coherent
510    fields[8] = dxil_get_metadata_int1(m, false); // has counter
511    fields[9] = dxil_get_metadata_int1(m, false); // is ROV
512    if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
513        res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
514       metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
515       metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
516       fields[10] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
517    } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
518       fields[10] = NULL;
519    else
520       unreachable("Structured buffers not supported yet");
521 
522    return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
523 }
524 
525 static const struct dxil_mdnode *
emit_cbv_metadata(struct dxil_module * m,const struct dxil_type * struct_type,const char * name,const resource_array_layout * layout,unsigned size)526 emit_cbv_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
527                   const char *name, const resource_array_layout *layout,
528                   unsigned size)
529 {
530    const struct dxil_mdnode *fields[8];
531 
532    fill_resource_metadata(m, fields, struct_type, name, layout);
533    fields[6] = dxil_get_metadata_int32(m, size); // constant buffer size
534    fields[7] = NULL; // metadata
535 
536    return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
537 }
538 
539 static const struct dxil_mdnode *
emit_sampler_metadata(struct dxil_module * m,const struct dxil_type * struct_type,nir_variable * var,const resource_array_layout * layout)540 emit_sampler_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
541                       nir_variable *var, const resource_array_layout *layout)
542 {
543    const struct dxil_mdnode *fields[8];
544    const struct glsl_type *type = glsl_without_array(var->type);
545 
546    fill_resource_metadata(m, fields, struct_type, var->name, layout);
547    enum dxil_sampler_kind sampler_kind = glsl_sampler_type_is_shadow(type) ?
548           DXIL_SAMPLER_KIND_COMPARISON : DXIL_SAMPLER_KIND_DEFAULT;
549    fields[6] = dxil_get_metadata_int32(m, sampler_kind); // sampler kind
550    fields[7] = NULL; // metadata
551 
552    return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
553 }
554 
555 
/* Per-class resource-handle table sizes. The CBV/sampler limits are guesses
 * (see original "??" notes) — TODO confirm against actual D3D limits. */
#define MAX_SRVS 128
#define MAX_UAVS 64
#define MAX_CBVS 64 // ??
#define MAX_SAMPLERS 64 // ??

/* SSA value mapped to DXIL: one scalar dxil_value per vector channel. */
struct dxil_def {
   const struct dxil_value *chans[NIR_MAX_VEC_COMPONENTS];
};

/* All state for one NIR-shader-to-DXIL-module conversion. */
struct ntd_context {
   void *ralloc_ctx;                      /* allocation context for the conversion */
   const struct nir_to_dxil_options *opts;
   struct nir_shader *shader;

   struct dxil_module mod;                /* the DXIL module being built */

   /* Per-class resource metadata plus cached resource handles, indexed by
    * binding slot. */
   struct util_dynarray srv_metadata_nodes;
   const struct dxil_value *srv_handles[MAX_SRVS];

   struct util_dynarray uav_metadata_nodes;
   const struct dxil_value *ssbo_handles[MAX_UAVS];
   const struct dxil_value *image_handles[MAX_UAVS];
   uint32_t num_uavs;

   struct util_dynarray cbv_metadata_nodes;
   const struct dxil_value *cbv_handles[MAX_CBVS];

   struct util_dynarray sampler_metadata_nodes;
   const struct dxil_value *sampler_handles[MAX_SAMPLERS];

   struct util_dynarray resources;

   const struct dxil_mdnode *shader_property_nodes[6];
   size_t num_shader_property_nodes;

   /* SSA-def index -> emitted DXIL values, plus deferred phi fixups. */
   struct dxil_def *defs;
   unsigned num_defs;
   struct hash_table *phis;

   /* Lowered variable storage (shared/scratch/constant). */
   const struct dxil_value **sharedvars;
   const struct dxil_value **scratchvars;
   const struct dxil_value **consts;

   /* System-value variables discovered in the shader. */
   nir_variable *system_value[SYSTEM_VALUE_MAX];

   nir_function *tess_ctrl_patch_constant_func;
   unsigned tess_input_control_point_count;

   struct dxil_func_def *main_func_def;
   struct dxil_func_def *tess_ctrl_patch_constant_func_def;
   unsigned unnamed_ubo_count;

   /* Bit sets tracking which float/int bit sizes appear in the shader. */
   BITSET_WORD *float_types;
   BITSET_WORD *int_types;

   const struct dxil_logger *logger;
};
613 
614 static const char*
unary_func_name(enum dxil_intr intr)615 unary_func_name(enum dxil_intr intr)
616 {
617    switch (intr) {
618    case DXIL_INTR_COUNTBITS:
619    case DXIL_INTR_FIRSTBIT_HI:
620    case DXIL_INTR_FIRSTBIT_SHI:
621    case DXIL_INTR_FIRSTBIT_LO:
622       return "dx.op.unaryBits";
623    case DXIL_INTR_ISFINITE:
624    case DXIL_INTR_ISNORMAL:
625       return "dx.op.isSpecialFloat";
626    default:
627       return "dx.op.unary";
628    }
629 }
630 
631 static const struct dxil_value *
emit_unary_call(struct ntd_context * ctx,enum overload_type overload,enum dxil_intr intr,const struct dxil_value * op0)632 emit_unary_call(struct ntd_context *ctx, enum overload_type overload,
633                 enum dxil_intr intr,
634                 const struct dxil_value *op0)
635 {
636    const struct dxil_func *func = dxil_get_function(&ctx->mod,
637                                                     unary_func_name(intr),
638                                                     overload);
639    if (!func)
640       return NULL;
641 
642    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
643    if (!opcode)
644       return NULL;
645 
646    const struct dxil_value *args[] = {
647      opcode,
648      op0
649    };
650 
651    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
652 }
653 
654 static const struct dxil_value *
emit_binary_call(struct ntd_context * ctx,enum overload_type overload,enum dxil_intr intr,const struct dxil_value * op0,const struct dxil_value * op1)655 emit_binary_call(struct ntd_context *ctx, enum overload_type overload,
656                  enum dxil_intr intr,
657                  const struct dxil_value *op0, const struct dxil_value *op1)
658 {
659    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.binary", overload);
660    if (!func)
661       return NULL;
662 
663    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
664    if (!opcode)
665       return NULL;
666 
667    const struct dxil_value *args[] = {
668      opcode,
669      op0,
670      op1
671    };
672 
673    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
674 }
675 
676 static const struct dxil_value *
emit_tertiary_call(struct ntd_context * ctx,enum overload_type overload,enum dxil_intr intr,const struct dxil_value * op0,const struct dxil_value * op1,const struct dxil_value * op2)677 emit_tertiary_call(struct ntd_context *ctx, enum overload_type overload,
678                    enum dxil_intr intr,
679                    const struct dxil_value *op0,
680                    const struct dxil_value *op1,
681                    const struct dxil_value *op2)
682 {
683    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.tertiary", overload);
684    if (!func)
685       return NULL;
686 
687    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
688    if (!opcode)
689       return NULL;
690 
691    const struct dxil_value *args[] = {
692      opcode,
693      op0,
694      op1,
695      op2
696    };
697 
698    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
699 }
700 
701 static const struct dxil_value *
emit_quaternary_call(struct ntd_context * ctx,enum overload_type overload,enum dxil_intr intr,const struct dxil_value * op0,const struct dxil_value * op1,const struct dxil_value * op2,const struct dxil_value * op3)702 emit_quaternary_call(struct ntd_context *ctx, enum overload_type overload,
703                      enum dxil_intr intr,
704                      const struct dxil_value *op0,
705                      const struct dxil_value *op1,
706                      const struct dxil_value *op2,
707                      const struct dxil_value *op3)
708 {
709    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.quaternary", overload);
710    if (!func)
711       return NULL;
712 
713    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
714    if (!opcode)
715       return NULL;
716 
717    const struct dxil_value *args[] = {
718      opcode,
719      op0,
720      op1,
721      op2,
722      op3
723    };
724 
725    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
726 }
727 
728 static const struct dxil_value *
emit_threadid_call(struct ntd_context * ctx,const struct dxil_value * comp)729 emit_threadid_call(struct ntd_context *ctx, const struct dxil_value *comp)
730 {
731    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadId", DXIL_I32);
732    if (!func)
733       return NULL;
734 
735    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
736        DXIL_INTR_THREAD_ID);
737    if (!opcode)
738       return NULL;
739 
740    const struct dxil_value *args[] = {
741      opcode,
742      comp
743    };
744 
745    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
746 }
747 
748 static const struct dxil_value *
emit_threadidingroup_call(struct ntd_context * ctx,const struct dxil_value * comp)749 emit_threadidingroup_call(struct ntd_context *ctx,
750                           const struct dxil_value *comp)
751 {
752    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadIdInGroup", DXIL_I32);
753 
754    if (!func)
755       return NULL;
756 
757    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
758        DXIL_INTR_THREAD_ID_IN_GROUP);
759    if (!opcode)
760       return NULL;
761 
762    const struct dxil_value *args[] = {
763      opcode,
764      comp
765    };
766 
767    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
768 }
769 
770 static const struct dxil_value *
emit_flattenedthreadidingroup_call(struct ntd_context * ctx)771 emit_flattenedthreadidingroup_call(struct ntd_context *ctx)
772 {
773    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.flattenedThreadIdInGroup", DXIL_I32);
774 
775    if (!func)
776       return NULL;
777 
778    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
779       DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP);
780    if (!opcode)
781       return NULL;
782 
783    const struct dxil_value *args[] = {
784      opcode
785    };
786 
787    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
788 }
789 
790 static const struct dxil_value *
emit_groupid_call(struct ntd_context * ctx,const struct dxil_value * comp)791 emit_groupid_call(struct ntd_context *ctx, const struct dxil_value *comp)
792 {
793    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.groupId", DXIL_I32);
794 
795    if (!func)
796       return NULL;
797 
798    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
799        DXIL_INTR_GROUP_ID);
800    if (!opcode)
801       return NULL;
802 
803    const struct dxil_value *args[] = {
804      opcode,
805      comp
806    };
807 
808    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
809 }
810 
811 static const struct dxil_value *
emit_raw_bufferload_call(struct ntd_context * ctx,const struct dxil_value * handle,const struct dxil_value * coord[2],enum overload_type overload,unsigned component_count,unsigned alignment)812 emit_raw_bufferload_call(struct ntd_context *ctx,
813                          const struct dxil_value *handle,
814                          const struct dxil_value *coord[2],
815                          enum overload_type overload,
816                          unsigned component_count,
817                          unsigned alignment)
818 {
819    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.rawBufferLoad", overload);
820    if (!func)
821       return NULL;
822 
823    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
824                                                                  DXIL_INTR_RAW_BUFFER_LOAD);
825    const struct dxil_value *args[] = {
826       opcode, handle, coord[0], coord[1],
827       dxil_module_get_int8_const(&ctx->mod, (1 << component_count) - 1),
828       dxil_module_get_int32_const(&ctx->mod, alignment),
829    };
830 
831    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
832 }
833 
834 static const struct dxil_value *
emit_bufferload_call(struct ntd_context * ctx,const struct dxil_value * handle,const struct dxil_value * coord[2],enum overload_type overload)835 emit_bufferload_call(struct ntd_context *ctx,
836                      const struct dxil_value *handle,
837                      const struct dxil_value *coord[2],
838                      enum overload_type overload)
839 {
840    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferLoad", overload);
841    if (!func)
842       return NULL;
843 
844    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
845       DXIL_INTR_BUFFER_LOAD);
846    const struct dxil_value *args[] = { opcode, handle, coord[0], coord[1] };
847 
848    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
849 }
850 
851 static bool
emit_raw_bufferstore_call(struct ntd_context * ctx,const struct dxil_value * handle,const struct dxil_value * coord[2],const struct dxil_value * value[4],const struct dxil_value * write_mask,enum overload_type overload,unsigned alignment)852 emit_raw_bufferstore_call(struct ntd_context *ctx,
853                           const struct dxil_value *handle,
854                           const struct dxil_value *coord[2],
855                           const struct dxil_value *value[4],
856                           const struct dxil_value *write_mask,
857                           enum overload_type overload,
858                           unsigned alignment)
859 {
860    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.rawBufferStore", overload);
861 
862    if (!func)
863       return false;
864 
865    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
866                                                                  DXIL_INTR_RAW_BUFFER_STORE);
867    const struct dxil_value *args[] = {
868       opcode, handle, coord[0], coord[1],
869       value[0], value[1], value[2], value[3],
870       write_mask,
871       dxil_module_get_int32_const(&ctx->mod, alignment),
872    };
873 
874    return dxil_emit_call_void(&ctx->mod, func,
875                               args, ARRAY_SIZE(args));
876 }
877 
878 static bool
emit_bufferstore_call(struct ntd_context * ctx,const struct dxil_value * handle,const struct dxil_value * coord[2],const struct dxil_value * value[4],const struct dxil_value * write_mask,enum overload_type overload)879 emit_bufferstore_call(struct ntd_context *ctx,
880                       const struct dxil_value *handle,
881                       const struct dxil_value *coord[2],
882                       const struct dxil_value *value[4],
883                       const struct dxil_value *write_mask,
884                       enum overload_type overload)
885 {
886    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferStore", overload);
887 
888    if (!func)
889       return false;
890 
891    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
892       DXIL_INTR_BUFFER_STORE);
893    const struct dxil_value *args[] = {
894       opcode, handle, coord[0], coord[1],
895       value[0], value[1], value[2], value[3],
896       write_mask
897    };
898 
899    return dxil_emit_call_void(&ctx->mod, func,
900                               args, ARRAY_SIZE(args));
901 }
902 
903 static const struct dxil_value *
emit_textureload_call(struct ntd_context * ctx,const struct dxil_value * handle,const struct dxil_value * coord[3],enum overload_type overload)904 emit_textureload_call(struct ntd_context *ctx,
905                       const struct dxil_value *handle,
906                       const struct dxil_value *coord[3],
907                       enum overload_type overload)
908 {
909    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", overload);
910    if (!func)
911       return NULL;
912    const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
913    const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);
914 
915    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
916       DXIL_INTR_TEXTURE_LOAD);
917    const struct dxil_value *args[] = { opcode, handle,
918       /*lod_or_sample*/ int_undef,
919       coord[0], coord[1], coord[2],
920       /* offsets */ int_undef, int_undef, int_undef};
921 
922    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
923 }
924 
925 static bool
emit_texturestore_call(struct ntd_context * ctx,const struct dxil_value * handle,const struct dxil_value * coord[3],const struct dxil_value * value[4],const struct dxil_value * write_mask,enum overload_type overload)926 emit_texturestore_call(struct ntd_context *ctx,
927                        const struct dxil_value *handle,
928                        const struct dxil_value *coord[3],
929                        const struct dxil_value *value[4],
930                        const struct dxil_value *write_mask,
931                        enum overload_type overload)
932 {
933    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureStore", overload);
934 
935    if (!func)
936       return false;
937 
938    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
939       DXIL_INTR_TEXTURE_STORE);
940    const struct dxil_value *args[] = {
941       opcode, handle, coord[0], coord[1], coord[2],
942       value[0], value[1], value[2], value[3],
943       write_mask
944    };
945 
946    return dxil_emit_call_void(&ctx->mod, func,
947                               args, ARRAY_SIZE(args));
948 }
949 
950 static const struct dxil_value *
emit_atomic_binop(struct ntd_context * ctx,const struct dxil_value * handle,enum dxil_atomic_op atomic_op,const struct dxil_value * coord[3],const struct dxil_value * value)951 emit_atomic_binop(struct ntd_context *ctx,
952                   const struct dxil_value *handle,
953                   enum dxil_atomic_op atomic_op,
954                   const struct dxil_value *coord[3],
955                   const struct dxil_value *value)
956 {
957    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.atomicBinOp", DXIL_I32);
958 
959    if (!func)
960       return false;
961 
962    const struct dxil_value *opcode =
963       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_BINOP);
964    const struct dxil_value *atomic_op_value =
965       dxil_module_get_int32_const(&ctx->mod, atomic_op);
966    const struct dxil_value *args[] = {
967       opcode, handle, atomic_op_value,
968       coord[0], coord[1], coord[2], value
969    };
970 
971    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
972 }
973 
974 static const struct dxil_value *
emit_atomic_cmpxchg(struct ntd_context * ctx,const struct dxil_value * handle,const struct dxil_value * coord[3],const struct dxil_value * cmpval,const struct dxil_value * newval)975 emit_atomic_cmpxchg(struct ntd_context *ctx,
976                     const struct dxil_value *handle,
977                     const struct dxil_value *coord[3],
978                     const struct dxil_value *cmpval,
979                     const struct dxil_value *newval)
980 {
981    const struct dxil_func *func =
982       dxil_get_function(&ctx->mod, "dx.op.atomicCompareExchange", DXIL_I32);
983 
984    if (!func)
985       return false;
986 
987    const struct dxil_value *opcode =
988       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_CMPXCHG);
989    const struct dxil_value *args[] = {
990       opcode, handle, coord[0], coord[1], coord[2], cmpval, newval
991    };
992 
993    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
994 }
995 
996 static const struct dxil_value *
emit_createhandle_call_pre_6_6(struct ntd_context * ctx,enum dxil_resource_class resource_class,unsigned lower_bound,unsigned upper_bound,unsigned space,unsigned resource_range_id,const struct dxil_value * resource_range_index,bool non_uniform_resource_index)997 emit_createhandle_call_pre_6_6(struct ntd_context *ctx,
998                                enum dxil_resource_class resource_class,
999                                unsigned lower_bound,
1000                                unsigned upper_bound,
1001                                unsigned space,
1002                                unsigned resource_range_id,
1003                                const struct dxil_value *resource_range_index,
1004                                bool non_uniform_resource_index)
1005 {
1006    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE);
1007    const struct dxil_value *resource_class_value = dxil_module_get_int8_const(&ctx->mod, resource_class);
1008    const struct dxil_value *resource_range_id_value = dxil_module_get_int32_const(&ctx->mod, resource_range_id);
1009    const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
1010    if (!opcode || !resource_class_value || !resource_range_id_value ||
1011        !non_uniform_resource_index_value)
1012       return NULL;
1013 
1014    const struct dxil_value *args[] = {
1015       opcode,
1016       resource_class_value,
1017       resource_range_id_value,
1018       resource_range_index,
1019       non_uniform_resource_index_value
1020    };
1021 
1022    const struct dxil_func *func =
1023          dxil_get_function(&ctx->mod, "dx.op.createHandle", DXIL_NONE);
1024 
1025    if (!func)
1026          return NULL;
1027 
1028    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
1029 }
1030 
1031 static const struct dxil_value *
emit_annotate_handle(struct ntd_context * ctx,const struct dxil_value * unannotated_handle,const struct dxil_value * res_props)1032 emit_annotate_handle(struct ntd_context *ctx,
1033                      const struct dxil_value *unannotated_handle,
1034                      const struct dxil_value *res_props)
1035 {
1036    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ANNOTATE_HANDLE);
1037    if (!opcode)
1038       return NULL;
1039 
1040    const struct dxil_value *args[] = {
1041       opcode,
1042       unannotated_handle,
1043       res_props
1044    };
1045 
1046    const struct dxil_func *func =
1047       dxil_get_function(&ctx->mod, "dx.op.annotateHandle", DXIL_NONE);
1048 
1049    if (!func)
1050       return NULL;
1051 
1052    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
1053 }
1054 
1055 static const struct dxil_value *
emit_annotate_handle_from_metadata(struct ntd_context * ctx,enum dxil_resource_class resource_class,unsigned resource_range_id,const struct dxil_value * unannotated_handle)1056 emit_annotate_handle_from_metadata(struct ntd_context *ctx,
1057                                    enum dxil_resource_class resource_class,
1058                                    unsigned resource_range_id,
1059                                    const struct dxil_value *unannotated_handle)
1060 {
1061 
1062    const struct util_dynarray *mdnodes;
1063    switch (resource_class) {
1064    case DXIL_RESOURCE_CLASS_SRV:
1065       mdnodes = &ctx->srv_metadata_nodes;
1066       break;
1067    case DXIL_RESOURCE_CLASS_UAV:
1068       mdnodes = &ctx->uav_metadata_nodes;
1069       break;
1070    case DXIL_RESOURCE_CLASS_CBV:
1071       mdnodes = &ctx->cbv_metadata_nodes;
1072       break;
1073    case DXIL_RESOURCE_CLASS_SAMPLER:
1074       mdnodes = &ctx->sampler_metadata_nodes;
1075       break;
1076    default:
1077       unreachable("Invalid resource class");
1078    }
1079 
1080    const struct dxil_mdnode *mdnode = *util_dynarray_element(mdnodes, const struct dxil_mdnode *, resource_range_id);
1081    const struct dxil_value *res_props = dxil_module_get_res_props_const(&ctx->mod, resource_class, mdnode);
1082    if (!res_props)
1083       return NULL;
1084 
1085    return emit_annotate_handle(ctx, unannotated_handle, res_props);
1086 }
1087 
1088 static const struct dxil_value *
emit_createhandle_and_annotate(struct ntd_context * ctx,enum dxil_resource_class resource_class,unsigned lower_bound,unsigned upper_bound,unsigned space,unsigned resource_range_id,const struct dxil_value * resource_range_index,bool non_uniform_resource_index)1089 emit_createhandle_and_annotate(struct ntd_context *ctx,
1090                                enum dxil_resource_class resource_class,
1091                                unsigned lower_bound,
1092                                unsigned upper_bound,
1093                                unsigned space,
1094                                unsigned resource_range_id,
1095                                const struct dxil_value *resource_range_index,
1096                                bool non_uniform_resource_index)
1097 {
1098    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE_FROM_BINDING);
1099    const struct dxil_value *res_bind = dxil_module_get_res_bind_const(&ctx->mod, lower_bound, upper_bound, space, resource_class);
1100    const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
1101    if (!opcode || !res_bind || !non_uniform_resource_index_value)
1102       return NULL;
1103 
1104    const struct dxil_value *args[] = {
1105       opcode,
1106       res_bind,
1107       resource_range_index,
1108       non_uniform_resource_index_value
1109    };
1110 
1111    const struct dxil_func *func =
1112       dxil_get_function(&ctx->mod, "dx.op.createHandleFromBinding", DXIL_NONE);
1113 
1114    if (!func)
1115       return NULL;
1116 
1117    const struct dxil_value *unannotated_handle = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
1118    if (!unannotated_handle)
1119       return NULL;
1120 
1121    return emit_annotate_handle_from_metadata(ctx, resource_class, resource_range_id, unannotated_handle);
1122 }
1123 
1124 static const struct dxil_value *
emit_createhandle_call(struct ntd_context * ctx,enum dxil_resource_class resource_class,unsigned lower_bound,unsigned upper_bound,unsigned space,unsigned resource_range_id,const struct dxil_value * resource_range_index,bool non_uniform_resource_index)1125 emit_createhandle_call(struct ntd_context *ctx,
1126                        enum dxil_resource_class resource_class,
1127                        unsigned lower_bound,
1128                        unsigned upper_bound,
1129                        unsigned space,
1130                        unsigned resource_range_id,
1131                        const struct dxil_value *resource_range_index,
1132                        bool non_uniform_resource_index)
1133 {
1134    if (ctx->mod.minor_version < 6)
1135       return emit_createhandle_call_pre_6_6(ctx, resource_class, lower_bound, upper_bound, space, resource_range_id, resource_range_index, non_uniform_resource_index);
1136    else
1137       return emit_createhandle_and_annotate(ctx, resource_class, lower_bound, upper_bound, space, resource_range_id, resource_range_index, non_uniform_resource_index);
1138 }
1139 
1140 static const struct dxil_value *
emit_createhandle_call_const_index(struct ntd_context * ctx,enum dxil_resource_class resource_class,unsigned lower_bound,unsigned upper_bound,unsigned space,unsigned resource_range_id,unsigned resource_range_index,bool non_uniform_resource_index)1141 emit_createhandle_call_const_index(struct ntd_context *ctx,
1142                                    enum dxil_resource_class resource_class,
1143                                    unsigned lower_bound,
1144                                    unsigned upper_bound,
1145                                    unsigned space,
1146                                    unsigned resource_range_id,
1147                                    unsigned resource_range_index,
1148                                    bool non_uniform_resource_index)
1149 {
1150 
1151    const struct dxil_value *resource_range_index_value = dxil_module_get_int32_const(&ctx->mod, resource_range_index);
1152    if (!resource_range_index_value)
1153       return NULL;
1154 
1155    return emit_createhandle_call(ctx, resource_class, lower_bound, upper_bound, space,
1156                                  resource_range_id, resource_range_index_value,
1157                                  non_uniform_resource_index);
1158 }
1159 
1160 static const struct dxil_value *
emit_createhandle_heap(struct ntd_context * ctx,const struct dxil_value * resource_range_index,bool is_sampler,bool non_uniform_resource_index)1161 emit_createhandle_heap(struct ntd_context *ctx,
1162                        const struct dxil_value *resource_range_index,
1163                        bool is_sampler,
1164                        bool non_uniform_resource_index)
1165 {
1166    if (is_sampler)
1167       ctx->mod.feats.sampler_descriptor_heap_indexing = true;
1168    else
1169       ctx->mod.feats.resource_descriptor_heap_indexing = true;
1170 
1171    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE_FROM_HEAP);
1172    const struct dxil_value *sampler = dxil_module_get_int1_const(&ctx->mod, is_sampler);
1173    const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
1174    if (!opcode || !sampler || !non_uniform_resource_index_value)
1175       return NULL;
1176 
1177    const struct dxil_value *args[] = {
1178       opcode,
1179       resource_range_index,
1180       sampler,
1181       non_uniform_resource_index_value
1182    };
1183 
1184    const struct dxil_func *func =
1185       dxil_get_function(&ctx->mod, "dx.op.createHandleFromHeap", DXIL_NONE);
1186 
1187    if (!func)
1188       return NULL;
1189 
1190    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
1191 }
1192 
1193 static void
add_resource(struct ntd_context * ctx,enum dxil_resource_type type,enum dxil_resource_kind kind,const resource_array_layout * layout)1194 add_resource(struct ntd_context *ctx, enum dxil_resource_type type,
1195              enum dxil_resource_kind kind,
1196              const resource_array_layout *layout)
1197 {
1198    struct dxil_resource_v0 *resource_v0 = NULL;
1199    struct dxil_resource_v1 *resource_v1 = NULL;
1200    if (ctx->mod.minor_validator >= 6) {
1201       resource_v1 = util_dynarray_grow(&ctx->resources, struct dxil_resource_v1, 1);
1202       resource_v0 = &resource_v1->v0;
1203    } else {
1204       resource_v0 = util_dynarray_grow(&ctx->resources, struct dxil_resource_v0, 1);
1205    }
1206    resource_v0->resource_type = type;
1207    resource_v0->space = layout->space;
1208    resource_v0->lower_bound = layout->binding;
1209    if (layout->size == 0 || (uint64_t)layout->size + layout->binding >= UINT_MAX)
1210       resource_v0->upper_bound = UINT_MAX;
1211    else
1212       resource_v0->upper_bound = layout->binding + layout->size - 1;
1213    if (type == DXIL_RES_UAV_TYPED ||
1214        type == DXIL_RES_UAV_RAW ||
1215        type == DXIL_RES_UAV_STRUCTURED) {
1216       uint32_t new_uav_count = ctx->num_uavs + layout->size;
1217       if (layout->size == 0 || new_uav_count < ctx->num_uavs)
1218          ctx->num_uavs = UINT_MAX;
1219       else
1220          ctx->num_uavs = new_uav_count;
1221       if (ctx->mod.minor_validator >= 6 && ctx->num_uavs > 8)
1222          ctx->mod.feats.use_64uavs = 1;
1223    }
1224 
1225    if (resource_v1) {
1226       resource_v1->resource_kind = kind;
1227       /* No flags supported yet */
1228       resource_v1->resource_flags = 0;
1229    }
1230 }
1231 
1232 static const struct dxil_value *
emit_createhandle_call_dynamic(struct ntd_context * ctx,enum dxil_resource_class resource_class,unsigned space,unsigned binding,const struct dxil_value * resource_range_index,bool non_uniform_resource_index)1233 emit_createhandle_call_dynamic(struct ntd_context *ctx,
1234                                enum dxil_resource_class resource_class,
1235                                unsigned space,
1236                                unsigned binding,
1237                                const struct dxil_value *resource_range_index,
1238                                bool non_uniform_resource_index)
1239 {
1240    unsigned offset = 0;
1241    unsigned count = 0;
1242 
1243    unsigned num_srvs = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
1244    unsigned num_uavs = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
1245    unsigned num_cbvs = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);
1246    unsigned num_samplers = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);
1247 
1248    switch (resource_class) {
1249    case DXIL_RESOURCE_CLASS_UAV:
1250       offset = num_srvs + num_samplers + num_cbvs;
1251       count = num_uavs;
1252       break;
1253    case DXIL_RESOURCE_CLASS_SRV:
1254       offset = num_samplers + num_cbvs;
1255       count = num_srvs;
1256       break;
1257    case DXIL_RESOURCE_CLASS_SAMPLER:
1258       offset = num_cbvs;
1259       count = num_samplers;
1260       break;
1261    case DXIL_RESOURCE_CLASS_CBV:
1262       offset = 0;
1263       count = num_cbvs;
1264       break;
1265    }
1266 
1267    unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
1268       sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
1269    assert(offset + count <= ctx->resources.size / resource_element_size);
1270    for (unsigned i = offset; i < offset + count; ++i) {
1271       const struct dxil_resource_v0 *resource = (const struct dxil_resource_v0 *)((const char *)ctx->resources.data + resource_element_size * i);
1272       if (resource->space == space &&
1273           resource->lower_bound <= binding &&
1274           resource->upper_bound >= binding) {
1275          return emit_createhandle_call(ctx, resource_class, resource->lower_bound,
1276                                        resource->upper_bound, space,
1277                                        i - offset,
1278                                        resource_range_index,
1279                                        non_uniform_resource_index);
1280       }
1281    }
1282 
1283    unreachable("Resource access for undeclared range");
1284 }
1285 
1286 static bool
emit_srv(struct ntd_context * ctx,nir_variable * var,unsigned count)1287 emit_srv(struct ntd_context *ctx, nir_variable *var, unsigned count)
1288 {
1289    unsigned id = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
1290    unsigned binding = var->data.binding;
1291    resource_array_layout layout = {id, binding, count, var->data.descriptor_set};
1292 
1293    enum dxil_component_type comp_type;
1294    enum dxil_resource_kind res_kind;
1295    enum dxil_resource_type res_type;
1296    if (var->data.mode == nir_var_mem_ssbo) {
1297       comp_type = DXIL_COMP_TYPE_INVALID;
1298       res_kind = DXIL_RESOURCE_KIND_RAW_BUFFER;
1299       res_type = DXIL_RES_SRV_RAW;
1300    } else {
1301       comp_type = dxil_get_comp_type(var->type);
1302       res_kind = dxil_get_resource_kind(var->type);
1303       res_type = DXIL_RES_SRV_TYPED;
1304    }
1305    const struct dxil_type *res_type_as_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, 4, false /* readwrite */);
1306 
1307    if (glsl_type_is_array(var->type))
1308       res_type_as_type = dxil_module_get_array_type(&ctx->mod, res_type_as_type, count);
1309 
1310    const struct dxil_mdnode *srv_meta = emit_srv_metadata(&ctx->mod, res_type_as_type, var->name,
1311                                                           &layout, comp_type, res_kind);
1312 
1313    if (!srv_meta)
1314       return false;
1315 
1316    util_dynarray_append(&ctx->srv_metadata_nodes, const struct dxil_mdnode *, srv_meta);
1317    add_resource(ctx, res_type, res_kind, &layout);
1318    if (res_type == DXIL_RES_SRV_RAW)
1319       ctx->mod.raw_and_structured_buffers = true;
1320 
1321    return true;
1322 }
1323 
1324 static bool
emit_uav(struct ntd_context * ctx,unsigned binding,unsigned space,unsigned count,enum dxil_component_type comp_type,unsigned num_comps,enum dxil_resource_kind res_kind,enum gl_access_qualifier access,const char * name)1325 emit_uav(struct ntd_context *ctx, unsigned binding, unsigned space, unsigned count,
1326          enum dxil_component_type comp_type, unsigned num_comps, enum dxil_resource_kind res_kind,
1327          enum gl_access_qualifier access, const char *name)
1328 {
1329    unsigned id = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
1330    resource_array_layout layout = { id, binding, count, space };
1331 
1332    const struct dxil_type *res_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, num_comps, true /* readwrite */);
1333    res_type = dxil_module_get_array_type(&ctx->mod, res_type, count);
1334    const struct dxil_mdnode *uav_meta = emit_uav_metadata(&ctx->mod, res_type, name,
1335                                                           &layout, comp_type, res_kind, access);
1336 
1337    if (!uav_meta)
1338       return false;
1339 
1340    util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta);
1341    if (ctx->mod.minor_validator < 6 &&
1342        util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8)
1343       ctx->mod.feats.use_64uavs = 1;
1344 
1345    add_resource(ctx, res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER ? DXIL_RES_UAV_RAW : DXIL_RES_UAV_TYPED, res_kind, &layout);
1346    if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
1347       ctx->mod.raw_and_structured_buffers = true;
1348    if (ctx->mod.shader_kind != DXIL_PIXEL_SHADER &&
1349        ctx->mod.shader_kind != DXIL_COMPUTE_SHADER)
1350       ctx->mod.feats.uavs_at_every_stage = true;
1351 
1352    return true;
1353 }
1354 
1355 static bool
emit_globals(struct ntd_context * ctx,unsigned size)1356 emit_globals(struct ntd_context *ctx, unsigned size)
1357 {
1358    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo)
1359       size++;
1360 
1361    if (!size)
1362       return true;
1363 
1364    if (!emit_uav(ctx, 0, 0, size, DXIL_COMP_TYPE_INVALID, 1, DXIL_RESOURCE_KIND_RAW_BUFFER, 0, "globals"))
1365       return false;
1366 
1367    return true;
1368 }
1369 
1370 static bool
emit_uav_var(struct ntd_context * ctx,nir_variable * var,unsigned count)1371 emit_uav_var(struct ntd_context *ctx, nir_variable *var, unsigned count)
1372 {
1373    unsigned binding, space;
1374    if (ctx->opts->environment == DXIL_ENVIRONMENT_GL) {
1375       /* For GL, the image intrinsics are already lowered, using driver_location
1376        * as the 0-based image index. Use space 1 so that we can keep using these
1377        * NIR constants without having to remap them, and so they don't overlap
1378        * SSBOs, which are also 0-based UAV bindings.
1379        */
1380       binding = var->data.driver_location;
1381       space = 1;
1382    } else {
1383       binding = var->data.binding;
1384       space = var->data.descriptor_set;
1385    }
1386    enum dxil_component_type comp_type = dxil_get_comp_type(var->type);
1387    enum dxil_resource_kind res_kind = dxil_get_resource_kind(var->type);
1388    const char *name = var->name;
1389 
1390    return emit_uav(ctx, binding, space, count, comp_type,
1391                    util_format_get_nr_components(var->data.image.format),
1392                    res_kind, var->data.access, name);
1393 }
1394 
1395 static const struct dxil_value *
get_value_for_const(struct dxil_module * mod,nir_const_value * c,const struct dxil_type * type)1396 get_value_for_const(struct dxil_module *mod, nir_const_value *c, const struct dxil_type *type)
1397 {
1398    if (type == mod->int1_type) return dxil_module_get_int1_const(mod, c->b);
1399    if (type == mod->float32_type) return dxil_module_get_float_const(mod, c->f32);
1400    if (type == mod->int32_type) return dxil_module_get_int32_const(mod, c->i32);
1401    if (type == mod->int16_type) {
1402       mod->feats.min_precision = true;
1403       return dxil_module_get_int16_const(mod, c->i16);
1404    }
1405    if (type == mod->int64_type) {
1406       mod->feats.int64_ops = true;
1407       return dxil_module_get_int64_const(mod, c->i64);
1408    }
1409    if (type == mod->float16_type) {
1410       mod->feats.min_precision = true;
1411       return dxil_module_get_float16_const(mod, c->u16);
1412    }
1413    if (type == mod->float64_type) {
1414       mod->feats.doubles = true;
1415       return dxil_module_get_double_const(mod, c->f64);
1416    }
1417    unreachable("Invalid type");
1418 }
1419 
1420 static const struct dxil_type *
get_type_for_glsl_base_type(struct dxil_module * mod,enum glsl_base_type type)1421 get_type_for_glsl_base_type(struct dxil_module *mod, enum glsl_base_type type)
1422 {
1423    uint32_t bit_size = glsl_base_type_bit_size(type);
1424    if (nir_alu_type_get_base_type(nir_get_nir_type_for_glsl_base_type(type)) == nir_type_float)
1425       return dxil_module_get_float_type(mod, bit_size);
1426    return dxil_module_get_int_type(mod, bit_size);
1427 }
1428 
static const struct dxil_type *
get_type_for_glsl_type(struct dxil_module *mod, const struct glsl_type *type)
{
   /* Recursively translate a GLSL type (scalar, vector, array, or struct)
    * into the corresponding DXIL type. Returns NULL on allocation failure. */
   if (glsl_type_is_scalar(type))
      return get_type_for_glsl_base_type(mod, glsl_get_base_type(type));

   if (glsl_type_is_vector(type))
      return dxil_module_get_vector_type(mod, get_type_for_glsl_base_type(mod, glsl_get_base_type(type)),
                                         glsl_get_vector_elements(type));

   if (glsl_type_is_array(type))
      return dxil_module_get_array_type(mod, get_type_for_glsl_type(mod, glsl_get_array_element(type)),
                                        glsl_array_size(type));

   assert(glsl_type_is_struct(type));
   uint32_t size = glsl_get_length(type);
   /* Fix: calloc takes (count, element-size); the arguments were swapped.
    * Same byte count either way, but the conventional order lets calloc's
    * count*size overflow check do its job. Also handle allocation failure
    * instead of dereferencing NULL below. */
   const struct dxil_type **fields = calloc(size, sizeof(const struct dxil_type *));
   if (!fields)
      return NULL;
   for (uint32_t i = 0; i < size; ++i)
      fields[i] = get_type_for_glsl_type(mod, glsl_get_struct_field(type, i));
   const struct dxil_type *ret = dxil_module_get_struct_type(mod, glsl_get_type_name(type), fields, size);
   free((void *)fields);
   return ret;
}
1452 
1453 static const struct dxil_value *
get_value_for_const_aggregate(struct dxil_module * mod,nir_constant * c,const struct glsl_type * type)1454 get_value_for_const_aggregate(struct dxil_module *mod, nir_constant *c, const struct glsl_type *type)
1455 {
1456    const struct dxil_type *dxil_type = get_type_for_glsl_type(mod, type);
1457    if (glsl_type_is_vector_or_scalar(type)) {
1458       const struct dxil_type *element_type = get_type_for_glsl_base_type(mod, glsl_get_base_type(type));
1459       const struct dxil_value *elements[NIR_MAX_VEC_COMPONENTS];
1460       for (uint32_t i = 0; i < glsl_get_vector_elements(type); ++i)
1461          elements[i] = get_value_for_const(mod, &c->values[i], element_type);
1462       if (glsl_type_is_scalar(type))
1463          return elements[0];
1464       return dxil_module_get_vector_const(mod, dxil_type, elements);
1465    }
1466 
1467    uint32_t num_values = glsl_get_length(type);
1468    assert(num_values == c->num_elements);
1469    const struct dxil_value **values = calloc(sizeof(const struct dxil_value *), num_values);
1470    const struct dxil_value *ret;
1471    if (glsl_type_is_array(type)) {
1472       const struct glsl_type *element_type = glsl_get_array_element(type);
1473       for (uint32_t i = 0; i < num_values; ++i)
1474          values[i] = get_value_for_const_aggregate(mod, c->elements[i], element_type);
1475       ret = dxil_module_get_array_const(mod, dxil_type, values);
1476    } else {
1477       for (uint32_t i = 0; i < num_values; ++i)
1478          values[i] = get_value_for_const_aggregate(mod, c->elements[i], glsl_get_struct_field(type, i));
1479       ret = dxil_module_get_struct_const(mod, dxil_type, values);
1480    }
1481    free((void *)values);
1482    return ret;
1483 }
1484 
1485 static bool
emit_global_consts(struct ntd_context * ctx)1486 emit_global_consts(struct ntd_context *ctx)
1487 {
1488    uint32_t index = 0;
1489    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_constant) {
1490       assert(var->constant_initializer);
1491       var->data.driver_location = index++;
1492    }
1493 
1494    ctx->consts = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index);
1495 
1496    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_constant) {
1497       if (!var->name)
1498          var->name = ralloc_asprintf(var, "const_%d", var->data.driver_location);
1499 
1500       const struct dxil_value *agg_vals =
1501          get_value_for_const_aggregate(&ctx->mod, var->constant_initializer, var->type);
1502       if (!agg_vals)
1503          return false;
1504 
1505       const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name,
1506                                                               dxil_value_get_type(agg_vals),
1507                                                               DXIL_AS_DEFAULT, 16,
1508                                                               agg_vals);
1509       if (!gvar)
1510          return false;
1511 
1512       ctx->consts[var->data.driver_location] = gvar;
1513    }
1514 
1515    return true;
1516 }
1517 
1518 static bool
emit_shared_vars(struct ntd_context * ctx)1519 emit_shared_vars(struct ntd_context *ctx)
1520 {
1521    uint32_t index = 0;
1522    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_shared)
1523       var->data.driver_location = index++;
1524 
1525    ctx->sharedvars = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index);
1526 
1527    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_shared) {
1528       if (!var->name)
1529          var->name = ralloc_asprintf(var, "shared_%d", var->data.driver_location);
1530       const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name,
1531                                                               get_type_for_glsl_type(&ctx->mod, var->type),
1532                                                               DXIL_AS_GROUPSHARED, 16,
1533                                                               NULL);
1534       if (!gvar)
1535          return false;
1536 
1537       ctx->sharedvars[var->data.driver_location] = gvar;
1538    }
1539 
1540    return true;
1541 }
1542 
1543 static bool
emit_cbv(struct ntd_context * ctx,unsigned binding,unsigned space,unsigned size,unsigned count,char * name)1544 emit_cbv(struct ntd_context *ctx, unsigned binding, unsigned space,
1545          unsigned size, unsigned count, char *name)
1546 {
1547    assert(count != 0);
1548 
1549    unsigned idx = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);
1550 
1551    const struct dxil_type *float32 = dxil_module_get_float_type(&ctx->mod, 32);
1552    const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, float32, size);
1553    const struct dxil_type *buffer_type = dxil_module_get_struct_type(&ctx->mod, name,
1554                                                                      &array_type, 1);
1555    // All ubo[1]s should have been lowered to ubo with static indexing
1556    const struct dxil_type *final_type = count != 1 ? dxil_module_get_array_type(&ctx->mod, buffer_type, count) : buffer_type;
1557    resource_array_layout layout = {idx, binding, count, space};
1558    const struct dxil_mdnode *cbv_meta = emit_cbv_metadata(&ctx->mod, final_type,
1559                                                           name, &layout, 4 * size);
1560 
1561    if (!cbv_meta)
1562       return false;
1563 
1564    util_dynarray_append(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *, cbv_meta);
1565    add_resource(ctx, DXIL_RES_CBV, DXIL_RESOURCE_KIND_CBUFFER, &layout);
1566 
1567    return true;
1568 }
1569 
1570 static bool
emit_ubo_var(struct ntd_context * ctx,nir_variable * var)1571 emit_ubo_var(struct ntd_context *ctx, nir_variable *var)
1572 {
1573    unsigned count = 1;
1574    if (glsl_type_is_array(var->type))
1575       count = glsl_get_length(var->type);
1576 
1577    char *name = var->name;
1578    char temp_name[30];
1579    if (name && strlen(name) == 0) {
1580       snprintf(temp_name, sizeof(temp_name), "__unnamed_ubo_%d",
1581                ctx->unnamed_ubo_count++);
1582       name = temp_name;
1583    }
1584 
1585    const struct glsl_type *type = glsl_without_array(var->type);
1586    assert(glsl_type_is_struct(type) || glsl_type_is_interface(type));
1587    unsigned dwords = ALIGN_POT(glsl_get_explicit_size(type, false), 16) / 4;
1588 
1589    return emit_cbv(ctx, var->data.binding, var->data.descriptor_set,
1590                    dwords, count, name);
1591 }
1592 
1593 static bool
emit_sampler(struct ntd_context * ctx,nir_variable * var,unsigned count)1594 emit_sampler(struct ntd_context *ctx, nir_variable *var, unsigned count)
1595 {
1596    unsigned id = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);
1597    unsigned binding = var->data.binding;
1598    resource_array_layout layout = {id, binding, count, var->data.descriptor_set};
1599    const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
1600    const struct dxil_type *sampler_type = dxil_module_get_struct_type(&ctx->mod, "struct.SamplerState", &int32_type, 1);
1601 
1602    if (glsl_type_is_array(var->type))
1603       sampler_type = dxil_module_get_array_type(&ctx->mod, sampler_type, count);
1604 
1605    const struct dxil_mdnode *sampler_meta = emit_sampler_metadata(&ctx->mod, sampler_type, var, &layout);
1606 
1607    if (!sampler_meta)
1608       return false;
1609 
1610    util_dynarray_append(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *, sampler_meta);
1611    add_resource(ctx, DXIL_RES_SAMPLER, DXIL_RESOURCE_KIND_SAMPLER, &layout);
1612 
1613    return true;
1614 }
1615 
/* Pre-creates createHandle calls with constant indices for every resource
 * binding that can be statically addressed, filling the per-class handle
 * arrays (srv/cbv/sampler/ssbo/image) so instruction emission can reuse
 * them. Returns false if any handle creation fails.
 */
static bool
emit_static_indexing_handles(struct ntd_context *ctx)
{
   /* Vulkan always uses dynamic handles, from instructions in the NIR */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN)
      return true;

   unsigned last_res_class = -1;
   unsigned id = 0;

   /* The serialized resource record size depends on the validator version
    * (v1 records for validator >= x.6, otherwise v0). Both layouts share
    * the v0 prefix, so iterate byte-wise with the right stride and read
    * records through the v0 view. */
   unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
      sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
   for (struct dxil_resource_v0 *res = (struct dxil_resource_v0 *)ctx->resources.data;
        res < (struct dxil_resource_v0 *)((char *)ctx->resources.data + ctx->resources.size);
        res = (struct dxil_resource_v0 *)((char *)res + resource_element_size)) {
      enum dxil_resource_class res_class;
      const struct dxil_value **handle_array;
      /* Pick the resource class and the handle array the results go into. */
      switch (res->resource_type) {
      case DXIL_RES_SRV_TYPED:
      case DXIL_RES_SRV_RAW:
      case DXIL_RES_SRV_STRUCTURED:
         res_class = DXIL_RESOURCE_CLASS_SRV;
         handle_array = ctx->srv_handles;
         break;
      case DXIL_RES_CBV:
         res_class = DXIL_RESOURCE_CLASS_CBV;
         handle_array = ctx->cbv_handles;
         break;
      case DXIL_RES_SAMPLER:
         res_class = DXIL_RESOURCE_CLASS_SAMPLER;
         handle_array = ctx->sampler_handles;
         break;
      case DXIL_RES_UAV_RAW:
         res_class = DXIL_RESOURCE_CLASS_UAV;
         handle_array = ctx->ssbo_handles;
         break;
      case DXIL_RES_UAV_TYPED:
      case DXIL_RES_UAV_STRUCTURED:
      case DXIL_RES_UAV_STRUCTURED_WITH_COUNTER:
         res_class = DXIL_RESOURCE_CLASS_UAV;
         handle_array = ctx->image_handles;
         break;
      default:
         unreachable("Unexpected resource type");
      }

      /* Resource ids restart from 0 at each class boundary; the records are
       * grouped by class, so a simple running counter suffices. */
      if (last_res_class != res_class)
         id = 0;
      else
         id++;
      last_res_class = res_class;

      /* Only spaces 0 and 1 are ever statically indexed here. */
      if (res->space > 1)
         continue;
      assert(res->space == 0 ||
         (res->space == 1 &&
            res->resource_type != DXIL_RES_UAV_RAW &&
            ctx->opts->environment == DXIL_ENVIRONMENT_GL));

      /* CL uses dynamic handles for the "globals" UAV array, but uses static
       * handles for UBOs, textures, and samplers.
       */
      if (ctx->opts->environment == DXIL_ENVIRONMENT_CL &&
          res->resource_type == DXIL_RES_UAV_RAW)
         continue;

      /* One handle per binding slot in the (possibly arrayed) range. */
      for (unsigned i = res->lower_bound; i <= res->upper_bound; ++i) {
         handle_array[i] = emit_createhandle_call_const_index(ctx,
                                                              res_class,
                                                              res->lower_bound,
                                                              res->upper_bound,
                                                              res->space,
                                                              id,
                                                              i,
                                                              false);
         if (!handle_array[i])
            return false;
      }
   }
   return true;
}
1697 
1698 static const struct dxil_mdnode *
emit_gs_state(struct ntd_context * ctx)1699 emit_gs_state(struct ntd_context *ctx)
1700 {
1701    const struct dxil_mdnode *gs_state_nodes[5];
1702    const nir_shader *s = ctx->shader;
1703 
1704    gs_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, dxil_get_input_primitive(s->info.gs.input_primitive));
1705    gs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.vertices_out);
1706    gs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.gs.active_stream_mask, 1));
1707    gs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, dxil_get_primitive_topology(s->info.gs.output_primitive));
1708    gs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.invocations);
1709 
1710    for (unsigned i = 0; i < ARRAY_SIZE(gs_state_nodes); ++i) {
1711       if (!gs_state_nodes[i])
1712          return NULL;
1713    }
1714 
1715    return dxil_get_metadata_node(&ctx->mod, gs_state_nodes, ARRAY_SIZE(gs_state_nodes));
1716 }
1717 
1718 static enum dxil_tessellator_domain
get_tessellator_domain(enum tess_primitive_mode primitive_mode)1719 get_tessellator_domain(enum tess_primitive_mode primitive_mode)
1720 {
1721    switch (primitive_mode) {
1722    case TESS_PRIMITIVE_QUADS: return DXIL_TESSELLATOR_DOMAIN_QUAD;
1723    case TESS_PRIMITIVE_TRIANGLES: return DXIL_TESSELLATOR_DOMAIN_TRI;
1724    case TESS_PRIMITIVE_ISOLINES: return DXIL_TESSELLATOR_DOMAIN_ISOLINE;
1725    default:
1726       unreachable("Invalid tessellator primitive mode");
1727    }
1728 }
1729 
1730 static enum dxil_tessellator_partitioning
get_tessellator_partitioning(enum gl_tess_spacing spacing)1731 get_tessellator_partitioning(enum gl_tess_spacing spacing)
1732 {
1733    switch (spacing) {
1734    default:
1735    case TESS_SPACING_EQUAL:
1736       return DXIL_TESSELLATOR_PARTITIONING_INTEGER;
1737    case TESS_SPACING_FRACTIONAL_EVEN:
1738       return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN;
1739    case TESS_SPACING_FRACTIONAL_ODD:
1740       return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD;
1741    }
1742 }
1743 
1744 static enum dxil_tessellator_output_primitive
get_tessellator_output_primitive(const struct shader_info * info)1745 get_tessellator_output_primitive(const struct shader_info *info)
1746 {
1747    if (info->tess.point_mode)
1748       return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_POINT;
1749    if (info->tess._primitive_mode == TESS_PRIMITIVE_ISOLINES)
1750       return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_LINE;
1751    /* Note: GL tessellation domain is inverted from D3D, which means triangle
1752     * winding needs to be inverted.
1753     */
1754    if (info->tess.ccw)
1755       return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CW;
1756    return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CCW;
1757 }
1758 
1759 static const struct dxil_mdnode *
emit_hs_state(struct ntd_context * ctx)1760 emit_hs_state(struct ntd_context *ctx)
1761 {
1762    const struct dxil_mdnode *hs_state_nodes[7];
1763 
1764    hs_state_nodes[0] = dxil_get_metadata_func(&ctx->mod, ctx->tess_ctrl_patch_constant_func_def->func);
1765    hs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->tess_input_control_point_count);
1766    hs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out);
1767    hs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode));
1768    hs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_partitioning(ctx->shader->info.tess.spacing));
1769    hs_state_nodes[5] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_output_primitive(&ctx->shader->info));
1770    hs_state_nodes[6] = dxil_get_metadata_float32(&ctx->mod, 64.0f);
1771 
1772    return dxil_get_metadata_node(&ctx->mod, hs_state_nodes, ARRAY_SIZE(hs_state_nodes));
1773 }
1774 
1775 static const struct dxil_mdnode *
emit_ds_state(struct ntd_context * ctx)1776 emit_ds_state(struct ntd_context *ctx)
1777 {
1778    const struct dxil_mdnode *ds_state_nodes[2];
1779 
1780    ds_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode));
1781    ds_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out);
1782 
1783    return dxil_get_metadata_node(&ctx->mod, ds_state_nodes, ARRAY_SIZE(ds_state_nodes));
1784 }
1785 
1786 static const struct dxil_mdnode *
emit_threads(struct ntd_context * ctx)1787 emit_threads(struct ntd_context *ctx)
1788 {
1789    const nir_shader *s = ctx->shader;
1790    const struct dxil_mdnode *threads_x = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[0], 1));
1791    const struct dxil_mdnode *threads_y = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[1], 1));
1792    const struct dxil_mdnode *threads_z = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[2], 1));
1793    if (!threads_x || !threads_y || !threads_z)
1794       return false;
1795 
1796    const struct dxil_mdnode *threads_nodes[] = { threads_x, threads_y, threads_z };
1797    return dxil_get_metadata_node(&ctx->mod, threads_nodes, ARRAY_SIZE(threads_nodes));
1798 }
1799 
1800 static const struct dxil_mdnode *
emit_wave_size(struct ntd_context * ctx)1801 emit_wave_size(struct ntd_context *ctx)
1802 {
1803    const nir_shader *s = ctx->shader;
1804    const struct dxil_mdnode *wave_size_node = dxil_get_metadata_int32(&ctx->mod, s->info.subgroup_size);
1805    return dxil_get_metadata_node(&ctx->mod, &wave_size_node, 1);
1806 }
1807 
1808 static const struct dxil_mdnode *
emit_wave_size_range(struct ntd_context * ctx)1809 emit_wave_size_range(struct ntd_context *ctx)
1810 {
1811    const nir_shader *s = ctx->shader;
1812    const struct dxil_mdnode *wave_size_nodes[3];
1813    wave_size_nodes[0] = dxil_get_metadata_int32(&ctx->mod, s->info.subgroup_size);
1814    wave_size_nodes[1] = wave_size_nodes[0];
1815    wave_size_nodes[2] = wave_size_nodes[0];
1816    return dxil_get_metadata_node(&ctx->mod, wave_size_nodes, ARRAY_SIZE(wave_size_nodes));
1817 }
1818 
1819 static int64_t
get_module_flags(struct ntd_context * ctx)1820 get_module_flags(struct ntd_context *ctx)
1821 {
1822    /* See the DXIL documentation for the definition of these flags:
1823     *
1824     * https://github.com/Microsoft/DirectXShaderCompiler/blob/master/docs/DXIL.rst#shader-flags
1825     */
1826 
1827    uint64_t flags = 0;
1828    if (ctx->mod.feats.doubles)
1829       flags |= (1 << 2);
1830    if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT &&
1831        ctx->shader->info.fs.early_fragment_tests)
1832       flags |= (1 << 3);
1833    if (ctx->mod.raw_and_structured_buffers)
1834       flags |= (1 << 4);
1835    if (ctx->mod.feats.min_precision)
1836       flags |= (1 << 5);
1837    if (ctx->mod.feats.dx11_1_double_extensions)
1838       flags |= (1 << 6);
1839    if (ctx->mod.feats.array_layer_from_vs_or_ds)
1840       flags |= (1 << 9);
1841    if (ctx->mod.feats.inner_coverage)
1842       flags |= (1 << 10);
1843    if (ctx->mod.feats.stencil_ref)
1844       flags |= (1 << 11);
1845    if (ctx->mod.feats.tiled_resources)
1846       flags |= (1 << 12);
1847    if (ctx->mod.feats.typed_uav_load_additional_formats)
1848       flags |= (1 << 13);
1849    if (ctx->mod.feats.use_64uavs)
1850       flags |= (1 << 15);
1851    if (ctx->mod.feats.uavs_at_every_stage)
1852       flags |= (1 << 16);
1853    if (ctx->mod.feats.cs_4x_raw_sb)
1854       flags |= (1 << 17);
1855    if (ctx->mod.feats.rovs)
1856       flags |= (1 << 18);
1857    if (ctx->mod.feats.wave_ops)
1858       flags |= (1 << 19);
1859    if (ctx->mod.feats.int64_ops)
1860       flags |= (1 << 20);
1861    if (ctx->mod.feats.view_id)
1862       flags |= (1 << 21);
1863    if (ctx->mod.feats.barycentrics)
1864       flags |= (1 << 22);
1865    if (ctx->mod.feats.native_low_precision)
1866       flags |= (1 << 23) | (1 << 5);
1867    if (ctx->mod.feats.shading_rate)
1868       flags |= (1 << 24);
1869    if (ctx->mod.feats.raytracing_tier_1_1)
1870       flags |= (1 << 25);
1871    if (ctx->mod.feats.sampler_feedback)
1872       flags |= (1 << 26);
1873    if (ctx->mod.feats.atomic_int64_typed)
1874       flags |= (1 << 27);
1875    if (ctx->mod.feats.atomic_int64_tgsm)
1876       flags |= (1 << 28);
1877    if (ctx->mod.feats.derivatives_in_mesh_or_amp)
1878       flags |= (1 << 29);
1879    if (ctx->mod.feats.resource_descriptor_heap_indexing)
1880       flags |= (1 << 30);
1881    if (ctx->mod.feats.sampler_descriptor_heap_indexing)
1882       flags |= (1ull << 31);
1883    if (ctx->mod.feats.atomic_int64_heap_resource)
1884       flags |= (1ull << 32);
1885    if (ctx->mod.feats.advanced_texture_ops)
1886       flags |= (1ull << 34);
1887    if (ctx->mod.feats.writable_msaa)
1888       flags |= (1ull << 35);
1889    // Bit 36 is wave MMA
1890    if (ctx->mod.feats.sample_cmp_bias_gradient)
1891       flags |= (1ull << 37);
1892    if (ctx->mod.feats.extended_command_info)
1893       flags |= (1ull << 38);
1894 
1895    if (ctx->opts->disable_math_refactoring)
1896       flags |= (1 << 1);
1897 
1898    /* Work around https://github.com/microsoft/DirectXShaderCompiler/issues/4616
1899     * When targeting SM6.7 and with at least one UAV, if no other flags are present,
1900     * set the resources-may-not-alias flag, or else the DXIL validator may end up
1901     * with uninitialized memory which will fail validation, due to missing that flag.
1902     */
1903    if (flags == 0 && ctx->mod.minor_version >= 7 && ctx->num_uavs > 0)
1904       flags |= (1ull << 33);
1905 
1906    return flags;
1907 }
1908 
1909 static const struct dxil_mdnode *
emit_entrypoint(struct ntd_context * ctx,const struct dxil_func * func,const char * name,const struct dxil_mdnode * signatures,const struct dxil_mdnode * resources,const struct dxil_mdnode * shader_props)1910 emit_entrypoint(struct ntd_context *ctx,
1911                 const struct dxil_func *func, const char *name,
1912                 const struct dxil_mdnode *signatures,
1913                 const struct dxil_mdnode *resources,
1914                 const struct dxil_mdnode *shader_props)
1915 {
1916    char truncated_name[254] = { 0 };
1917    strncpy(truncated_name, name, ARRAY_SIZE(truncated_name) - 1);
1918 
1919    const struct dxil_mdnode *func_md = dxil_get_metadata_func(&ctx->mod, func);
1920    const struct dxil_mdnode *name_md = dxil_get_metadata_string(&ctx->mod, truncated_name);
1921    const struct dxil_mdnode *nodes[] = {
1922       func_md,
1923       name_md,
1924       signatures,
1925       resources,
1926       shader_props
1927    };
1928    return dxil_get_metadata_node(&ctx->mod, nodes,
1929                                  ARRAY_SIZE(nodes));
1930 }
1931 
1932 static const struct dxil_mdnode *
emit_resources(struct ntd_context * ctx)1933 emit_resources(struct ntd_context *ctx)
1934 {
1935    bool emit_resources = false;
1936    const struct dxil_mdnode *resources_nodes[] = {
1937       NULL, NULL, NULL, NULL
1938    };
1939 
1940 #define ARRAY_AND_SIZE(arr) arr.data, util_dynarray_num_elements(&arr, const struct dxil_mdnode *)
1941 
1942    if (ctx->srv_metadata_nodes.size) {
1943       resources_nodes[0] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->srv_metadata_nodes));
1944       emit_resources = true;
1945    }
1946 
1947    if (ctx->uav_metadata_nodes.size) {
1948       resources_nodes[1] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->uav_metadata_nodes));
1949       emit_resources = true;
1950    }
1951 
1952    if (ctx->cbv_metadata_nodes.size) {
1953       resources_nodes[2] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->cbv_metadata_nodes));
1954       emit_resources = true;
1955    }
1956 
1957    if (ctx->sampler_metadata_nodes.size) {
1958       resources_nodes[3] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->sampler_metadata_nodes));
1959       emit_resources = true;
1960    }
1961 
1962 #undef ARRAY_AND_SIZE
1963 
1964    return emit_resources ?
1965       dxil_get_metadata_node(&ctx->mod, resources_nodes, ARRAY_SIZE(resources_nodes)): NULL;
1966 }
1967 
1968 static bool
emit_tag(struct ntd_context * ctx,enum dxil_shader_tag tag,const struct dxil_mdnode * value_node)1969 emit_tag(struct ntd_context *ctx, enum dxil_shader_tag tag,
1970          const struct dxil_mdnode *value_node)
1971 {
1972    const struct dxil_mdnode *tag_node = dxil_get_metadata_int32(&ctx->mod, tag);
1973    if (!tag_node || !value_node)
1974       return false;
1975    assert(ctx->num_shader_property_nodes <= ARRAY_SIZE(ctx->shader_property_nodes) - 2);
1976    ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = tag_node;
1977    ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = value_node;
1978 
1979    return true;
1980 }
1981 
1982 static bool
emit_metadata(struct ntd_context * ctx)1983 emit_metadata(struct ntd_context *ctx)
1984 {
1985    /* DXIL versions are 1.x for shader model 6.x */
1986    assert(ctx->mod.major_version == 6);
1987    unsigned dxilMajor = 1;
1988    unsigned dxilMinor = ctx->mod.minor_version;
1989    unsigned valMajor = ctx->mod.major_validator;
1990    unsigned valMinor = ctx->mod.minor_validator;
1991    if (!emit_llvm_ident(&ctx->mod) ||
1992        !emit_named_version(&ctx->mod, "dx.version", dxilMajor, dxilMinor) ||
1993        !emit_named_version(&ctx->mod, "dx.valver", valMajor, valMinor) ||
1994        !emit_dx_shader_model(&ctx->mod))
1995       return false;
1996 
1997    const struct dxil_func_def *main_func_def = ctx->main_func_def;
1998    if (!main_func_def)
1999       return false;
2000    const struct dxil_func *main_func = main_func_def->func;
2001 
2002    const struct dxil_mdnode *resources_node = emit_resources(ctx);
2003 
2004    const struct dxil_mdnode *main_entrypoint = dxil_get_metadata_func(&ctx->mod, main_func);
2005    const struct dxil_mdnode *node27 = dxil_get_metadata_node(&ctx->mod, NULL, 0);
2006 
2007    const struct dxil_mdnode *node4 = dxil_get_metadata_int32(&ctx->mod, 0);
2008    const struct dxil_mdnode *nodes_4_27_27[] = {
2009       node4, node27, node27
2010    };
2011    const struct dxil_mdnode *node28 = dxil_get_metadata_node(&ctx->mod, nodes_4_27_27,
2012                                                       ARRAY_SIZE(nodes_4_27_27));
2013 
2014    const struct dxil_mdnode *node29 = dxil_get_metadata_node(&ctx->mod, &node28, 1);
2015 
2016    const struct dxil_mdnode *node3 = dxil_get_metadata_int32(&ctx->mod, 1);
2017    const struct dxil_mdnode *main_type_annotation_nodes[] = {
2018       node3, main_entrypoint, node29
2019    };
2020    const struct dxil_mdnode *main_type_annotation = dxil_get_metadata_node(&ctx->mod, main_type_annotation_nodes,
2021                                                                            ARRAY_SIZE(main_type_annotation_nodes));
2022 
2023    if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) {
2024       if (!emit_tag(ctx, DXIL_SHADER_TAG_GS_STATE, emit_gs_state(ctx)))
2025          return false;
2026    } else if (ctx->mod.shader_kind == DXIL_HULL_SHADER) {
2027       ctx->tess_input_control_point_count = 32;
2028       nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
2029          if (nir_is_arrayed_io(var, MESA_SHADER_TESS_CTRL)) {
2030             ctx->tess_input_control_point_count = glsl_array_size(var->type);
2031             break;
2032          }
2033       }
2034 
2035       if (!emit_tag(ctx, DXIL_SHADER_TAG_HS_STATE, emit_hs_state(ctx)))
2036          return false;
2037    } else if (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) {
2038       if (!emit_tag(ctx, DXIL_SHADER_TAG_DS_STATE, emit_ds_state(ctx)))
2039          return false;
2040    } else if (ctx->mod.shader_kind == DXIL_COMPUTE_SHADER) {
2041       if (!emit_tag(ctx, DXIL_SHADER_TAG_NUM_THREADS, emit_threads(ctx)))
2042          return false;
2043       if (ctx->mod.minor_version >= 6 &&
2044           ctx->shader->info.subgroup_size >= SUBGROUP_SIZE_REQUIRE_4) {
2045          if (ctx->mod.minor_version < 8) {
2046             if (!emit_tag(ctx, DXIL_SHADER_TAG_WAVE_SIZE, emit_wave_size(ctx)))
2047                return false;
2048          } else {
2049             if (!emit_tag(ctx, DXIL_SHADER_TAG_WAVE_SIZE_RANGE, emit_wave_size_range(ctx)))
2050                return false;
2051          }
2052       }
2053    }
2054 
2055    uint64_t flags = get_module_flags(ctx);
2056    if (flags != 0) {
2057       if (!emit_tag(ctx, DXIL_SHADER_TAG_FLAGS, dxil_get_metadata_int64(&ctx->mod, flags)))
2058          return false;
2059    }
2060    const struct dxil_mdnode *shader_properties = NULL;
2061    if (ctx->num_shader_property_nodes > 0) {
2062       shader_properties = dxil_get_metadata_node(&ctx->mod, ctx->shader_property_nodes,
2063                                                  ctx->num_shader_property_nodes);
2064       if (!shader_properties)
2065          return false;
2066    }
2067 
2068    nir_function_impl *entry_func_impl = nir_shader_get_entrypoint(ctx->shader);
2069    const struct dxil_mdnode *dx_entry_point = emit_entrypoint(ctx, main_func,
2070        entry_func_impl->function->name, get_signatures(&ctx->mod), resources_node, shader_properties);
2071    if (!dx_entry_point)
2072       return false;
2073 
2074    if (resources_node) {
2075       const struct dxil_mdnode *dx_resources = resources_node;
2076       dxil_add_metadata_named_node(&ctx->mod, "dx.resources",
2077                                        &dx_resources, 1);
2078    }
2079 
2080    if (ctx->mod.minor_version >= 2 &&
2081        dxil_nir_analyze_io_dependencies(&ctx->mod, ctx->shader)) {
2082       const struct dxil_type *i32_type = dxil_module_get_int_type(&ctx->mod, 32);
2083       if (!i32_type)
2084          return false;
2085 
2086       const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, i32_type, ctx->mod.serialized_dependency_table_size);
2087       if (!array_type)
2088          return false;
2089 
2090       const struct dxil_value **array_entries = malloc(sizeof(const struct value *) * ctx->mod.serialized_dependency_table_size);
2091       if (!array_entries)
2092          return false;
2093 
2094       for (uint32_t i = 0; i < ctx->mod.serialized_dependency_table_size; ++i)
2095          array_entries[i] = dxil_module_get_int32_const(&ctx->mod, ctx->mod.serialized_dependency_table[i]);
2096       const struct dxil_value *array_val = dxil_module_get_array_const(&ctx->mod, array_type, array_entries);
2097       free((void *)array_entries);
2098 
2099       const struct dxil_mdnode *view_id_state_val = dxil_get_metadata_value(&ctx->mod, array_type, array_val);
2100       if (!view_id_state_val)
2101          return false;
2102 
2103       const struct dxil_mdnode *view_id_state_node = dxil_get_metadata_node(&ctx->mod, &view_id_state_val, 1);
2104 
2105       dxil_add_metadata_named_node(&ctx->mod, "dx.viewIdState", &view_id_state_node, 1);
2106    }
2107 
2108    const struct dxil_mdnode *dx_type_annotations[] = { main_type_annotation };
2109    return dxil_add_metadata_named_node(&ctx->mod, "dx.typeAnnotations",
2110                                        dx_type_annotations,
2111                                        ARRAY_SIZE(dx_type_annotations)) &&
2112           dxil_add_metadata_named_node(&ctx->mod, "dx.entryPoints",
2113                                        &dx_entry_point, 1);
2114 }
2115 
2116 static const struct dxil_value *
bitcast_to_int(struct ntd_context * ctx,unsigned bit_size,const struct dxil_value * value)2117 bitcast_to_int(struct ntd_context *ctx, unsigned bit_size,
2118                const struct dxil_value *value)
2119 {
2120    const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod, bit_size);
2121    if (!type)
2122       return NULL;
2123 
2124    return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
2125 }
2126 
2127 static const struct dxil_value *
bitcast_to_float(struct ntd_context * ctx,unsigned bit_size,const struct dxil_value * value)2128 bitcast_to_float(struct ntd_context *ctx, unsigned bit_size,
2129                  const struct dxil_value *value)
2130 {
2131    const struct dxil_type *type = dxil_module_get_float_type(&ctx->mod, bit_size);
2132    if (!type)
2133       return NULL;
2134 
2135    return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
2136 }
2137 
2138 static bool
is_phi_src(nir_def * ssa)2139 is_phi_src(nir_def *ssa)
2140 {
2141    nir_foreach_use(src, ssa)
2142       if (nir_src_parent_instr(src)->type == nir_instr_type_phi)
2143          return true;
2144    return false;
2145 }
2146 
/* Records `value` as channel `chan` of SSA def `ssa` in ctx->defs.
 *
 * If the def feeds a phi, the value is bitcast here (in the predecessor
 * block) to the type the phi expects, since all phi sources must agree on
 * one DXIL type.
 */
static void
store_ssa_def(struct ntd_context *ctx, nir_def *ssa, unsigned chan,
              const struct dxil_value *value)
{
   assert(ssa->index < ctx->num_defs);
   assert(chan < ssa->num_components);
   /* Insert bitcasts for phi srcs in the parent block */
   if (is_phi_src(ssa)) {
      /* Prefer ints over floats if it could be both or if we have no type info */
      nir_alu_type expect_type =
         BITSET_TEST(ctx->int_types, ssa->index) ? nir_type_int :
         (BITSET_TEST(ctx->float_types, ssa->index) ? nir_type_float :
          nir_type_int);
      /* 1-bit (boolean) values are always integer and never bitcast. */
      assert(ssa->bit_size != 1 || expect_type == nir_type_int);
      if (ssa->bit_size != 1 && expect_type != dxil_type_to_nir_type(dxil_value_get_type(value)))
         value = dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST,
                                expect_type == nir_type_int ?
                                 dxil_module_get_int_type(&ctx->mod, ssa->bit_size) :
                                 dxil_module_get_float_type(&ctx->mod, ssa->bit_size), value);
      /* A 64-bit phi value implies the corresponding module feature flag. */
      if (ssa->bit_size == 64) {
         if (expect_type == nir_type_int)
            ctx->mod.feats.int64_ops = true;
         if (expect_type == nir_type_float)
            ctx->mod.feats.doubles = true;
      }
   }
   ctx->defs[ssa->index].chans[chan] = value;
}
2175 
2176 static void
store_def(struct ntd_context * ctx,nir_def * def,unsigned chan,const struct dxil_value * value)2177 store_def(struct ntd_context *ctx, nir_def *def, unsigned chan,
2178            const struct dxil_value *value)
2179 {
2180    const struct dxil_type *type = dxil_value_get_type(value);
2181    if (type == ctx->mod.float64_type)
2182       ctx->mod.feats.doubles = true;
2183    if (type == ctx->mod.float16_type ||
2184        type == ctx->mod.int16_type)
2185       ctx->mod.feats.min_precision = true;
2186    if (type == ctx->mod.int64_type)
2187       ctx->mod.feats.int64_ops = true;
2188    store_ssa_def(ctx, def, chan, value);
2189 }
2190 
2191 static void
store_alu_dest(struct ntd_context * ctx,nir_alu_instr * alu,unsigned chan,const struct dxil_value * value)2192 store_alu_dest(struct ntd_context *ctx, nir_alu_instr *alu, unsigned chan,
2193                const struct dxil_value *value)
2194 {
2195    store_def(ctx, &alu->def, chan, value);
2196 }
2197 
2198 static const struct dxil_value *
get_src_ssa(struct ntd_context * ctx,const nir_def * ssa,unsigned chan)2199 get_src_ssa(struct ntd_context *ctx, const nir_def *ssa, unsigned chan)
2200 {
2201    assert(ssa->index < ctx->num_defs);
2202    assert(chan < ssa->num_components);
2203    assert(ctx->defs[ssa->index].chans[chan]);
2204    return ctx->defs[ssa->index].chans[chan];
2205 }
2206 
/*
 * Fetch channel 'chan' of 'src' as a DXIL value with the representation
 * implied by 'type' (int/uint, float, or bool), inserting a bitcast or
 * truncation when the stored value has a different representation.  Also
 * records the feature flags (int64/doubles/native 16-bit) implied by any
 * conversion performed here.
 */
static const struct dxil_value *
get_src(struct ntd_context *ctx, nir_src *src, unsigned chan,
        nir_alu_type type)
{
   const struct dxil_value *value = get_src_ssa(ctx, src->ssa, chan);

   const int bit_size = nir_src_bit_size(*src);

   switch (nir_alu_type_get_base_type(type)) {
   case nir_type_int:
   case nir_type_uint: {
      const struct dxil_type *expect_type =  dxil_module_get_int_type(&ctx->mod, bit_size);
      /* nothing to do */
      if (dxil_value_type_equal_to(value, expect_type)) {
         assert(bit_size != 64 || ctx->mod.feats.int64_ops);
         return value;
      }
      /* Value was stored as a double; using it as int64 needs int64 too. */
      if (bit_size == 64) {
         assert(ctx->mod.feats.doubles);
         ctx->mod.feats.int64_ops = true;
      }
      if (bit_size == 16)
         ctx->mod.feats.native_low_precision = true;
      assert(dxil_value_type_bitsize_equal_to(value, bit_size));
      return bitcast_to_int(ctx,  bit_size, value);
      }

   case nir_type_float:
      assert(nir_src_bit_size(*src) >= 16);
      if (dxil_value_type_equal_to(value, dxil_module_get_float_type(&ctx->mod, bit_size))) {
         assert(nir_src_bit_size(*src) != 64 || ctx->mod.feats.doubles);
         return value;
      }
      /* Value was stored as int64; using it as double needs doubles too. */
      if (bit_size == 64) {
         assert(ctx->mod.feats.int64_ops);
         ctx->mod.feats.doubles = true;
      }
      if (bit_size == 16)
         ctx->mod.feats.native_low_precision = true;
      assert(dxil_value_type_bitsize_equal_to(value, bit_size));
      return bitcast_to_float(ctx, bit_size, value);

   case nir_type_bool:
      /* Booleans are i1 in DXIL; truncate any wider stored value. */
      if (!dxil_value_type_bitsize_equal_to(value, 1)) {
         return dxil_emit_cast(&ctx->mod, DXIL_CAST_TRUNC,
                               dxil_module_get_int_type(&ctx->mod, 1), value);
      }
      return value;

   default:
      unreachable("unexpected nir_alu_type");
   }
}
2260 
2261 static const struct dxil_value *
get_alu_src(struct ntd_context * ctx,nir_alu_instr * alu,unsigned src)2262 get_alu_src(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src)
2263 {
2264    unsigned chan = alu->src[src].swizzle[0];
2265    return get_src(ctx, &alu->src[src].src, chan,
2266                   nir_op_infos[alu->op].input_types[src]);
2267 }
2268 
2269 static bool
emit_binop(struct ntd_context * ctx,nir_alu_instr * alu,enum dxil_bin_opcode opcode,const struct dxil_value * op0,const struct dxil_value * op1)2270 emit_binop(struct ntd_context *ctx, nir_alu_instr *alu,
2271            enum dxil_bin_opcode opcode,
2272            const struct dxil_value *op0, const struct dxil_value *op1)
2273 {
2274    bool is_float_op = nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) == nir_type_float;
2275 
2276    enum dxil_opt_flags flags = 0;
2277    if (is_float_op && !alu->exact)
2278       flags |= DXIL_UNSAFE_ALGEBRA;
2279 
2280    const struct dxil_value *v = dxil_emit_binop(&ctx->mod, opcode, op0, op1, flags);
2281    if (!v)
2282       return false;
2283    store_alu_dest(ctx, alu, 0, v);
2284    return true;
2285 }
2286 
2287 static bool
emit_shift(struct ntd_context * ctx,nir_alu_instr * alu,enum dxil_bin_opcode opcode,const struct dxil_value * op0,const struct dxil_value * op1)2288 emit_shift(struct ntd_context *ctx, nir_alu_instr *alu,
2289            enum dxil_bin_opcode opcode,
2290            const struct dxil_value *op0, const struct dxil_value *op1)
2291 {
2292    unsigned op0_bit_size = nir_src_bit_size(alu->src[0].src);
2293    unsigned op1_bit_size = nir_src_bit_size(alu->src[1].src);
2294 
2295    uint64_t shift_mask = op0_bit_size - 1;
2296    if (!nir_src_is_const(alu->src[1].src)) {
2297       if (op0_bit_size != op1_bit_size) {
2298          const struct dxil_type *type =
2299             dxil_module_get_int_type(&ctx->mod, op0_bit_size);
2300          enum dxil_cast_opcode cast_op =
2301             op1_bit_size < op0_bit_size ? DXIL_CAST_ZEXT : DXIL_CAST_TRUNC;
2302          op1 = dxil_emit_cast(&ctx->mod, cast_op, type, op1);
2303       }
2304       op1 = dxil_emit_binop(&ctx->mod, DXIL_BINOP_AND,
2305                             op1,
2306                             dxil_module_get_int_const(&ctx->mod, shift_mask, op0_bit_size),
2307                             0);
2308    } else {
2309       uint64_t val = nir_scalar_as_uint(
2310          nir_scalar_chase_alu_src(nir_get_scalar(&alu->def, 0), 1));
2311       op1 = dxil_module_get_int_const(&ctx->mod, val & shift_mask, op0_bit_size);
2312    }
2313 
2314    const struct dxil_value *v =
2315       dxil_emit_binop(&ctx->mod, opcode, op0, op1, 0);
2316    if (!v)
2317       return false;
2318    store_alu_dest(ctx, alu, 0, v);
2319    return true;
2320 }
2321 
2322 static bool
emit_cmp(struct ntd_context * ctx,nir_alu_instr * alu,enum dxil_cmp_pred pred,const struct dxil_value * op0,const struct dxil_value * op1)2323 emit_cmp(struct ntd_context *ctx, nir_alu_instr *alu,
2324          enum dxil_cmp_pred pred,
2325          const struct dxil_value *op0, const struct dxil_value *op1)
2326 {
2327    const struct dxil_value *v = dxil_emit_cmp(&ctx->mod, pred, op0, op1);
2328    if (!v)
2329       return false;
2330    store_alu_dest(ctx, alu, 0, v);
2331    return true;
2332 }
2333 
/*
 * Map a NIR conversion opcode to the corresponding DXIL/LLVM cast opcode.
 * For same-base-type conversions (int->int, uint->uint, float->float) the
 * source and destination bit sizes decide between truncation and
 * sign/zero/fp extension; mixed-type conversions map directly.
 */
static enum dxil_cast_opcode
get_cast_op(nir_alu_instr *alu)
{
   unsigned dst_bits = alu->def.bit_size;
   unsigned src_bits = nir_src_bit_size(alu->src[0].src);

   switch (alu->op) {
   /* bool -> int */
   case nir_op_b2i16:
   case nir_op_b2i32:
   case nir_op_b2i64:
      return DXIL_CAST_ZEXT;

   /* float -> float */
   case nir_op_f2f16_rtz:
   case nir_op_f2f16:
   case nir_op_f2fmp:
   case nir_op_f2f32:
   case nir_op_f2f64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_FPTRUNC;
      else
         return DXIL_CAST_FPEXT;

   /* int -> int */
   case nir_op_i2i1:
   case nir_op_i2i16:
   case nir_op_i2imp:
   case nir_op_i2i32:
   case nir_op_i2i64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_TRUNC;
      else
         return DXIL_CAST_SEXT;

   /* uint -> uint */
   case nir_op_u2u1:
   case nir_op_u2u16:
   case nir_op_u2u32:
   case nir_op_u2u64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_TRUNC;
      else
         return DXIL_CAST_ZEXT;

   /* float -> int */
   case nir_op_f2i16:
   case nir_op_f2imp:
   case nir_op_f2i32:
   case nir_op_f2i64:
      return DXIL_CAST_FPTOSI;

   /* float -> uint */
   case nir_op_f2u16:
   case nir_op_f2ump:
   case nir_op_f2u32:
   case nir_op_f2u64:
      return DXIL_CAST_FPTOUI;

   /* int -> float */
   case nir_op_i2f16:
   case nir_op_i2fmp:
   case nir_op_i2f32:
   case nir_op_i2f64:
      return DXIL_CAST_SITOFP;

   /* uint -> float */
   case nir_op_u2f16:
   case nir_op_u2fmp:
   case nir_op_u2f32:
   case nir_op_u2f64:
      return DXIL_CAST_UITOFP;

   default:
      unreachable("unexpected cast op");
   }
}
2414 
2415 static const struct dxil_type *
get_cast_dest_type(struct ntd_context * ctx,nir_alu_instr * alu)2416 get_cast_dest_type(struct ntd_context *ctx, nir_alu_instr *alu)
2417 {
2418    unsigned dst_bits = alu->def.bit_size;
2419    switch (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type)) {
2420    case nir_type_bool:
2421       assert(dst_bits == 1);
2422       FALLTHROUGH;
2423    case nir_type_int:
2424    case nir_type_uint:
2425       return dxil_module_get_int_type(&ctx->mod, dst_bits);
2426 
2427    case nir_type_float:
2428       return dxil_module_get_float_type(&ctx->mod, dst_bits);
2429 
2430    default:
2431       unreachable("unknown nir_alu_type");
2432    }
2433 }
2434 
2435 static bool
is_double(nir_alu_type alu_type,unsigned bit_size)2436 is_double(nir_alu_type alu_type, unsigned bit_size)
2437 {
2438    return nir_alu_type_get_base_type(alu_type) == nir_type_float &&
2439           bit_size == 64;
2440 }
2441 
/*
 * Emit a DXIL cast instruction for a NIR conversion op, recording any
 * feature flags the conversion implies (DX11.1 double extensions for
 * double<->int conversions, native 16-bit for true 16-bit destinations).
 * Returns false on emission failure.
 */
static bool
emit_cast(struct ntd_context *ctx, nir_alu_instr *alu,
          const struct dxil_value *value)
{
   enum dxil_cast_opcode opcode = get_cast_op(alu);
   const struct dxil_type *type = get_cast_dest_type(ctx, alu);
   if (!type)
      return false;

   const nir_op_info *info = &nir_op_infos[alu->op];
   switch (opcode) {
   case DXIL_CAST_UITOFP:
   case DXIL_CAST_SITOFP:
      /* int -> double conversions need the DX11.1 double extensions. */
      if (is_double(info->output_type, alu->def.bit_size))
         ctx->mod.feats.dx11_1_double_extensions = true;
      break;
   case DXIL_CAST_FPTOUI:
   case DXIL_CAST_FPTOSI:
      /* double -> int conversions need them too. */
      if (is_double(info->input_types[0], nir_src_bit_size(alu->src[0].src)))
         ctx->mod.feats.dx11_1_double_extensions = true;
      break;
   default:
      break;
   }

   if (alu->def.bit_size == 16) {
      switch (alu->op) {
      case nir_op_f2fmp:
      case nir_op_i2imp:
      case nir_op_f2imp:
      case nir_op_f2ump:
      case nir_op_i2fmp:
      case nir_op_u2fmp:
         /* "mp" ops only request min-precision, not native 16-bit. */
         break;
      default:
         ctx->mod.feats.native_low_precision = true;
      }
   }

   const struct dxil_value *v = dxil_emit_cast(&ctx->mod, opcode, type,
                                               value);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}
2488 
2489 static enum overload_type
get_overload(nir_alu_type alu_type,unsigned bit_size)2490 get_overload(nir_alu_type alu_type, unsigned bit_size)
2491 {
2492    switch (nir_alu_type_get_base_type(alu_type)) {
2493    case nir_type_int:
2494    case nir_type_uint:
2495    case nir_type_bool:
2496       switch (bit_size) {
2497       case 1: return DXIL_I1;
2498       case 16: return DXIL_I16;
2499       case 32: return DXIL_I32;
2500       case 64: return DXIL_I64;
2501       default:
2502          unreachable("unexpected bit_size");
2503       }
2504    case nir_type_float:
2505       switch (bit_size) {
2506       case 16: return DXIL_F16;
2507       case 32: return DXIL_F32;
2508       case 64: return DXIL_F64;
2509       default:
2510          unreachable("unexpected bit_size");
2511       }
2512    case nir_type_invalid:
2513       return DXIL_NONE;
2514    default:
2515       unreachable("unexpected output type");
2516    }
2517 }
2518 
2519 static enum overload_type
get_ambiguous_overload(struct ntd_context * ctx,nir_intrinsic_instr * intr,enum overload_type default_type)2520 get_ambiguous_overload(struct ntd_context *ctx, nir_intrinsic_instr *intr,
2521                        enum overload_type default_type)
2522 {
2523    if (BITSET_TEST(ctx->int_types, intr->def.index))
2524       return get_overload(nir_type_int, intr->def.bit_size);
2525    if (BITSET_TEST(ctx->float_types, intr->def.index))
2526       return get_overload(nir_type_float, intr->def.bit_size);
2527    return default_type;
2528 }
2529 
2530 static enum overload_type
get_ambiguous_overload_alu_type(struct ntd_context * ctx,nir_intrinsic_instr * intr,nir_alu_type alu_type)2531 get_ambiguous_overload_alu_type(struct ntd_context *ctx, nir_intrinsic_instr *intr,
2532                                 nir_alu_type alu_type)
2533 {
2534    return get_ambiguous_overload(ctx, intr, get_overload(alu_type, intr->def.bit_size));
2535 }
2536 
2537 static bool
emit_unary_intin(struct ntd_context * ctx,nir_alu_instr * alu,enum dxil_intr intr,const struct dxil_value * op)2538 emit_unary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
2539                  enum dxil_intr intr, const struct dxil_value *op)
2540 {
2541    const nir_op_info *info = &nir_op_infos[alu->op];
2542    unsigned src_bits = nir_src_bit_size(alu->src[0].src);
2543    enum overload_type overload = get_overload(info->input_types[0], src_bits);
2544 
2545    const struct dxil_value *v = emit_unary_call(ctx, overload, intr, op);
2546    if (!v)
2547       return false;
2548    store_alu_dest(ctx, alu, 0, v);
2549    return true;
2550 }
2551 
2552 static bool
emit_binary_intin(struct ntd_context * ctx,nir_alu_instr * alu,enum dxil_intr intr,const struct dxil_value * op0,const struct dxil_value * op1)2553 emit_binary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
2554                   enum dxil_intr intr,
2555                   const struct dxil_value *op0, const struct dxil_value *op1)
2556 {
2557    const nir_op_info *info = &nir_op_infos[alu->op];
2558    assert(info->output_type == info->input_types[0]);
2559    assert(info->output_type == info->input_types[1]);
2560    unsigned dst_bits = alu->def.bit_size;
2561    assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
2562    assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
2563    enum overload_type overload = get_overload(info->output_type, dst_bits);
2564 
2565    const struct dxil_value *v = emit_binary_call(ctx, overload, intr,
2566                                                  op0, op1);
2567    if (!v)
2568       return false;
2569    store_alu_dest(ctx, alu, 0, v);
2570    return true;
2571 }
2572 
2573 static bool
emit_tertiary_intin(struct ntd_context * ctx,nir_alu_instr * alu,enum dxil_intr intr,const struct dxil_value * op0,const struct dxil_value * op1,const struct dxil_value * op2)2574 emit_tertiary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
2575                     enum dxil_intr intr,
2576                     const struct dxil_value *op0,
2577                     const struct dxil_value *op1,
2578                     const struct dxil_value *op2)
2579 {
2580    const nir_op_info *info = &nir_op_infos[alu->op];
2581    unsigned dst_bits = alu->def.bit_size;
2582    assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
2583    assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
2584    assert(nir_src_bit_size(alu->src[2].src) == dst_bits);
2585 
2586    assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[0], dst_bits));
2587    assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[1], dst_bits));
2588    assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[2], dst_bits));
2589 
2590    enum overload_type overload = get_overload(info->output_type, dst_bits);
2591 
2592    const struct dxil_value *v = emit_tertiary_call(ctx, overload, intr,
2593                                                    op0, op1, op2);
2594    if (!v)
2595       return false;
2596    store_alu_dest(ctx, alu, 0, v);
2597    return true;
2598 }
2599 
2600 static bool
emit_derivative(struct ntd_context * ctx,nir_intrinsic_instr * intr,enum dxil_intr dxil_intr)2601 emit_derivative(struct ntd_context *ctx, nir_intrinsic_instr *intr,
2602                  enum dxil_intr dxil_intr)
2603 {
2604    const struct dxil_value *src = get_src(ctx, &intr->src[0], 0, nir_type_float);
2605    enum overload_type overload = get_overload(nir_type_float, intr->src[0].ssa->bit_size);
2606    const struct dxil_value *v = emit_unary_call(ctx, overload, dxil_intr, src);
2607    if (!v)
2608       return false;
2609    store_def(ctx, &intr->def, 0, v);
2610    return true;
2611 }
2612 
2613 static bool
emit_bitfield_insert(struct ntd_context * ctx,nir_alu_instr * alu,const struct dxil_value * base,const struct dxil_value * insert,const struct dxil_value * offset,const struct dxil_value * width)2614 emit_bitfield_insert(struct ntd_context *ctx, nir_alu_instr *alu,
2615                      const struct dxil_value *base,
2616                      const struct dxil_value *insert,
2617                      const struct dxil_value *offset,
2618                      const struct dxil_value *width)
2619 {
2620    /* DXIL is width, offset, insert, base, NIR is base, insert, offset, width */
2621    const struct dxil_value *v = emit_quaternary_call(ctx, DXIL_I32, DXIL_INTR_BFI,
2622                                                      width, offset, insert, base);
2623    if (!v)
2624       return false;
2625 
2626    /* DXIL uses the 5 LSB from width/offset. Special-case width >= 32 == copy insert. */
2627    const struct dxil_value *compare_width = dxil_emit_cmp(&ctx->mod, DXIL_ICMP_SGE,
2628       width, dxil_module_get_int32_const(&ctx->mod, 32));
2629    v = dxil_emit_select(&ctx->mod, compare_width, insert, v);
2630    store_alu_dest(ctx, alu, 0, v);
2631    return true;
2632 }
2633 
2634 static bool
emit_dot4add_packed(struct ntd_context * ctx,nir_alu_instr * alu,enum dxil_intr intr,const struct dxil_value * src0,const struct dxil_value * src1,const struct dxil_value * accum)2635 emit_dot4add_packed(struct ntd_context *ctx, nir_alu_instr *alu,
2636                     enum dxil_intr intr,
2637                     const struct dxil_value *src0,
2638                     const struct dxil_value *src1,
2639                     const struct dxil_value *accum)
2640 {
2641    const struct dxil_func *f = dxil_get_function(&ctx->mod, "dx.op.dot4AddPacked", DXIL_I32);
2642    if (!f)
2643       return false;
2644    const struct dxil_value *srcs[] = { dxil_module_get_int32_const(&ctx->mod, intr), accum, src0, src1 };
2645    const struct dxil_value *v = dxil_emit_call(&ctx->mod, f, srcs, ARRAY_SIZE(srcs));
2646    if (!v)
2647       return false;
2648 
2649    store_alu_dest(ctx, alu, 0, v);
2650    return true;
2651 }
2652 
/* Emit a DXIL select (sel ? val_true : val_false) and store the result. */
static bool emit_select(struct ntd_context *ctx, nir_alu_instr *alu,
                        const struct dxil_value *sel,
                        const struct dxil_value *val_true,
                        const struct dxil_value *val_false)
{
   assert(sel != NULL);
   assert(val_true != NULL);
   assert(val_false != NULL);

   const struct dxil_value *result =
      dxil_emit_select(&ctx->mod, sel, val_true, val_false);
   if (result == NULL)
      return false;

   store_alu_dest(ctx, alu, 0, result);
   return true;
}
2669 
2670 static bool
emit_b2f16(struct ntd_context * ctx,nir_alu_instr * alu,const struct dxil_value * val)2671 emit_b2f16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
2672 {
2673    assert(val);
2674 
2675    struct dxil_module *m = &ctx->mod;
2676 
2677    const struct dxil_value *c1 = dxil_module_get_float16_const(m, 0x3C00);
2678    const struct dxil_value *c0 = dxil_module_get_float16_const(m, 0);
2679 
2680    if (!c0 || !c1)
2681       return false;
2682 
2683    return emit_select(ctx, alu, val, c1, c0);
2684 }
2685 
2686 static bool
emit_b2f32(struct ntd_context * ctx,nir_alu_instr * alu,const struct dxil_value * val)2687 emit_b2f32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
2688 {
2689    assert(val);
2690 
2691    struct dxil_module *m = &ctx->mod;
2692 
2693    const struct dxil_value *c1 = dxil_module_get_float_const(m, 1.0f);
2694    const struct dxil_value *c0 = dxil_module_get_float_const(m, 0.0f);
2695 
2696    if (!c0 || !c1)
2697       return false;
2698 
2699    return emit_select(ctx, alu, val, c1, c0);
2700 }
2701 
2702 static bool
emit_b2f64(struct ntd_context * ctx,nir_alu_instr * alu,const struct dxil_value * val)2703 emit_b2f64(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
2704 {
2705    assert(val);
2706 
2707    struct dxil_module *m = &ctx->mod;
2708 
2709    const struct dxil_value *c1 = dxil_module_get_double_const(m, 1.0);
2710    const struct dxil_value *c0 = dxil_module_get_double_const(m, 0.0);
2711 
2712    if (!c0 || !c1)
2713       return false;
2714 
2715    ctx->mod.feats.doubles = 1;
2716    return emit_select(ctx, alu, val, c1, c0);
2717 }
2718 
/*
 * Emit dx.op.legacyF16ToF32 to convert a half stored in the low 16 bits of
 * a 32-bit value.  When 'shift' is set the half lives in the high 16 bits,
 * so it is shifted down first.  Returns false on emission failure.
 */
static bool
emit_f16tof32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val, bool shift)
{
   if (shift) {
      val = dxil_emit_binop(&ctx->mod, DXIL_BINOP_LSHR, val,
         dxil_module_get_int32_const(&ctx->mod, 16), 0);
      if (!val)
         return false;
   }

   const struct dxil_func *func = dxil_get_function(&ctx->mod,
                                                    "dx.op.legacyF16ToF32",
                                                    DXIL_NONE);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F16TOF32);
   if (!opcode)
      return false;

   const struct dxil_value *args[] = {
     opcode,
     val
   };

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}
2750 
/*
 * Pack two floats into one 32-bit value as two halves using
 * dx.op.legacyF32ToF16: val0 fills the low 16 bits and val1 the high 16.
 * When val1 is the constant 0 the high-half conversion, shift, and OR are
 * skipped entirely.  Returns false on emission failure.
 */
static bool
emit_f32tof16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val0, const struct dxil_value *val1)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod,
                                                    "dx.op.legacyF32ToF16",
                                                    DXIL_NONE);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F32TOF16);
   if (!opcode)
      return false;

   /* Low half first. */
   const struct dxil_value *args[] = {
     opcode,
     val0
   };

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;

   /* Only convert the high half when it isn't known to be zero. */
   if (!nir_src_is_const(alu->src[1].src) || nir_src_as_int(alu->src[1].src) != 0) {
      args[1] = val1;
      const struct dxil_value *v_high = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
      if (!v_high)
         return false;

      v_high = dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL, v_high,
         dxil_module_get_int32_const(&ctx->mod, 16), 0);
      if (!v_high)
         return false;

      v = dxil_emit_binop(&ctx->mod, DXIL_BINOP_OR, v, v_high, 0);
      if (!v)
         return false;
   }

   store_alu_dest(ctx, alu, 0, v);
   return true;
}
2792 
2793 static bool
emit_vec(struct ntd_context * ctx,nir_alu_instr * alu,unsigned num_inputs)2794 emit_vec(struct ntd_context *ctx, nir_alu_instr *alu, unsigned num_inputs)
2795 {
2796    for (unsigned i = 0; i < num_inputs; i++) {
2797       const struct dxil_value *src =
2798          get_src_ssa(ctx, alu->src[i].src.ssa, alu->src[i].swizzle[0]);
2799       if (!src)
2800          return false;
2801 
2802       store_alu_dest(ctx, alu, i, src);
2803    }
2804    return true;
2805 }
2806 
2807 static bool
emit_make_double(struct ntd_context * ctx,nir_alu_instr * alu)2808 emit_make_double(struct ntd_context *ctx, nir_alu_instr *alu)
2809 {
2810    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.makeDouble", DXIL_F64);
2811    if (!func)
2812       return false;
2813 
2814    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_MAKE_DOUBLE);
2815    if (!opcode)
2816       return false;
2817 
2818    const struct dxil_value *args[3] = {
2819       opcode,
2820       get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_uint32),
2821       get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[1], nir_type_uint32),
2822    };
2823    if (!args[1] || !args[2])
2824       return false;
2825 
2826    const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2827    if (!v)
2828       return false;
2829    store_def(ctx, &alu->def, 0, v);
2830    return true;
2831 }
2832 
/*
 * Lower unpack_double_2x32_dxil via dx.op.splitDouble, which returns a
 * two-member struct; the members are extracted and stored as the two
 * 32-bit channels of the def.  Returns false on emission failure.
 */
static bool
emit_split_double(struct ntd_context *ctx, nir_alu_instr *alu)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.splitDouble", DXIL_F64);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SPLIT_DOUBLE);
   if (!opcode)
      return false;

   const struct dxil_value *args[] = {
      opcode,
      get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_float64)
   };
   if (!args[1])
      return false;

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;

   /* NOTE(review): struct member 0 is stored to channel 0 and named 'hi'
    * here — verify against the splitDouble return layout (lo/hi order)
    * in the DXIL spec. */
   const struct dxil_value *hi = dxil_emit_extractval(&ctx->mod, v, 0);
   const struct dxil_value *lo = dxil_emit_extractval(&ctx->mod, v, 1);
   if (!hi || !lo)
      return false;

   store_def(ctx, &alu->def, 0, hi);
   store_def(ctx, &alu->def, 1, lo);
   return true;
}
2864 
2865 static bool
emit_alu(struct ntd_context * ctx,nir_alu_instr * alu)2866 emit_alu(struct ntd_context *ctx, nir_alu_instr *alu)
2867 {
2868    /* handle vec-instructions first; they are the only ones that produce
2869     * vector results.
2870     */
2871    switch (alu->op) {
2872    case nir_op_vec2:
2873    case nir_op_vec3:
2874    case nir_op_vec4:
2875    case nir_op_vec8:
2876    case nir_op_vec16:
2877       return emit_vec(ctx, alu, nir_op_infos[alu->op].num_inputs);
2878    case nir_op_mov: {
2879          assert(alu->def.num_components == 1);
2880          store_ssa_def(ctx, &alu->def, 0, get_src_ssa(ctx,
2881                         alu->src->src.ssa, alu->src->swizzle[0]));
2882          return true;
2883       }
2884    case nir_op_pack_double_2x32_dxil:
2885       return emit_make_double(ctx, alu);
2886    case nir_op_unpack_double_2x32_dxil:
2887       return emit_split_double(ctx, alu);
2888    case nir_op_bcsel: {
2889       /* Handled here to avoid type forced bitcast to int, since bcsel is used for ints and floats.
2890        * Ideally, the back-typing got both sources to match, but if it didn't, explicitly get src1's type */
2891       const struct dxil_value *src1 = get_src_ssa(ctx, alu->src[1].src.ssa, alu->src[1].swizzle[0]);
2892       nir_alu_type src1_type = dxil_type_to_nir_type(dxil_value_get_type(src1));
2893       return emit_select(ctx, alu,
2894                          get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_bool),
2895                          src1,
2896                          get_src(ctx, &alu->src[2].src, alu->src[2].swizzle[0], src1_type));
2897    }
2898    default:
2899       /* silence warnings */
2900       ;
2901    }
2902 
2903    /* other ops should be scalar */
2904    const struct dxil_value *src[4];
2905    assert(nir_op_infos[alu->op].num_inputs <= 4);
2906    for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
2907       src[i] = get_alu_src(ctx, alu, i);
2908       if (!src[i])
2909          return false;
2910    }
2911 
2912    switch (alu->op) {
2913    case nir_op_iadd:
2914    case nir_op_fadd: return emit_binop(ctx, alu, DXIL_BINOP_ADD, src[0], src[1]);
2915 
2916    case nir_op_isub:
2917    case nir_op_fsub: return emit_binop(ctx, alu, DXIL_BINOP_SUB, src[0], src[1]);
2918 
2919    case nir_op_imul:
2920    case nir_op_fmul: return emit_binop(ctx, alu, DXIL_BINOP_MUL, src[0], src[1]);
2921 
2922    case nir_op_fdiv:
2923       if (alu->def.bit_size == 64)
2924          ctx->mod.feats.dx11_1_double_extensions = 1;
2925       return emit_binop(ctx, alu, DXIL_BINOP_SDIV, src[0], src[1]);
2926 
2927    case nir_op_idiv:
2928    case nir_op_udiv:
2929       if (nir_src_is_const(alu->src[1].src)) {
2930          /* It's illegal to emit a literal divide by 0 in DXIL */
2931          nir_scalar divisor = nir_scalar_chase_alu_src(nir_get_scalar(&alu->def, 0), 1);
2932          if (nir_scalar_as_int(divisor) == 0) {
2933             store_alu_dest(ctx, alu, 0,
2934                            dxil_module_get_int_const(&ctx->mod, 0, alu->def.bit_size));
2935             return true;
2936          }
2937       }
2938       return emit_binop(ctx, alu, alu->op == nir_op_idiv ? DXIL_BINOP_SDIV : DXIL_BINOP_UDIV, src[0], src[1]);
2939 
2940    case nir_op_irem: return emit_binop(ctx, alu, DXIL_BINOP_SREM, src[0], src[1]);
2941    case nir_op_imod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
2942    case nir_op_umod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
2943    case nir_op_ishl: return emit_shift(ctx, alu, DXIL_BINOP_SHL, src[0], src[1]);
2944    case nir_op_ishr: return emit_shift(ctx, alu, DXIL_BINOP_ASHR, src[0], src[1]);
2945    case nir_op_ushr: return emit_shift(ctx, alu, DXIL_BINOP_LSHR, src[0], src[1]);
2946    case nir_op_iand: return emit_binop(ctx, alu, DXIL_BINOP_AND, src[0], src[1]);
2947    case nir_op_ior:  return emit_binop(ctx, alu, DXIL_BINOP_OR, src[0], src[1]);
2948    case nir_op_ixor: return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], src[1]);
2949    case nir_op_inot: {
2950       unsigned bit_size = alu->def.bit_size;
2951       intmax_t val = bit_size == 1 ? 1 : -1;
2952       const struct dxil_value *negative_one = dxil_module_get_int_const(&ctx->mod, val, bit_size);
2953       return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], negative_one);
2954    }
2955    case nir_op_ieq:  return emit_cmp(ctx, alu, DXIL_ICMP_EQ, src[0], src[1]);
2956    case nir_op_ine:  return emit_cmp(ctx, alu, DXIL_ICMP_NE, src[0], src[1]);
2957    case nir_op_ige:  return emit_cmp(ctx, alu, DXIL_ICMP_SGE, src[0], src[1]);
2958    case nir_op_uge:  return emit_cmp(ctx, alu, DXIL_ICMP_UGE, src[0], src[1]);
2959    case nir_op_ilt:  return emit_cmp(ctx, alu, DXIL_ICMP_SLT, src[0], src[1]);
2960    case nir_op_ult:  return emit_cmp(ctx, alu, DXIL_ICMP_ULT, src[0], src[1]);
2961    case nir_op_feq:  return emit_cmp(ctx, alu, DXIL_FCMP_OEQ, src[0], src[1]);
2962    case nir_op_fneu: return emit_cmp(ctx, alu, DXIL_FCMP_UNE, src[0], src[1]);
2963    case nir_op_flt:  return emit_cmp(ctx, alu, DXIL_FCMP_OLT, src[0], src[1]);
2964    case nir_op_fge:  return emit_cmp(ctx, alu, DXIL_FCMP_OGE, src[0], src[1]);
2965    case nir_op_ftrunc: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_Z, src[0]);
2966    case nir_op_fabs: return emit_unary_intin(ctx, alu, DXIL_INTR_FABS, src[0]);
2967    case nir_op_fcos: return emit_unary_intin(ctx, alu, DXIL_INTR_FCOS, src[0]);
2968    case nir_op_fsin: return emit_unary_intin(ctx, alu, DXIL_INTR_FSIN, src[0]);
2969    case nir_op_fceil: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_PI, src[0]);
2970    case nir_op_fexp2: return emit_unary_intin(ctx, alu, DXIL_INTR_FEXP2, src[0]);
2971    case nir_op_flog2: return emit_unary_intin(ctx, alu, DXIL_INTR_FLOG2, src[0]);
2972    case nir_op_ffloor: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NI, src[0]);
2973    case nir_op_ffract: return emit_unary_intin(ctx, alu, DXIL_INTR_FRC, src[0]);
2974    case nir_op_fisnormal: return emit_unary_intin(ctx, alu, DXIL_INTR_ISNORMAL, src[0]);
2975    case nir_op_fisfinite: return emit_unary_intin(ctx, alu, DXIL_INTR_ISFINITE, src[0]);
2976 
2977    case nir_op_fround_even: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NE, src[0]);
2978    case nir_op_frcp: {
2979       const struct dxil_value *one;
2980       switch (alu->def.bit_size) {
2981       case 16:
2982          one = dxil_module_get_float16_const(&ctx->mod, 0x3C00);
2983          break;
2984       case 32:
2985          one = dxil_module_get_float_const(&ctx->mod, 1.0f);
2986          break;
2987       case 64:
2988          one = dxil_module_get_double_const(&ctx->mod, 1.0);
2989          break;
2990       default: unreachable("Invalid float size");
2991       }
2992       return emit_binop(ctx, alu, DXIL_BINOP_SDIV, one, src[0]);
2993    }
2994    case nir_op_fsat: return emit_unary_intin(ctx, alu, DXIL_INTR_SATURATE, src[0]);
2995    case nir_op_bit_count: return emit_unary_intin(ctx, alu, DXIL_INTR_COUNTBITS, src[0]);
2996    case nir_op_bitfield_reverse: return emit_unary_intin(ctx, alu, DXIL_INTR_BFREV, src[0]);
2997    case nir_op_ufind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_HI, src[0]);
2998    case nir_op_ifind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_SHI, src[0]);
2999    case nir_op_find_lsb: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_LO, src[0]);
3000    case nir_op_imax: return emit_binary_intin(ctx, alu, DXIL_INTR_IMAX, src[0], src[1]);
3001    case nir_op_imin: return emit_binary_intin(ctx, alu, DXIL_INTR_IMIN, src[0], src[1]);
3002    case nir_op_umax: return emit_binary_intin(ctx, alu, DXIL_INTR_UMAX, src[0], src[1]);
3003    case nir_op_umin: return emit_binary_intin(ctx, alu, DXIL_INTR_UMIN, src[0], src[1]);
3004    case nir_op_frsq: return emit_unary_intin(ctx, alu, DXIL_INTR_RSQRT, src[0]);
3005    case nir_op_fsqrt: return emit_unary_intin(ctx, alu, DXIL_INTR_SQRT, src[0]);
3006    case nir_op_fmax: return emit_binary_intin(ctx, alu, DXIL_INTR_FMAX, src[0], src[1]);
3007    case nir_op_fmin: return emit_binary_intin(ctx, alu, DXIL_INTR_FMIN, src[0], src[1]);
3008    case nir_op_ffma:
3009       if (alu->def.bit_size == 64)
3010          ctx->mod.feats.dx11_1_double_extensions = 1;
3011       return emit_tertiary_intin(ctx, alu, DXIL_INTR_FMA, src[0], src[1], src[2]);
3012 
3013    case nir_op_ibfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_IBFE, src[2], src[1], src[0]);
3014    case nir_op_ubfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_UBFE, src[2], src[1], src[0]);
3015    case nir_op_bitfield_insert: return emit_bitfield_insert(ctx, alu, src[0], src[1], src[2], src[3]);
3016 
3017    case nir_op_unpack_half_2x16_split_x: return emit_f16tof32(ctx, alu, src[0], false);
3018    case nir_op_unpack_half_2x16_split_y: return emit_f16tof32(ctx, alu, src[0], true);
3019    case nir_op_pack_half_2x16_split: return emit_f32tof16(ctx, alu, src[0], src[1]);
3020 
3021    case nir_op_sdot_4x8_iadd: return emit_dot4add_packed(ctx, alu, DXIL_INTR_DOT4_ADD_I8_PACKED, src[0], src[1], src[2]);
3022    case nir_op_udot_4x8_uadd: return emit_dot4add_packed(ctx, alu, DXIL_INTR_DOT4_ADD_U8_PACKED, src[0], src[1], src[2]);
3023 
3024    case nir_op_i2i1:
3025    case nir_op_u2u1:
3026    case nir_op_b2i16:
3027    case nir_op_i2i16:
3028    case nir_op_i2imp:
3029    case nir_op_f2i16:
3030    case nir_op_f2imp:
3031    case nir_op_f2u16:
3032    case nir_op_f2ump:
3033    case nir_op_u2u16:
3034    case nir_op_u2f16:
3035    case nir_op_u2fmp:
3036    case nir_op_i2f16:
3037    case nir_op_i2fmp:
3038    case nir_op_f2f16_rtz:
3039    case nir_op_f2f16:
3040    case nir_op_f2fmp:
3041    case nir_op_b2i32:
3042    case nir_op_f2f32:
3043    case nir_op_f2i32:
3044    case nir_op_f2u32:
3045    case nir_op_i2f32:
3046    case nir_op_i2i32:
3047    case nir_op_u2f32:
3048    case nir_op_u2u32:
3049    case nir_op_b2i64:
3050    case nir_op_f2f64:
3051    case nir_op_f2i64:
3052    case nir_op_f2u64:
3053    case nir_op_i2f64:
3054    case nir_op_i2i64:
3055    case nir_op_u2f64:
3056    case nir_op_u2u64:
3057       return emit_cast(ctx, alu, src[0]);
3058 
3059    case nir_op_b2f16: return emit_b2f16(ctx, alu, src[0]);
3060    case nir_op_b2f32: return emit_b2f32(ctx, alu, src[0]);
3061    case nir_op_b2f64: return emit_b2f64(ctx, alu, src[0]);
3062    default:
3063       log_nir_instr_unsupported(ctx->logger, "Unimplemented ALU instruction",
3064                                 &alu->instr);
3065       return false;
3066    }
3067 }
3068 
3069 static const struct dxil_value *
load_ubo(struct ntd_context * ctx,const struct dxil_value * handle,const struct dxil_value * offset,enum overload_type overload)3070 load_ubo(struct ntd_context *ctx, const struct dxil_value *handle,
3071          const struct dxil_value *offset, enum overload_type overload)
3072 {
3073    assert(handle && offset);
3074 
3075    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CBUFFER_LOAD_LEGACY);
3076    if (!opcode)
3077       return NULL;
3078 
3079    const struct dxil_value *args[] = {
3080       opcode, handle, offset
3081    };
3082 
3083    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cbufferLoadLegacy", overload);
3084    if (!func)
3085       return NULL;
3086    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3087 }
3088 
3089 static bool
emit_barrier_impl(struct ntd_context * ctx,nir_variable_mode modes,mesa_scope execution_scope,mesa_scope mem_scope)3090 emit_barrier_impl(struct ntd_context *ctx, nir_variable_mode modes, mesa_scope execution_scope, mesa_scope mem_scope)
3091 {
3092    const struct dxil_value *opcode, *mode;
3093    const struct dxil_func *func;
3094    uint32_t flags = 0;
3095 
3096    if (execution_scope == SCOPE_WORKGROUP)
3097       flags |= DXIL_BARRIER_MODE_SYNC_THREAD_GROUP;
3098 
3099    bool is_compute = ctx->mod.shader_kind == DXIL_COMPUTE_SHADER;
3100 
3101    if ((modes & (nir_var_mem_ssbo | nir_var_mem_global | nir_var_image)) &&
3102        (mem_scope > SCOPE_WORKGROUP || !is_compute)) {
3103       flags |= DXIL_BARRIER_MODE_UAV_FENCE_GLOBAL;
3104    } else {
3105       flags |= DXIL_BARRIER_MODE_UAV_FENCE_THREAD_GROUP;
3106    }
3107 
3108    if ((modes & nir_var_mem_shared) && is_compute)
3109       flags |= DXIL_BARRIER_MODE_GROUPSHARED_MEM_FENCE;
3110 
3111    func = dxil_get_function(&ctx->mod, "dx.op.barrier", DXIL_NONE);
3112    if (!func)
3113       return false;
3114 
3115    opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_BARRIER);
3116    if (!opcode)
3117       return false;
3118 
3119    mode = dxil_module_get_int32_const(&ctx->mod, flags);
3120    if (!mode)
3121       return false;
3122 
3123    const struct dxil_value *args[] = { opcode, mode };
3124 
3125    return dxil_emit_call_void(&ctx->mod, func,
3126                               args, ARRAY_SIZE(args));
3127 }
3128 
3129 static bool
emit_barrier(struct ntd_context * ctx,nir_intrinsic_instr * intr)3130 emit_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3131 {
3132    return emit_barrier_impl(ctx,
3133       nir_intrinsic_memory_modes(intr),
3134       nir_intrinsic_execution_scope(intr),
3135       nir_intrinsic_memory_scope(intr));
3136 }
3137 
3138 static bool
emit_load_global_invocation_id(struct ntd_context * ctx,nir_intrinsic_instr * intr)3139 emit_load_global_invocation_id(struct ntd_context *ctx,
3140                                     nir_intrinsic_instr *intr)
3141 {
3142    nir_component_mask_t comps = nir_def_components_read(&intr->def);
3143 
3144    for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
3145       if (comps & (1 << i)) {
3146          const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
3147          if (!idx)
3148             return false;
3149          const struct dxil_value *globalid = emit_threadid_call(ctx, idx);
3150 
3151          if (!globalid)
3152             return false;
3153 
3154          store_def(ctx, &intr->def, i, globalid);
3155       }
3156    }
3157    return true;
3158 }
3159 
3160 static bool
emit_load_local_invocation_id(struct ntd_context * ctx,nir_intrinsic_instr * intr)3161 emit_load_local_invocation_id(struct ntd_context *ctx,
3162                               nir_intrinsic_instr *intr)
3163 {
3164    nir_component_mask_t comps = nir_def_components_read(&intr->def);
3165 
3166    for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
3167       if (comps & (1 << i)) {
3168          const struct dxil_value
3169             *idx = dxil_module_get_int32_const(&ctx->mod, i);
3170          if (!idx)
3171             return false;
3172          const struct dxil_value
3173             *threadidingroup = emit_threadidingroup_call(ctx, idx);
3174          if (!threadidingroup)
3175             return false;
3176          store_def(ctx, &intr->def, i, threadidingroup);
3177       }
3178    }
3179    return true;
3180 }
3181 
3182 static bool
emit_load_local_invocation_index(struct ntd_context * ctx,nir_intrinsic_instr * intr)3183 emit_load_local_invocation_index(struct ntd_context *ctx,
3184                                  nir_intrinsic_instr *intr)
3185 {
3186    const struct dxil_value
3187       *flattenedthreadidingroup = emit_flattenedthreadidingroup_call(ctx);
3188    if (!flattenedthreadidingroup)
3189       return false;
3190    store_def(ctx, &intr->def, 0, flattenedthreadidingroup);
3191 
3192    return true;
3193 }
3194 
3195 static bool
emit_load_local_workgroup_id(struct ntd_context * ctx,nir_intrinsic_instr * intr)3196 emit_load_local_workgroup_id(struct ntd_context *ctx,
3197                               nir_intrinsic_instr *intr)
3198 {
3199    nir_component_mask_t comps = nir_def_components_read(&intr->def);
3200 
3201    for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
3202       if (comps & (1 << i)) {
3203          const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
3204          if (!idx)
3205             return false;
3206          const struct dxil_value *groupid = emit_groupid_call(ctx, idx);
3207          if (!groupid)
3208             return false;
3209          store_def(ctx, &intr->def, i, groupid);
3210       }
3211    }
3212    return true;
3213 }
3214 
3215 static const struct dxil_value *
call_unary_external_function(struct ntd_context * ctx,const char * name,int32_t dxil_intr,enum overload_type overload)3216 call_unary_external_function(struct ntd_context *ctx,
3217                              const char *name,
3218                              int32_t dxil_intr,
3219                              enum overload_type overload)
3220 {
3221    const struct dxil_func *func =
3222       dxil_get_function(&ctx->mod, name, overload);
3223    if (!func)
3224       return false;
3225 
3226    const struct dxil_value *opcode =
3227       dxil_module_get_int32_const(&ctx->mod, dxil_intr);
3228    if (!opcode)
3229       return false;
3230 
3231    const struct dxil_value *args[] = {opcode};
3232 
3233    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3234 }
3235 
3236 static bool
emit_load_unary_external_function(struct ntd_context * ctx,nir_intrinsic_instr * intr,const char * name,int32_t dxil_intr,nir_alu_type type)3237 emit_load_unary_external_function(struct ntd_context *ctx,
3238                                   nir_intrinsic_instr *intr, const char *name,
3239                                   int32_t dxil_intr,
3240                                   nir_alu_type type)
3241 {
3242    const struct dxil_value *value = call_unary_external_function(ctx, name, dxil_intr,
3243                                                                  get_overload(type, intr->def.bit_size));
3244    store_def(ctx, &intr->def, 0, value);
3245 
3246    return true;
3247 }
3248 
3249 static bool
emit_load_sample_mask_in(struct ntd_context * ctx,nir_intrinsic_instr * intr)3250 emit_load_sample_mask_in(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3251 {
3252    const struct dxil_value *value = call_unary_external_function(ctx,
3253       "dx.op.coverage", DXIL_INTR_COVERAGE, DXIL_I32);
3254 
3255    /* Mask coverage with (1 << sample index). Note, done as an AND to handle extrapolation cases. */
3256    if (ctx->mod.info.has_per_sample_input) {
3257       value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_AND, value,
3258          dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL,
3259             dxil_module_get_int32_const(&ctx->mod, 1),
3260             call_unary_external_function(ctx, "dx.op.sampleIndex", DXIL_INTR_SAMPLE_INDEX, DXIL_I32), 0), 0);
3261    }
3262 
3263    store_def(ctx, &intr->def, 0, value);
3264    return true;
3265 }
3266 
3267 static bool
emit_load_tess_coord(struct ntd_context * ctx,nir_intrinsic_instr * intr)3268 emit_load_tess_coord(struct ntd_context *ctx,
3269                      nir_intrinsic_instr *intr)
3270 {
3271    const struct dxil_func *func =
3272       dxil_get_function(&ctx->mod, "dx.op.domainLocation", DXIL_F32);
3273    if (!func)
3274       return false;
3275 
3276    const struct dxil_value *opcode =
3277       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DOMAIN_LOCATION);
3278    if (!opcode)
3279       return false;
3280 
3281    unsigned num_coords = ctx->shader->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES ? 3 : 2;
3282    for (unsigned i = 0; i < num_coords; ++i) {
3283       unsigned component_idx = i;
3284 
3285       const struct dxil_value *component = dxil_module_get_int8_const(&ctx->mod, component_idx);
3286       if (!component)
3287          return false;
3288 
3289       const struct dxil_value *args[] = { opcode, component };
3290 
3291       const struct dxil_value *value =
3292          dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3293       store_def(ctx, &intr->def, i, value);
3294    }
3295 
3296    for (unsigned i = num_coords; i < intr->def.num_components; ++i) {
3297       const struct dxil_value *value = dxil_module_get_float_const(&ctx->mod, 0.0f);
3298       store_def(ctx, &intr->def, i, value);
3299    }
3300 
3301    return true;
3302 }
3303 
3304 static const struct dxil_value *
get_int32_undef(struct dxil_module * m)3305 get_int32_undef(struct dxil_module *m)
3306 {
3307    const struct dxil_type *int32_type =
3308       dxil_module_get_int_type(m, 32);
3309    if (!int32_type)
3310       return NULL;
3311 
3312    return dxil_module_get_undef(m, int32_type);
3313 }
3314 
/* Resolve the resource handle for a NIR resource-index source.
 * Returns a dxil_value handle, or NULL on failure.
 */
static const struct dxil_value *
get_resource_handle(struct ntd_context *ctx, nir_src *src, enum dxil_resource_class class,
                    enum dxil_resource_kind kind)
{
   /* This source might be one of:
    * 1. Constant resource index - just look it up in precomputed handle arrays
    *    If it's null in that array, create a handle
    * 2. A handle from load_vulkan_descriptor - just get the stored SSA value
    * 3. Dynamic resource index - create a handle for it here
    */
   assert(src->ssa->num_components == 1 && src->ssa->bit_size == 32);
   nir_const_value *const_block_index = nir_src_as_const_value(*src);
   const struct dxil_value *handle_entry = NULL;
   if (const_block_index) {
      /* Case 1: constant index. Pick the precomputed handle array matching
       * the resource kind/class; entries may be NULL (no handle created). */
      assert(ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN);
      switch (kind) {
      case DXIL_RESOURCE_KIND_CBUFFER:
         handle_entry = ctx->cbv_handles[const_block_index->u32];
         break;
      case DXIL_RESOURCE_KIND_RAW_BUFFER:
         /* SSBOs: writable ones live in UAV space, read-only ones in SRV. */
         if (class == DXIL_RESOURCE_CLASS_UAV)
            handle_entry = ctx->ssbo_handles[const_block_index->u32];
         else
            handle_entry = ctx->srv_handles[const_block_index->u32];
         break;
      case DXIL_RESOURCE_KIND_SAMPLER:
         handle_entry = ctx->sampler_handles[const_block_index->u32];
         break;
      default:
         /* Typed resources (textures/images). */
         if (class == DXIL_RESOURCE_CLASS_UAV)
            handle_entry = ctx->image_handles[const_block_index->u32];
         else
            handle_entry = ctx->srv_handles[const_block_index->u32];
         break;
      }
   }

   if (handle_entry)
      return handle_entry;

   /* Case 2: the source is a deref or a Vulkan descriptor handle — the SSA
    * value already holds the handle. */
   if (nir_src_as_deref(*src) ||
       ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      return get_src_ssa(ctx, src->ssa, 0);
   }

   /* Case 3: dynamic index — build a handle now. GL UAVs are placed in
    * register space 1 (typed) or 2 (raw buffers); everything else uses
    * space 0. */
   unsigned space = 0;
   if (ctx->opts->environment == DXIL_ENVIRONMENT_GL &&
       class == DXIL_RESOURCE_CLASS_UAV) {
      if (kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
         space = 2;
      else
         space = 1;
   }

   /* The base binding here will almost always be zero. The only cases where we end
    * up in this type of dynamic indexing are:
    * 1. GL UBOs
    * 2. GL SSBOs
    * 3. CL SSBOs
    * In all cases except GL UBOs, the resources are a single zero-based array.
    * In that case, the base is 1, because uniforms use 0 and cannot by dynamically
    * indexed. All other cases should either fall into static indexing (first early return),
    * deref-based dynamic handle creation (images, or Vulkan textures/samplers), or
    * load_vulkan_descriptor handle creation.
    */
   unsigned base_binding = 0;
   if (ctx->shader->info.first_ubo_is_default_ubo &&
       class == DXIL_RESOURCE_CLASS_CBV)
      base_binding = 1;

   const struct dxil_value *value = get_src(ctx, src, 0, nir_type_uint);
   /* !const_block_index marks the index as non-uniform for handle creation;
    * at this point it is always true (constant indices returned above). */
   const struct dxil_value *handle = emit_createhandle_call_dynamic(ctx, class,
      space, base_binding, value, !const_block_index);

   return handle;
}
3391 
3392 static const struct dxil_value *
create_image_handle(struct ntd_context * ctx,nir_intrinsic_instr * image_intr)3393 create_image_handle(struct ntd_context *ctx, nir_intrinsic_instr *image_intr)
3394 {
3395    const struct dxil_value *unannotated_handle =
3396       emit_createhandle_heap(ctx, get_src(ctx, &image_intr->src[0], 0, nir_type_uint32), false, true /*TODO: divergence*/);
3397    const struct dxil_value *res_props =
3398       dxil_module_get_uav_res_props_const(&ctx->mod, image_intr);
3399 
3400    if (!unannotated_handle || !res_props)
3401       return NULL;
3402 
3403    return emit_annotate_handle(ctx, unannotated_handle, res_props);
3404 }
3405 
3406 static const struct dxil_value *
create_srv_handle(struct ntd_context * ctx,nir_tex_instr * tex,nir_src * src)3407 create_srv_handle(struct ntd_context *ctx, nir_tex_instr *tex, nir_src *src)
3408 {
3409    const struct dxil_value *unannotated_handle =
3410       emit_createhandle_heap(ctx, get_src(ctx, src, 0, nir_type_uint32), false, true /*TODO: divergence*/);
3411    const struct dxil_value *res_props =
3412       dxil_module_get_srv_res_props_const(&ctx->mod, tex);
3413 
3414    if (!unannotated_handle || !res_props)
3415       return NULL;
3416 
3417    return emit_annotate_handle(ctx, unannotated_handle, res_props);
3418 }
3419 
3420 static const struct dxil_value *
create_sampler_handle(struct ntd_context * ctx,bool is_shadow,nir_src * src)3421 create_sampler_handle(struct ntd_context *ctx, bool is_shadow, nir_src *src)
3422 {
3423    const struct dxil_value *unannotated_handle =
3424       emit_createhandle_heap(ctx, get_src(ctx, src, 0, nir_type_uint32), true, true /*TODO: divergence*/);
3425    const struct dxil_value *res_props =
3426       dxil_module_get_sampler_res_props_const(&ctx->mod, is_shadow);
3427 
3428    if (!unannotated_handle || !res_props)
3429       return NULL;
3430 
3431    return emit_annotate_handle(ctx, unannotated_handle, res_props);
3432 }
3433 
/* Lower nir_intrinsic_load_ssbo to a DXIL buffer load and scatter the
 * returned aggregate into the intrinsic's def components.
 * Returns false on emission failure.
 */
static bool
emit_load_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);

   /* Default to UAV; in the Vulkan environment, SSBOs declared non-writable
    * are bound as SRVs instead. */
   enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV;
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
      if (var && var->data.access & ACCESS_NON_WRITEABLE)
         class = DXIL_RESOURCE_CLASS_SRV;
   }

   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);
   if (!int32_undef || !handle || !offset)
      return false;

   assert(nir_src_bit_size(intr->src[0]) == 32);
   assert(nir_intrinsic_dest_components(intr) <= 4);

   /* Raw buffers take a single byte offset; the second coordinate is
    * unused, so pass i32 undef. */
   const struct dxil_value *coord[2] = {
      offset,
      int32_undef
   };

   enum overload_type overload = get_ambiguous_overload_alu_type(ctx, intr, nir_type_uint);
   /* SM 6.2+ has dx.op.rawBufferLoad with explicit component count and
    * alignment; older targets fall back to dx.op.bufferLoad. */
   const struct dxil_value *load = ctx->mod.minor_version >= 2 ?
      emit_raw_bufferload_call(ctx, handle, coord,
                               overload,
                               nir_intrinsic_dest_components(intr),
                               intr->def.bit_size / 8) :
      emit_bufferload_call(ctx, handle, coord, overload);
   if (!load)
      return false;

   /* The load returns a struct; extract each component into the def. */
   for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
      const struct dxil_value *val =
         dxil_emit_extractval(&ctx->mod, load, i);
      if (!val)
         return false;
      store_def(ctx, &intr->def, i, val);
   }
   /* 16-bit loads require the native-low-precision shader feature. */
   if (intr->def.bit_size == 16)
      ctx->mod.feats.native_low_precision = true;
   return true;
}
3481 
/* Lower nir_intrinsic_store_ssbo to a DXIL buffer store. src[0] is the
 * value, src[1] the buffer index, src[2] the byte offset.
 * Returns false on emission failure.
 */
static bool
emit_store_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[1], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[2], 0, nir_type_uint);
   if (!handle || !offset)
      return false;

   unsigned num_components = nir_src_num_components(intr->src[0]);
   assert(num_components <= 4);
   /* 16-bit stores require the native-low-precision shader feature. */
   if (nir_src_bit_size(intr->src[0]) == 16)
      ctx->mod.feats.native_low_precision = true;

   /* Derive the NIR type from the DXIL type of the stored SSA value so the
    * store uses the same representation the value was produced in. */
   nir_alu_type type =
      dxil_type_to_nir_type(dxil_value_get_type(get_src_ssa(ctx, intr->src[0].ssa, 0)));
   const struct dxil_value *value[4] = { 0 };
   for (unsigned i = 0; i < num_components; ++i) {
      value[i] = get_src(ctx, &intr->src[0], i, type);
      if (!value[i])
         return false;
   }

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   /* Raw buffers take a single byte offset; the second coordinate is
    * unused, so pass i32 undef. */
   const struct dxil_value *coord[2] = {
      offset,
      int32_undef
   };

   enum overload_type overload = get_overload(type, intr->src[0].ssa->bit_size);
   /* The store op always takes 4 value operands; pad the tail with undef
    * and mask them off via the write mask below. */
   if (num_components < 4) {
      const struct dxil_value *value_undef = dxil_module_get_undef(&ctx->mod, dxil_value_get_type(value[0]));
      if (!value_undef)
         return false;

      for (int i = num_components; i < 4; ++i)
         value[i] = value_undef;
   }

   const struct dxil_value *write_mask =
      dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
   if (!write_mask)
      return false;

   /* SM 6.2+ has dx.op.rawBufferStore with explicit alignment; older
    * targets fall back to dx.op.bufferStore. */
   return ctx->mod.minor_version >= 2 ?
      emit_raw_bufferstore_call(ctx, handle, coord, value, write_mask, overload, intr->src[0].ssa->bit_size / 8) :
      emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload);
}
3533 
3534 static bool
emit_load_ubo_vec4(struct ntd_context * ctx,nir_intrinsic_instr * intr)3535 emit_load_ubo_vec4(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3536 {
3537    const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, DXIL_RESOURCE_KIND_CBUFFER);
3538    const struct dxil_value *offset =
3539       get_src(ctx, &intr->src[1], 0, nir_type_uint);
3540 
3541    if (!handle || !offset)
3542       return false;
3543 
3544    enum overload_type overload = get_ambiguous_overload_alu_type(ctx, intr, nir_type_uint);
3545    const struct dxil_value *agg = load_ubo(ctx, handle, offset, overload);
3546    if (!agg)
3547       return false;
3548 
3549    unsigned first_component = nir_intrinsic_has_component(intr) ?
3550       nir_intrinsic_component(intr) : 0;
3551    for (unsigned i = 0; i < intr->def.num_components; i++)
3552       store_def(ctx, &intr->def, i,
3553                  dxil_emit_extractval(&ctx->mod, agg, i + first_component));
3554 
3555    if (intr->def.bit_size == 16)
3556       ctx->mod.feats.native_low_precision = true;
3557    return true;
3558 }
3559 
3560 /* Need to add patch-ness as a matching parameter, since driver_location is *not* unique
3561  * between control points and patch variables in HS/DS
3562  */
3563 static nir_variable *
find_patch_matching_variable_by_driver_location(nir_shader * s,nir_variable_mode mode,unsigned driver_location,bool patch)3564 find_patch_matching_variable_by_driver_location(nir_shader *s, nir_variable_mode mode, unsigned driver_location, bool patch)
3565 {
3566    nir_foreach_variable_with_modes(var, s, mode) {
3567       if (var->data.driver_location == driver_location &&
3568           var->data.patch == patch)
3569          return var;
3570    }
3571    return NULL;
3572 }
3573 
/* Lower a NIR output store (store_output / store_per_vertex_output) to
 * dx.op.storeOutput or, for HS patch constants, dx.op.storePatchConstant.
 * Emits one call per written component and, for validator >= 1.5, updates
 * the signature bookkeeping (never-written masks, dynamic-index masks).
 * Returns false on emission failure.
 */
static bool
emit_store_output_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   assert(intr->intrinsic == nir_intrinsic_store_output ||
          ctx->mod.shader_kind == DXIL_HULL_SHADER);
   /* In a hull shader, plain store_output targets the patch-constant
    * signature; store_per_vertex_output targets the control points. */
   bool is_patch_constant = intr->intrinsic == nir_intrinsic_store_output &&
      ctx->mod.shader_kind == DXIL_HULL_SHADER;
   nir_alu_type out_type = nir_intrinsic_src_type(intr);
   enum overload_type overload = get_overload(out_type, intr->src[0].ssa->bit_size);
   const struct dxil_func *func = dxil_get_function(&ctx->mod, is_patch_constant ?
      "dx.op.storePatchConstant" : "dx.op.storeOutput",
      overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, is_patch_constant ?
      DXIL_INTR_STORE_PATCH_CONSTANT : DXIL_INTR_STORE_OUTPUT);
   /* Map the NIR base to the DXIL signature element index. */
   uint8_t *io_mappings = is_patch_constant ? ctx->mod.patch_mappings : ctx->mod.output_mappings;
   uint8_t io_index = io_mappings[nir_intrinsic_base(intr)];
   const struct dxil_value *output_id = dxil_module_get_int32_const(&ctx->mod, io_index);
   /* The row (array index) source: src[1] for store_output, src[2] for
    * store_per_vertex_output (src[1] is the vertex there). */
   unsigned row_index = intr->intrinsic == nir_intrinsic_store_output ? 1 : 2;

   /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature
    * generation, so muck with them here too.
    */
   nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
   bool is_tess_level = is_patch_constant &&
                        (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
                         semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER);

   /* For tess levels the component becomes the row (col fixed at 0); for
    * everything else the row comes from the NIR source and the component
    * becomes the col. The missing half is filled per-component below. */
   const struct dxil_value *row = NULL;
   const struct dxil_value *col = NULL;
   if (is_tess_level)
      col = dxil_module_get_int8_const(&ctx->mod, 0);
   else
      row = get_src(ctx, &intr->src[row_index], 0, nir_type_int);

   bool success = true;
   uint32_t writemask = nir_intrinsic_write_mask(intr);

   /* Rebase the intrinsic's component index by the variable's own
    * location_frac so it indexes into the signature element.
    * NOTE(review): var is dereferenced without a NULL check — assumes a
    * matching variable always exists for the base; confirm. */
   nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_out, nir_intrinsic_base(intr), is_patch_constant);
   unsigned var_base_component = var->data.location_frac;
   unsigned base_component = nir_intrinsic_component(intr) - var_base_component;

   /* Validator 1.5+ tracks which signature components are written and which
    * are dynamically indexed; update those masks here. */
   if (ctx->mod.minor_validator >= 5) {
      struct dxil_signature_record *sig_rec = is_patch_constant ?
         &ctx->mod.patch_consts[io_index] :
         &ctx->mod.outputs[io_index];
      /* 64-bit components occupy two 32-bit signature slots each. */
      unsigned comp_size = intr->src[0].ssa->bit_size == 64 ? 2 : 1;
      unsigned comp_mask = 0;
      if (is_tess_level)
         comp_mask = 1;
      else if (comp_size == 1)
         comp_mask = writemask << var_base_component;
      else {
         for (unsigned i = 0; i < intr->num_components; ++i)
            if ((writemask & (1 << i)))
               comp_mask |= 3 << ((i + var_base_component) * comp_size);
      }
      for (unsigned r = 0; r < sig_rec->num_elements; ++r)
         sig_rec->elements[r].never_writes_mask &= ~comp_mask;

      /* A non-constant row means this element is dynamically indexed. */
      if (!nir_src_is_const(intr->src[row_index])) {
         struct dxil_psv_signature_element *psv_rec = is_patch_constant ?
            &ctx->mod.psv_patch_consts[io_index] :
            &ctx->mod.psv_outputs[io_index];
         psv_rec->dynamic_mask_and_stream |= comp_mask;
      }
   }

   /* Emit one store per written component, filling in the row (tess level)
    * or col (everything else) left NULL above. */
   for (unsigned i = 0; i < intr->num_components && success; ++i) {
      if (writemask & (1 << i)) {
         if (is_tess_level)
            row = dxil_module_get_int32_const(&ctx->mod, i + base_component);
         else
            col = dxil_module_get_int8_const(&ctx->mod, i + base_component);
         const struct dxil_value *value = get_src(ctx, &intr->src[0], i, out_type);
         if (!col || !row || !value)
            return false;

         const struct dxil_value *args[] = {
            opcode, output_id, row, col, value
         };
         success &= dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
      }
   }

   return success;
}
3664 
static bool
emit_load_input_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   /* Lower a NIR input/output load to the matching DXIL intrinsic:
    * dx.op.loadInput, dx.op.loadPatchConstant, dx.op.loadOutputControlPoint,
    * or dx.op.attributeAtVertex. Emits one scalar call per enabled component
    * and stores each result into the instruction's def.
    */

   /* Flat-shaded float inputs in a pixel shader with a non-zero provoking
    * vertex are read via attributeAtVertex instead of loadInput. */
   bool attr_at_vertex = false;
   if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER &&
      ctx->opts->interpolate_at_vertex &&
      ctx->opts->provoking_vertex != 0 &&
      (nir_intrinsic_dest_type(intr) & nir_type_float)) {
      nir_variable *var = nir_find_variable_with_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr));

      attr_at_vertex = var && var->data.interpolation == INTERP_MODE_FLAT;
   }

   /* In a domain shader, load_input reads patch constants; in a hull shader,
    * load_output reads patch constants written earlier in the same shader. */
   bool is_patch_constant = (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER &&
                             intr->intrinsic == nir_intrinsic_load_input) ||
                            (ctx->mod.shader_kind == DXIL_HULL_SHADER &&
                             intr->intrinsic == nir_intrinsic_load_output);
   bool is_output_control_point = intr->intrinsic == nir_intrinsic_load_per_vertex_output;

   unsigned opcode_val;
   const char *func_name;
   if (attr_at_vertex) {
      opcode_val = DXIL_INTR_ATTRIBUTE_AT_VERTEX;
      func_name = "dx.op.attributeAtVertex";
      /* Validator 1.6+ requires the barycentrics feature flag for this op. */
      if (ctx->mod.minor_validator >= 6)
         ctx->mod.feats.barycentrics = 1;
   } else if (is_patch_constant) {
      opcode_val = DXIL_INTR_LOAD_PATCH_CONSTANT;
      func_name = "dx.op.loadPatchConstant";
   } else if (is_output_control_point) {
      opcode_val = DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT;
      func_name = "dx.op.loadOutputControlPoint";
   } else {
      opcode_val = DXIL_INTR_LOAD_INPUT;
      func_name = "dx.op.loadInput";
   }

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, opcode_val);
   if (!opcode)
      return false;

   /* Map the NIR driver_location (base) to the DXIL signature element index
    * through the appropriate per-signature remap table. */
   uint8_t *io_mappings =
      is_patch_constant ? ctx->mod.patch_mappings :
      is_output_control_point ? ctx->mod.output_mappings :
      ctx->mod.input_mappings;
   uint8_t io_index = io_mappings[nir_intrinsic_base(intr)];
   const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, io_index);
   if (!input_id)
      return false;

   /* Per-vertex loads carry the vertex index in src[0], pushing the row
    * (array) index to src[1]; otherwise the row index is src[0]. */
   bool is_per_vertex =
      intr->intrinsic == nir_intrinsic_load_per_vertex_input ||
      intr->intrinsic == nir_intrinsic_load_per_vertex_output;
   int row_index = is_per_vertex ? 1 : 0;
   const struct dxil_value *vertex_id = NULL;
   if (!is_patch_constant) {
      if (is_per_vertex) {
         vertex_id = get_src(ctx, &intr->src[0], 0, nir_type_int);
      } else if (attr_at_vertex) {
         /* attributeAtVertex reads the attribute at the provoking vertex. */
         vertex_id = dxil_module_get_int8_const(&ctx->mod, ctx->opts->provoking_vertex);
      } else {
         const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
         if (!int32_type)
            return false;

         vertex_id = dxil_module_get_undef(&ctx->mod, int32_type);
      }
      if (!vertex_id)
         return false;
   }

   /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature
    * generation, so muck with them here too.
    */
   nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
   bool is_tess_level = is_patch_constant &&
                        (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
                         semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER);

   /* For tess levels the component loop below iterates rows (col fixed at 0);
    * for everything else it iterates cols (row taken from the NIR source). */
   const struct dxil_value *row = NULL;
   const struct dxil_value *comp = NULL;
   if (is_tess_level)
      comp = dxil_module_get_int8_const(&ctx->mod, 0);
   else
      row = get_src(ctx, &intr->src[row_index], 0, nir_type_int);

   nir_alu_type out_type = nir_intrinsic_dest_type(intr);
   enum overload_type overload = get_overload(out_type, intr->def.bit_size);

   const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, overload);

   if (!func)
      return false;

   /* nir_intrinsic_component() is absolute; rebase it against the variable's
    * own first component so the signature masks line up. */
   nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), is_patch_constant);
   unsigned var_base_component = var ? var->data.location_frac : 0;
   unsigned base_component = nir_intrinsic_component(intr) - var_base_component;

   /* Validator 1.5+ tracks which signature components are read and which are
    * dynamically indexed; update the serialized signature records to match. */
   if (ctx->mod.minor_validator >= 5 &&
       !is_output_control_point &&
       intr->intrinsic != nir_intrinsic_load_output) {
      struct dxil_signature_record *sig_rec = is_patch_constant ?
         &ctx->mod.patch_consts[io_index] :
         &ctx->mod.inputs[io_index];
      /* 64-bit components occupy two 32-bit signature slots each. */
      unsigned comp_size = intr->def.bit_size == 64 ? 2 : 1;
      unsigned comp_mask = (1 << (intr->num_components * comp_size)) - 1;
      comp_mask <<= (var_base_component * comp_size);
      if (is_tess_level)
         comp_mask = 1;
      for (unsigned r = 0; r < sig_rec->num_elements; ++r)
         sig_rec->elements[r].always_reads_mask |= (comp_mask & sig_rec->elements[r].mask);

      /* A non-constant row index means the element is dynamically indexed. */
      if (!nir_src_is_const(intr->src[row_index])) {
         struct dxil_psv_signature_element *psv_rec = is_patch_constant ?
            &ctx->mod.psv_patch_consts[io_index] :
            &ctx->mod.psv_inputs[io_index];
         psv_rec->dynamic_mask_and_stream |= comp_mask;
      }
   }

   /* One scalar dx.op call per component. */
   for (unsigned i = 0; i < intr->num_components; ++i) {
      if (is_tess_level)
         row = dxil_module_get_int32_const(&ctx->mod, i + base_component);
      else
         comp = dxil_module_get_int8_const(&ctx->mod, i + base_component);

      if (!row || !comp)
         return false;

      const struct dxil_value *args[] = {
         opcode, input_id, row, comp, vertex_id
      };

      /* loadPatchConstant takes no vertex argument; drop the trailing arg. */
      unsigned num_args = ARRAY_SIZE(args) - (is_patch_constant ? 1 : 0);
      const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args);
      if (!retval)
         return false;
      store_def(ctx, &intr->def, i, retval);
   }
   return true;
}
3806 
static bool
emit_load_interpolated_input(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   /* Lower load_interpolated_input to the DXIL eval* intrinsic selected by
    * the barycentric source in src[0] (evalSnapped / evalSampleIndex /
    * evalCentroid). Emits one float32 call per component.
    */
   nir_intrinsic_instr *barycentric = nir_src_as_intrinsic(intr->src[0]);

   /* Max arg count is 6 (evalSnapped); unused slots stay NULL. */
   const struct dxil_value *args[6] = { 0 };

   unsigned opcode_val;
   const char *func_name;
   unsigned num_args;
   switch (barycentric->intrinsic) {
   case nir_intrinsic_load_barycentric_at_offset:
      opcode_val = DXIL_INTR_EVAL_SNAPPED;
      func_name = "dx.op.evalSnapped";
      num_args = 6;
      /* args[4]/args[5] are the x/y snap offsets in 1/16-pixel units. */
      for (unsigned i = 0; i < 2; ++i) {
         const struct dxil_value *float_offset = get_src(ctx, &barycentric->src[0], i, nir_type_float);
         /* GLSL uses [-0.5f, 0.5f), DXIL uses (-8, 7) */
         const struct dxil_value *offset_16 = dxil_emit_binop(&ctx->mod,
            DXIL_BINOP_MUL, float_offset, dxil_module_get_float_const(&ctx->mod, 16.0f), 0);
         args[i + 4] = dxil_emit_cast(&ctx->mod, DXIL_CAST_FPTOSI,
            dxil_module_get_int_type(&ctx->mod, 32), offset_16);
      }
      break;
   case nir_intrinsic_load_barycentric_pixel:
      /* Pixel-center evaluation is evalSnapped with zero offsets. */
      opcode_val = DXIL_INTR_EVAL_SNAPPED;
      func_name = "dx.op.evalSnapped";
      num_args = 6;
      args[4] = args[5] = dxil_module_get_int32_const(&ctx->mod, 0);
      break;
   case nir_intrinsic_load_barycentric_at_sample:
      opcode_val = DXIL_INTR_EVAL_SAMPLE_INDEX;
      func_name = "dx.op.evalSampleIndex";
      num_args = 5;
      args[4] = get_src(ctx, &barycentric->src[0], 0, nir_type_int);
      break;
   case nir_intrinsic_load_barycentric_centroid:
      opcode_val = DXIL_INTR_EVAL_CENTROID;
      func_name = "dx.op.evalCentroid";
      num_args = 4;
      break;
   default:
      unreachable("Unsupported interpolation barycentric intrinsic");
   }
   /* Common leading args: opcode, signature element index, row. args[3]
    * (the column) is filled per-component in the loop below. */
   uint8_t io_index = ctx->mod.input_mappings[nir_intrinsic_base(intr)];
   args[0] = dxil_module_get_int32_const(&ctx->mod, opcode_val);
   args[1] = dxil_module_get_int32_const(&ctx->mod, io_index);
   args[2] = get_src(ctx, &intr->src[1], 0, nir_type_int);

   /* NOTE(review): the dxil_value results gathered into args[] above are not
    * individually NULL-checked before the call loop — confirm allocation
    * failure is handled upstream. */
   const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, DXIL_F32);

   if (!func)
      return false;

   /* Rebase the absolute component index against the variable's own
    * first component so the signature masks line up. */
   nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), false);
   unsigned var_base_component = var ? var->data.location_frac : 0;
   unsigned base_component = nir_intrinsic_component(intr) - var_base_component;

   /* Validator 1.5+ tracks read/dynamic-index masks in the signature. */
   if (ctx->mod.minor_validator >= 5) {
      struct dxil_signature_record *sig_rec = &ctx->mod.inputs[io_index];
      /* 64-bit components occupy two 32-bit signature slots each. */
      unsigned comp_size = intr->def.bit_size == 64 ? 2 : 1;
      unsigned comp_mask = (1 << (intr->num_components * comp_size)) - 1;
      comp_mask <<= (var_base_component * comp_size);
      for (unsigned r = 0; r < sig_rec->num_elements; ++r)
         sig_rec->elements[r].always_reads_mask |= (comp_mask & sig_rec->elements[r].mask);

      /* A non-constant row (src[1]) means dynamic indexing. */
      if (!nir_src_is_const(intr->src[1])) {
         struct dxil_psv_signature_element *psv_rec = &ctx->mod.psv_inputs[io_index];
         psv_rec->dynamic_mask_and_stream |= comp_mask;
      }
   }

   /* One scalar eval* call per component; args[3] is the column index. */
   for (unsigned i = 0; i < intr->num_components; ++i) {
      args[3] = dxil_module_get_int8_const(&ctx->mod, i + base_component);

      const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args);
      if (!retval)
         return false;
      store_def(ctx, &intr->def, i, retval);
   }
   return true;
}
3889 
3890 static const struct dxil_value *
deref_to_gep(struct ntd_context * ctx,nir_deref_instr * deref)3891 deref_to_gep(struct ntd_context *ctx, nir_deref_instr *deref)
3892 {
3893    nir_deref_path path;
3894    nir_deref_path_init(&path, deref, ctx->ralloc_ctx);
3895    assert(path.path[0]->deref_type == nir_deref_type_var);
3896    uint32_t count = 0;
3897    while (path.path[count])
3898       ++count;
3899 
3900    const struct dxil_value **gep_indices = ralloc_array(ctx->ralloc_ctx,
3901                                                        const struct dxil_value *,
3902                                                        count + 1);
3903    nir_variable *var = path.path[0]->var;
3904    const struct dxil_value **var_array;
3905    switch (deref->modes) {
3906    case nir_var_mem_constant: var_array = ctx->consts; break;
3907    case nir_var_mem_shared: var_array = ctx->sharedvars; break;
3908    case nir_var_function_temp: var_array = ctx->scratchvars; break;
3909    default: unreachable("Invalid deref mode");
3910    }
3911    gep_indices[0] = var_array[var->data.driver_location];
3912 
3913    for (uint32_t i = 0; i < count; ++i)
3914       gep_indices[i + 1] = get_src_ssa(ctx, &path.path[i]->def, 0);
3915 
3916    return dxil_emit_gep_inbounds(&ctx->mod, gep_indices, count + 1);
3917 }
3918 
3919 static bool
emit_load_deref(struct ntd_context * ctx,nir_intrinsic_instr * intr)3920 emit_load_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3921 {
3922    const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0]));
3923    if (!ptr)
3924       return false;
3925 
3926    const struct dxil_value *retval =
3927       dxil_emit_load(&ctx->mod, ptr, intr->def.bit_size / 8, false);
3928    if (!retval)
3929       return false;
3930 
3931    store_def(ctx, &intr->def, 0, retval);
3932    return true;
3933 }
3934 
3935 static bool
emit_store_deref(struct ntd_context * ctx,nir_intrinsic_instr * intr)3936 emit_store_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3937 {
3938    nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
3939    const struct dxil_value *ptr = deref_to_gep(ctx, deref);
3940    if (!ptr)
3941       return false;
3942 
3943    const struct dxil_value *value = get_src(ctx, &intr->src[1], 0, nir_get_nir_type_for_glsl_type(deref->type));
3944    return dxil_emit_store(&ctx->mod, value, ptr, nir_src_bit_size(intr->src[1]) / 8, false);
3945 }
3946 
3947 static bool
emit_atomic_deref(struct ntd_context * ctx,nir_intrinsic_instr * intr)3948 emit_atomic_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3949 {
3950    const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0]));
3951    if (!ptr)
3952       return false;
3953 
3954    const struct dxil_value *value = get_src(ctx, &intr->src[1], 0, nir_type_uint);
3955    if (!value)
3956       return false;
3957 
3958    enum dxil_rmw_op dxil_op = nir_atomic_to_dxil_rmw(nir_intrinsic_atomic_op(intr));
3959    const struct dxil_value *retval = dxil_emit_atomicrmw(&ctx->mod, value, ptr, dxil_op, false,
3960                                                          DXIL_ATOMIC_ORDERING_ACQREL,
3961                                                          DXIL_SYNC_SCOPE_CROSSTHREAD);
3962    if (!retval)
3963       return false;
3964 
3965    store_def(ctx, &intr->def, 0, retval);
3966    return true;
3967 }
3968 
3969 static bool
emit_atomic_deref_swap(struct ntd_context * ctx,nir_intrinsic_instr * intr)3970 emit_atomic_deref_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3971 {
3972    const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0]));
3973    if (!ptr)
3974       return false;
3975 
3976    const struct dxil_value *cmp = get_src(ctx, &intr->src[1], 0, nir_type_uint);
3977    const struct dxil_value *value = get_src(ctx, &intr->src[2], 0, nir_type_uint);
3978    if (!value)
3979       return false;
3980 
3981    const struct dxil_value *retval = dxil_emit_cmpxchg(&ctx->mod, cmp, value, ptr, false,
3982                                                        DXIL_ATOMIC_ORDERING_ACQREL,
3983                                                        DXIL_SYNC_SCOPE_CROSSTHREAD);
3984    if (!retval)
3985       return false;
3986 
3987    store_def(ctx, &intr->def, 0, retval);
3988    return true;
3989 }
3990 
3991 static bool
emit_discard_if_with_value(struct ntd_context * ctx,const struct dxil_value * value)3992 emit_discard_if_with_value(struct ntd_context *ctx, const struct dxil_value *value)
3993 {
3994    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DISCARD);
3995    if (!opcode)
3996       return false;
3997 
3998    const struct dxil_value *args[] = {
3999      opcode,
4000      value
4001    };
4002 
4003    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.discard", DXIL_NONE);
4004    if (!func)
4005       return false;
4006 
4007    return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
4008 }
4009 
4010 static bool
emit_discard_if(struct ntd_context * ctx,nir_intrinsic_instr * intr)4011 emit_discard_if(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4012 {
4013    const struct dxil_value *value = get_src(ctx, &intr->src[0], 0, nir_type_bool);
4014    if (!value)
4015       return false;
4016 
4017    return emit_discard_if_with_value(ctx, value);
4018 }
4019 
4020 static bool
emit_discard(struct ntd_context * ctx)4021 emit_discard(struct ntd_context *ctx)
4022 {
4023    const struct dxil_value *value = dxil_module_get_int1_const(&ctx->mod, true);
4024    return emit_discard_if_with_value(ctx, value);
4025 }
4026 
4027 static bool
emit_emit_vertex(struct ntd_context * ctx,nir_intrinsic_instr * intr)4028 emit_emit_vertex(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4029 {
4030    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_EMIT_STREAM);
4031    const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
4032    if (!opcode || !stream_id)
4033       return false;
4034 
4035    const struct dxil_value *args[] = {
4036      opcode,
4037      stream_id
4038    };
4039 
4040    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.emitStream", DXIL_NONE);
4041    if (!func)
4042       return false;
4043 
4044    return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
4045 }
4046 
4047 static bool
emit_end_primitive(struct ntd_context * ctx,nir_intrinsic_instr * intr)4048 emit_end_primitive(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4049 {
4050    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CUT_STREAM);
4051    const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
4052    if (!opcode || !stream_id)
4053       return false;
4054 
4055    const struct dxil_value *args[] = {
4056      opcode,
4057      stream_id
4058    };
4059 
4060    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cutStream", DXIL_NONE);
4061    if (!func)
4062       return false;
4063 
4064    return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
4065 }
4066 
static bool
emit_image_store(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   /* Lower image store (deref, bindless, or index-based) to a DXIL
    * bufferStore (for buffer images) or textureStore call.
    * src[1] holds the coordinates, src[3] the value to write. */
   const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_store ?
      create_image_handle(ctx, intr) :
      get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
   if (!handle)
      return false;

   /* Deref form carries array-ness in the GLSL type; the others carry it
    * in the intrinsic's index. */
   bool is_array = false;
   if (intr->intrinsic == nir_intrinsic_image_deref_store)
      is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
   else
      is_array = nir_intrinsic_image_array(intr);

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   /* Unused coordinate slots stay undef. */
   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
   enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_store ?
      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
      nir_intrinsic_image_dim(intr);
   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
   /* Array layer is the trailing coordinate component. */
   if (is_array)
      ++num_coords;

   assert(num_coords <= nir_src_num_components(intr->src[1]));
   for (unsigned i = 0; i < num_coords; ++i) {
      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
      if (!coord[i])
         return false;
   }

   nir_alu_type in_type = nir_intrinsic_src_type(intr);
   enum overload_type overload = get_overload(in_type, 32);

   assert(nir_src_bit_size(intr->src[3]) == 32);
   unsigned num_components = nir_src_num_components(intr->src[3]);
   assert(num_components <= 4);
   const struct dxil_value *value[4];
   for (unsigned i = 0; i < num_components; ++i) {
      value[i] = get_src(ctx, &intr->src[3], i, in_type);
      if (!value[i])
         return false;
   }

   /* DXIL store ops take 4 value operands; pad the tail with undef. */
   for (int i = num_components; i < 4; ++i)
      value[i] = dxil_module_get_undef(&ctx->mod, dxil_value_get_type(value[0]));

   const struct dxil_value *write_mask =
      dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
   if (!write_mask)
      return false;

   if (image_dim == GLSL_SAMPLER_DIM_BUF) {
      /* Buffer stores use (index, undef) rather than a 2D/3D coordinate. */
      coord[1] = int32_undef;
      return emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload);
   } else
      return emit_texturestore_call(ctx, handle, coord, value, write_mask, overload);
}
4128 
static bool
emit_image_load(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   /* Lower image load (deref, bindless, or index-based) to a DXIL
    * bufferLoad (for buffer images) or textureLoad call, extracting each
    * requested component from the aggregate result. */
   const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_load ?
      create_image_handle(ctx, intr) :
      get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
   if (!handle)
      return false;

   /* Deref form carries array-ness in the GLSL type; the others carry it
    * in the intrinsic's index. */
   bool is_array = false;
   if (intr->intrinsic == nir_intrinsic_image_deref_load)
      is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
   else
      is_array = nir_intrinsic_image_array(intr);

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   /* Unused coordinate slots stay undef. */
   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
   enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_load ?
      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
      nir_intrinsic_image_dim(intr);
   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
   /* Array layer is the trailing coordinate component. */
   if (is_array)
      ++num_coords;

   assert(num_coords <= nir_src_num_components(intr->src[1]));
   for (unsigned i = 0; i < num_coords; ++i) {
      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
      if (!coord[i])
         return false;
   }

   nir_alu_type out_type = nir_intrinsic_dest_type(intr);
   enum overload_type overload = get_overload(out_type, 32);

   const struct dxil_value *load_result;
   if (image_dim == GLSL_SAMPLER_DIM_BUF) {
      /* Buffer loads use (index, undef) rather than a 2D/3D coordinate. */
      coord[1] = int32_undef;
      load_result = emit_bufferload_call(ctx, handle, coord, overload);
   } else
      load_result = emit_textureload_call(ctx, handle, coord, overload);

   if (!load_result)
      return false;

   /* The DXIL call returns an aggregate; extract each component. */
   assert(intr->def.bit_size == 32);
   unsigned num_components = intr->def.num_components;
   assert(num_components <= 4);
   for (unsigned i = 0; i < num_components; ++i) {
      const struct dxil_value *component = dxil_emit_extractval(&ctx->mod, load_result, i);
      if (!component)
         return false;
      store_def(ctx, &intr->def, i, component);
   }

   /* Typed UAV loads of multi-component formats need an extra feature flag. */
   if (util_format_get_nr_components(nir_intrinsic_format(intr)) > 1)
      ctx->mod.feats.typed_uav_load_additional_formats = true;

   return true;
}
4191 
static bool
emit_image_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   /* Lower image atomic (deref, bindless, or index-based) to a DXIL atomic
    * binop on the image handle. src[1] holds coordinates, src[3] the
    * operand; the previous value is stored into the def. */
   const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_atomic ?
      create_image_handle(ctx, intr) :
      get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
   if (!handle)
      return false;

   /* Deref form carries array-ness in the GLSL type; the others carry it
    * in the intrinsic's index. */
   bool is_array = false;
   if (intr->intrinsic == nir_intrinsic_image_deref_atomic)
      is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
   else
      is_array = nir_intrinsic_image_array(intr);

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   /* Unused coordinate slots stay undef. */
   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
   enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_atomic ?
      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
      nir_intrinsic_image_dim(intr);
   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
   /* Array layer is the trailing coordinate component. */
   if (is_array)
      ++num_coords;

   assert(num_coords <= nir_src_num_components(intr->src[1]));
   for (unsigned i = 0; i < num_coords; ++i) {
      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
      if (!coord[i])
         return false;
   }

   /* The operand type (int/uint/float) follows the NIR atomic op. */
   nir_atomic_op nir_op = nir_intrinsic_atomic_op(intr);
   enum dxil_atomic_op dxil_op = nir_atomic_to_dxil_atomic(nir_op);
   nir_alu_type type = nir_atomic_op_type(nir_op);
   const struct dxil_value *value = get_src(ctx, &intr->src[3], 0, type);
   if (!value)
      return false;

   const struct dxil_value *retval =
      emit_atomic_binop(ctx, handle, dxil_op, coord, value);

   if (!retval)
      return false;

   store_def(ctx, &intr->def, 0, retval);
   return true;
}
4242 
static bool
emit_image_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   /* Lower image atomic compare-and-swap (deref, bindless, or index-based)
    * to a DXIL cmpxchg on the image handle. src[1] holds coordinates,
    * src[3] the comparand, src[4] the new value; the previous value is
    * stored into the def. */
   const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_atomic_swap ?
      create_image_handle(ctx, intr) :
      get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
   if (!handle)
      return false;

   /* Deref form carries array-ness in the GLSL type; the others carry it
    * in the intrinsic's index. */
   bool is_array = false;
   if (intr->intrinsic == nir_intrinsic_image_deref_atomic_swap)
      is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
   else
      is_array = nir_intrinsic_image_array(intr);

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   /* Unused coordinate slots stay undef. */
   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
   enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ?
      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
      nir_intrinsic_image_dim(intr);
   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
   /* Array layer is the trailing coordinate component. */
   if (is_array)
      ++num_coords;

   assert(num_coords <= nir_src_num_components(intr->src[1]));
   for (unsigned i = 0; i < num_coords; ++i) {
      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
      if (!coord[i])
         return false;
   }

   const struct dxil_value *cmpval = get_src(ctx, &intr->src[3], 0, nir_type_uint);
   const struct dxil_value *newval = get_src(ctx, &intr->src[4], 0, nir_type_uint);
   if (!cmpval || !newval)
      return false;

   const struct dxil_value *retval =
      emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval);

   if (!retval)
      return false;

   store_def(ctx, &intr->def, 0, retval);
   return true;
}
4291 
/* Operand bundle for texture-style DXIL ops (sample, load, getDimensions).
 * Callers fill only the fields the specific op consumes; the rest stay NULL. */
struct texop_parameters {
   const struct dxil_value *tex;                      /* resource handle */
   const struct dxil_value *sampler;                  /* sampler handle, if the op samples */
   const struct dxil_value *bias, *lod_or_sample, *min_lod; /* LOD controls / sample index */
   const struct dxil_value *coord[4], *offset[3], *dx[3], *dy[3]; /* coords, texel offsets, gradients */
   const struct dxil_value *cmp;                      /* comparison value for shadow ops */
   enum overload_type overload;                       /* result-type overload */
};
4300 
4301 static const struct dxil_value *
emit_texture_size(struct ntd_context * ctx,struct texop_parameters * params)4302 emit_texture_size(struct ntd_context *ctx, struct texop_parameters *params)
4303 {
4304    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.getDimensions", DXIL_NONE);
4305    if (!func)
4306       return false;
4307 
4308    const struct dxil_value *args[] = {
4309       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_SIZE),
4310       params->tex,
4311       params->lod_or_sample
4312    };
4313 
4314    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4315 }
4316 
4317 static bool
emit_image_size(struct ntd_context * ctx,nir_intrinsic_instr * intr)4318 emit_image_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4319 {
4320    const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_size ?
4321       create_image_handle(ctx, intr) :
4322       get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
4323    if (!handle)
4324       return false;
4325 
4326    enum glsl_sampler_dim sampler_dim = intr->intrinsic == nir_intrinsic_image_deref_size ?
4327       glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
4328       nir_intrinsic_image_dim(intr);
4329    const struct dxil_value *lod = sampler_dim == GLSL_SAMPLER_DIM_BUF ?
4330       dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32)) :
4331       get_src(ctx, &intr->src[1], 0, nir_type_uint);
4332    if (!lod)
4333       return false;
4334 
4335    struct texop_parameters params = {
4336       .tex = handle,
4337       .lod_or_sample = lod
4338    };
4339    const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
4340    if (!dimensions)
4341       return false;
4342 
4343    for (unsigned i = 0; i < intr->def.num_components; ++i) {
4344       const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, i);
4345       store_def(ctx, &intr->def, i, retval);
4346    }
4347 
4348    return true;
4349 }
4350 
4351 static bool
emit_get_ssbo_size(struct ntd_context * ctx,nir_intrinsic_instr * intr)4352 emit_get_ssbo_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4353 {
4354    enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV;
4355    if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
4356       nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
4357       if (var && var->data.access & ACCESS_NON_WRITEABLE)
4358          class = DXIL_RESOURCE_CLASS_SRV;
4359    }
4360 
4361    const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER);
4362    if (!handle)
4363       return false;
4364 
4365    struct texop_parameters params = {
4366       .tex = handle,
4367       .lod_or_sample = dxil_module_get_undef(
4368                         &ctx->mod, dxil_module_get_int_type(&ctx->mod, 32))
4369    };
4370 
4371    const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
4372    if (!dimensions)
4373       return false;
4374 
4375    const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, 0);
4376    store_def(ctx, &intr->def, 0, retval);
4377 
4378    return true;
4379 }
4380 
4381 static bool
emit_ssbo_atomic(struct ntd_context * ctx,nir_intrinsic_instr * intr)4382 emit_ssbo_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4383 {
4384    nir_atomic_op nir_op = nir_intrinsic_atomic_op(intr);
4385    enum dxil_atomic_op dxil_op = nir_atomic_to_dxil_atomic(nir_op);
4386    nir_alu_type type = nir_atomic_op_type(nir_op);
4387    const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
4388    const struct dxil_value *offset =
4389       get_src(ctx, &intr->src[1], 0, nir_type_uint);
4390    const struct dxil_value *value =
4391       get_src(ctx, &intr->src[2], 0, type);
4392 
4393    if (!value || !handle || !offset)
4394       return false;
4395 
4396    const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4397    if (!int32_undef)
4398       return false;
4399 
4400    const struct dxil_value *coord[3] = {
4401       offset, int32_undef, int32_undef
4402    };
4403 
4404    const struct dxil_value *retval =
4405       emit_atomic_binop(ctx, handle, dxil_op, coord, value);
4406 
4407    if (!retval)
4408       return false;
4409 
4410    store_def(ctx, &intr->def, 0, retval);
4411    return true;
4412 }
4413 
4414 static bool
emit_ssbo_atomic_comp_swap(struct ntd_context * ctx,nir_intrinsic_instr * intr)4415 emit_ssbo_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4416 {
4417    const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
4418    const struct dxil_value *offset =
4419       get_src(ctx, &intr->src[1], 0, nir_type_uint);
4420    const struct dxil_value *cmpval =
4421       get_src(ctx, &intr->src[2], 0, nir_type_int);
4422    const struct dxil_value *newval =
4423       get_src(ctx, &intr->src[3], 0, nir_type_int);
4424 
4425    if (!cmpval || !newval || !handle || !offset)
4426       return false;
4427 
4428    const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4429    if (!int32_undef)
4430       return false;
4431 
4432    const struct dxil_value *coord[3] = {
4433       offset, int32_undef, int32_undef
4434    };
4435 
4436    const struct dxil_value *retval =
4437       emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval);
4438 
4439    if (!retval)
4440       return false;
4441 
4442    store_def(ctx, &intr->def, 0, retval);
4443    return true;
4444 }
4445 
4446 static bool
emit_vulkan_resource_index(struct ntd_context * ctx,nir_intrinsic_instr * intr)4447 emit_vulkan_resource_index(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4448 {
4449    unsigned int binding = nir_intrinsic_binding(intr);
4450 
4451    bool const_index = nir_src_is_const(intr->src[0]);
4452    if (const_index) {
4453       binding += nir_src_as_const_value(intr->src[0])->u32;
4454    }
4455 
4456    const struct dxil_value *index_value = dxil_module_get_int32_const(&ctx->mod, binding);
4457    if (!index_value)
4458       return false;
4459 
4460    if (!const_index) {
4461       const struct dxil_value *offset = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
4462       if (!offset)
4463          return false;
4464 
4465       index_value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, index_value, offset, 0);
4466       if (!index_value)
4467          return false;
4468    }
4469 
4470    store_def(ctx, &intr->def, 0, index_value);
4471    store_def(ctx, &intr->def, 1, dxil_module_get_int32_const(&ctx->mod, 0));
4472    return true;
4473 }
4474 
4475 static bool
emit_load_vulkan_descriptor(struct ntd_context * ctx,nir_intrinsic_instr * intr)4476 emit_load_vulkan_descriptor(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4477 {
4478    nir_intrinsic_instr* index = nir_src_as_intrinsic(intr->src[0]);
4479    const struct dxil_value *handle = NULL;
4480 
4481    enum dxil_resource_class resource_class;
4482    enum dxil_resource_kind resource_kind;
4483    switch (nir_intrinsic_desc_type(intr)) {
4484    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
4485       resource_class = DXIL_RESOURCE_CLASS_CBV;
4486       resource_kind = DXIL_RESOURCE_KIND_CBUFFER;
4487       break;
4488    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
4489       resource_class = DXIL_RESOURCE_CLASS_UAV;
4490       resource_kind = DXIL_RESOURCE_KIND_RAW_BUFFER;
4491       break;
4492    default:
4493       unreachable("unknown descriptor type");
4494       return false;
4495    }
4496 
4497    if (index && index->intrinsic == nir_intrinsic_vulkan_resource_index) {
4498       unsigned binding = nir_intrinsic_binding(index);
4499       unsigned space = nir_intrinsic_desc_set(index);
4500 
4501       /* The descriptor_set field for variables is only 5 bits. We shouldn't have intrinsics trying to go beyond that. */
4502       assert(space < 32);
4503 
4504       nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
4505       if (resource_class == DXIL_RESOURCE_CLASS_UAV &&
4506           (var->data.access & ACCESS_NON_WRITEABLE))
4507          resource_class = DXIL_RESOURCE_CLASS_SRV;
4508 
4509       const struct dxil_value *index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
4510       if (!index_value)
4511          return false;
4512 
4513       handle = emit_createhandle_call_dynamic(ctx, resource_class, space, binding, index_value, false);
4514    } else {
4515       const struct dxil_value *heap_index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
4516       if (!heap_index_value)
4517          return false;
4518       const struct dxil_value *unannotated_handle = emit_createhandle_heap(ctx, heap_index_value, false, true);
4519       const struct dxil_value *res_props = dxil_module_get_buffer_res_props_const(&ctx->mod, resource_class, resource_kind);
4520       if (!unannotated_handle || !res_props)
4521          return false;
4522       handle = emit_annotate_handle(ctx, unannotated_handle, res_props);
4523    }
4524 
4525    store_ssa_def(ctx, &intr->def, 0, handle);
4526    store_def(ctx, &intr->def, 1, get_src(ctx, &intr->src[0], 1, nir_type_uint32));
4527 
4528    return true;
4529 }
4530 
4531 static bool
emit_load_sample_pos_from_id(struct ntd_context * ctx,nir_intrinsic_instr * intr)4532 emit_load_sample_pos_from_id(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4533 {
4534    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.renderTargetGetSamplePosition", DXIL_NONE);
4535    if (!func)
4536       return false;
4537 
4538    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION);
4539    if (!opcode)
4540       return false;
4541 
4542    const struct dxil_value *args[] = {
4543       opcode,
4544       get_src(ctx, &intr->src[0], 0, nir_type_uint32),
4545    };
4546    if (!args[1])
4547       return false;
4548 
4549    const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4550    if (!v)
4551       return false;
4552 
4553    for (unsigned i = 0; i < 2; ++i) {
4554       /* GL coords go from 0 -> 1, D3D from -0.5 -> 0.5 */
4555       const struct dxil_value *coord = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
4556          dxil_emit_extractval(&ctx->mod, v, i),
4557          dxil_module_get_float_const(&ctx->mod, 0.5f), 0);
4558       store_def(ctx, &intr->def, i, coord);
4559    }
4560    return true;
4561 }
4562 
4563 static bool
emit_load_sample_id(struct ntd_context * ctx,nir_intrinsic_instr * intr)4564 emit_load_sample_id(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4565 {
4566    assert(ctx->mod.info.has_per_sample_input ||
4567           intr->intrinsic == nir_intrinsic_load_sample_id_no_per_sample);
4568 
4569    if (ctx->mod.info.has_per_sample_input)
4570       return emit_load_unary_external_function(ctx, intr, "dx.op.sampleIndex",
4571                                                DXIL_INTR_SAMPLE_INDEX, nir_type_int);
4572 
4573    store_def(ctx, &intr->def, 0, dxil_module_get_int32_const(&ctx->mod, 0));
4574    return true;
4575 }
4576 
4577 static bool
emit_read_first_invocation(struct ntd_context * ctx,nir_intrinsic_instr * intr)4578 emit_read_first_invocation(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4579 {
4580    ctx->mod.feats.wave_ops = 1;
4581    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveReadLaneFirst",
4582                                                     get_overload(nir_type_uint, intr->def.bit_size));
4583    const struct dxil_value *args[] = {
4584       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_READ_LANE_FIRST),
4585       get_src(ctx, intr->src, 0, nir_type_uint),
4586    };
4587    if (!func || !args[0] || !args[1])
4588       return false;
4589 
4590    const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4591    if (!ret)
4592       return false;
4593    store_def(ctx, &intr->def, 0, ret);
4594    return true;
4595 }
4596 
4597 static bool
emit_read_invocation(struct ntd_context * ctx,nir_intrinsic_instr * intr)4598 emit_read_invocation(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4599 {
4600    ctx->mod.feats.wave_ops = 1;
4601    bool quad = intr->intrinsic == nir_intrinsic_quad_broadcast;
4602    const struct dxil_func *func = dxil_get_function(&ctx->mod, quad ? "dx.op.quadReadLaneAt" : "dx.op.waveReadLaneAt",
4603                                                     get_overload(nir_type_uint, intr->def.bit_size));
4604    const struct dxil_value *args[] = {
4605       dxil_module_get_int32_const(&ctx->mod, quad ? DXIL_INTR_QUAD_READ_LANE_AT : DXIL_INTR_WAVE_READ_LANE_AT),
4606       get_src(ctx, &intr->src[0], 0, nir_type_uint),
4607       get_src(ctx, &intr->src[1], 0, nir_type_uint),
4608    };
4609    if (!func || !args[0] || !args[1] || !args[2])
4610       return false;
4611 
4612    const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4613    if (!ret)
4614       return false;
4615    store_def(ctx, &intr->def, 0, ret);
4616    return true;
4617 }
4618 
4619 static bool
emit_vote_eq(struct ntd_context * ctx,nir_intrinsic_instr * intr)4620 emit_vote_eq(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4621 {
4622    ctx->mod.feats.wave_ops = 1;
4623    nir_alu_type alu_type = intr->intrinsic == nir_intrinsic_vote_ieq ? nir_type_int : nir_type_float;
4624    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveAllEqual",
4625                                                     get_overload(alu_type, intr->src[0].ssa->bit_size));
4626    const struct dxil_value *args[] = {
4627       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_ALL_EQUAL),
4628       get_src(ctx, intr->src, 0, alu_type),
4629    };
4630    if (!func || !args[0] || !args[1])
4631       return false;
4632 
4633    const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4634    if (!ret)
4635       return false;
4636    store_def(ctx, &intr->def, 0, ret);
4637    return true;
4638 }
4639 
4640 static bool
emit_vote(struct ntd_context * ctx,nir_intrinsic_instr * intr)4641 emit_vote(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4642 {
4643    ctx->mod.feats.wave_ops = 1;
4644    bool any = intr->intrinsic == nir_intrinsic_vote_any;
4645    const struct dxil_func *func = dxil_get_function(&ctx->mod,
4646                                                     any ? "dx.op.waveAnyTrue" : "dx.op.waveAllTrue",
4647                                                     DXIL_NONE);
4648    const struct dxil_value *args[] = {
4649       dxil_module_get_int32_const(&ctx->mod, any ? DXIL_INTR_WAVE_ANY_TRUE : DXIL_INTR_WAVE_ALL_TRUE),
4650       get_src(ctx, intr->src, 0, nir_type_bool),
4651    };
4652    if (!func || !args[0] || !args[1])
4653       return false;
4654 
4655    const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4656    if (!ret)
4657       return false;
4658    store_def(ctx, &intr->def, 0, ret);
4659    return true;
4660 }
4661 
4662 static bool
emit_ballot(struct ntd_context * ctx,nir_intrinsic_instr * intr)4663 emit_ballot(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4664 {
4665    ctx->mod.feats.wave_ops = 1;
4666    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveBallot", DXIL_NONE);
4667    const struct dxil_value *args[] = {
4668       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_BALLOT),
4669       get_src(ctx, intr->src, 0, nir_type_bool),
4670    };
4671    if (!func || !args[0] || !args[1])
4672       return false;
4673 
4674    const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4675    if (!ret)
4676       return false;
4677    for (uint32_t i = 0; i < 4; ++i)
4678       store_def(ctx, &intr->def, i, dxil_emit_extractval(&ctx->mod, ret, i));
4679    return true;
4680 }
4681 
4682 static bool
emit_quad_op(struct ntd_context * ctx,nir_intrinsic_instr * intr,enum dxil_quad_op_kind op)4683 emit_quad_op(struct ntd_context *ctx, nir_intrinsic_instr *intr, enum dxil_quad_op_kind op)
4684 {
4685    ctx->mod.feats.wave_ops = 1;
4686    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.quadOp",
4687                                                     get_overload(nir_type_uint, intr->def.bit_size));
4688    const struct dxil_value *args[] = {
4689       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_QUAD_OP),
4690       get_src(ctx, intr->src, 0, nir_type_uint),
4691       dxil_module_get_int8_const(&ctx->mod, op),
4692    };
4693    if (!func || !args[0] || !args[1] || !args[2])
4694       return false;
4695 
4696    const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4697    if (!ret)
4698       return false;
4699    store_def(ctx, &intr->def, 0, ret);
4700    return true;
4701 }
4702 
4703 static enum dxil_wave_bit_op_kind
get_reduce_bit_op(nir_op op)4704 get_reduce_bit_op(nir_op op)
4705 {
4706    switch (op) {
4707    case nir_op_ior: return DXIL_WAVE_BIT_OP_OR;
4708    case nir_op_ixor: return DXIL_WAVE_BIT_OP_XOR;
4709    case nir_op_iand: return DXIL_WAVE_BIT_OP_AND;
4710    default:
4711       unreachable("Invalid bit op");
4712    }
4713 }
4714 
4715 static bool
emit_reduce_bitwise(struct ntd_context * ctx,nir_intrinsic_instr * intr)4716 emit_reduce_bitwise(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4717 {
4718    enum dxil_wave_bit_op_kind wave_bit_op = get_reduce_bit_op(nir_intrinsic_reduction_op(intr));
4719    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveBit",
4720                                                     get_overload(nir_type_uint, intr->def.bit_size));
4721    const struct dxil_value *args[] = {
4722       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_BIT),
4723       get_src(ctx, intr->src, 0, nir_type_uint),
4724       dxil_module_get_int8_const(&ctx->mod, wave_bit_op),
4725    };
4726    if (!func || !args[0] || !args[1] || !args[2])
4727       return false;
4728 
4729    const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4730    if (!ret)
4731       return false;
4732    store_def(ctx, &intr->def, 0, ret);
4733    return true;
4734 }
4735 
4736 static enum dxil_wave_op_kind
get_reduce_op(nir_op op)4737 get_reduce_op(nir_op op)
4738 {
4739    switch (op) {
4740    case nir_op_iadd:
4741    case nir_op_fadd:
4742       return DXIL_WAVE_OP_SUM;
4743    case nir_op_imul:
4744    case nir_op_fmul:
4745       return DXIL_WAVE_OP_PRODUCT;
4746    case nir_op_imax:
4747    case nir_op_umax:
4748    case nir_op_fmax:
4749       return DXIL_WAVE_OP_MAX;
4750    case nir_op_imin:
4751    case nir_op_umin:
4752    case nir_op_fmin:
4753       return DXIL_WAVE_OP_MIN;
4754    default:
4755       unreachable("Unexpected reduction op");
4756    }
4757 }
4758 
4759 static bool
emit_reduce(struct ntd_context * ctx,nir_intrinsic_instr * intr)4760 emit_reduce(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4761 {
4762    ctx->mod.feats.wave_ops = 1;
4763    bool is_prefix = intr->intrinsic == nir_intrinsic_exclusive_scan;
4764    nir_op reduction_op = (nir_op)nir_intrinsic_reduction_op(intr);
4765    switch (reduction_op) {
4766    case nir_op_ior:
4767    case nir_op_ixor:
4768    case nir_op_iand:
4769       assert(!is_prefix);
4770       return emit_reduce_bitwise(ctx, intr);
4771    default:
4772       break;
4773    }
4774    nir_alu_type alu_type = nir_op_infos[reduction_op].input_types[0];
4775    enum dxil_wave_op_kind wave_op = get_reduce_op(reduction_op);
4776    const struct dxil_func *func = dxil_get_function(&ctx->mod, is_prefix ? "dx.op.wavePrefixOp" : "dx.op.waveActiveOp",
4777                                                     get_overload(alu_type, intr->def.bit_size));
4778    bool is_unsigned = alu_type == nir_type_uint;
4779    const struct dxil_value *args[] = {
4780       dxil_module_get_int32_const(&ctx->mod, is_prefix ? DXIL_INTR_WAVE_PREFIX_OP : DXIL_INTR_WAVE_ACTIVE_OP),
4781       get_src(ctx, intr->src, 0, alu_type),
4782       dxil_module_get_int8_const(&ctx->mod, wave_op),
4783       dxil_module_get_int8_const(&ctx->mod, is_unsigned),
4784    };
4785    if (!func || !args[0] || !args[1] || !args[2] || !args[3])
4786       return false;
4787 
4788    const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4789    if (!ret)
4790       return false;
4791    store_def(ctx, &intr->def, 0, ret);
4792    return true;
4793 }
4794 
/* Dispatch a NIR intrinsic to the matching DXIL emission helper.
 * Returns false (after logging) for unimplemented intrinsics, which
 * aborts the conversion. */
static bool
emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   switch (intr->intrinsic) {
   /* Compute-shader system values. */
   case nir_intrinsic_load_global_invocation_id:
      return emit_load_global_invocation_id(ctx, intr);
   case nir_intrinsic_load_local_invocation_id:
      return emit_load_local_invocation_id(ctx, intr);
   case nir_intrinsic_load_local_invocation_index:
      return emit_load_local_invocation_index(ctx, intr);
   case nir_intrinsic_load_workgroup_id:
      return emit_load_local_workgroup_id(ctx, intr);
   /* Memory access. */
   case nir_intrinsic_load_ssbo:
      return emit_load_ssbo(ctx, intr);
   case nir_intrinsic_store_ssbo:
      return emit_store_ssbo(ctx, intr);
   case nir_intrinsic_load_deref:
      return emit_load_deref(ctx, intr);
   case nir_intrinsic_store_deref:
      return emit_store_deref(ctx, intr);
   case nir_intrinsic_deref_atomic:
      return emit_atomic_deref(ctx, intr);
   case nir_intrinsic_deref_atomic_swap:
      return emit_atomic_deref_swap(ctx, intr);
   case nir_intrinsic_load_ubo_vec4:
      return emit_load_ubo_vec4(ctx, intr);
   case nir_intrinsic_load_primitive_id:
      return emit_load_unary_external_function(ctx, intr, "dx.op.primitiveID",
                                               DXIL_INTR_PRIMITIVE_ID, nir_type_int);
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_id_no_per_sample:
      return emit_load_sample_id(ctx, intr);
   case nir_intrinsic_load_invocation_id:
      /* The DXIL op for the invocation ID depends on the shader stage. */
      switch (ctx->mod.shader_kind) {
      case DXIL_HULL_SHADER:
         return emit_load_unary_external_function(ctx, intr, "dx.op.outputControlPointID",
                                                  DXIL_INTR_OUTPUT_CONTROL_POINT_ID, nir_type_int);
      case DXIL_GEOMETRY_SHADER:
         return emit_load_unary_external_function(ctx, intr, "dx.op.gsInstanceID",
                                                  DXIL_INTR_GS_INSTANCE_ID, nir_type_int);
      default:
         unreachable("Unexpected shader kind for invocation ID");
      }
   case nir_intrinsic_load_view_index:
      ctx->mod.feats.view_id = true;
      return emit_load_unary_external_function(ctx, intr, "dx.op.viewID",
                                               DXIL_INTR_VIEW_ID, nir_type_int);
   case nir_intrinsic_load_sample_mask_in:
      return emit_load_sample_mask_in(ctx, intr);
   case nir_intrinsic_load_tess_coord:
      return emit_load_tess_coord(ctx, intr);
   case nir_intrinsic_terminate_if:
   case nir_intrinsic_demote_if:
      return emit_discard_if(ctx, intr);
   case nir_intrinsic_terminate:
   case nir_intrinsic_demote:
      return emit_discard(ctx);
   case nir_intrinsic_emit_vertex:
      return emit_emit_vertex(ctx, intr);
   case nir_intrinsic_end_primitive:
      return emit_end_primitive(ctx, intr);
   case nir_intrinsic_barrier:
      return emit_barrier(ctx, intr);
   case nir_intrinsic_ssbo_atomic:
      return emit_ssbo_atomic(ctx, intr);
   case nir_intrinsic_ssbo_atomic_swap:
      return emit_ssbo_atomic_comp_swap(ctx, intr);
   /* Image ops: deref, binding-index, and bindless variants share a path. */
   case nir_intrinsic_image_deref_atomic:
   case nir_intrinsic_image_atomic:
   case nir_intrinsic_bindless_image_atomic:
      return emit_image_atomic(ctx, intr);
   case nir_intrinsic_image_deref_atomic_swap:
   case nir_intrinsic_image_atomic_swap:
   case nir_intrinsic_bindless_image_atomic_swap:
      return emit_image_atomic_comp_swap(ctx, intr);
   case nir_intrinsic_image_store:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_bindless_image_store:
      return emit_image_store(ctx, intr);
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_bindless_image_load:
      return emit_image_load(ctx, intr);
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_bindless_image_size:
      return emit_image_size(ctx, intr);
   case nir_intrinsic_get_ssbo_size:
      return emit_get_ssbo_size(ctx, intr);
   /* Shader I/O via load/store intrinsics. */
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_output:
      return emit_load_input_via_intrinsic(ctx, intr);
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output:
      return emit_store_output_via_intrinsic(ctx, intr);

   case nir_intrinsic_load_barycentric_at_offset:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_pixel:
      /* Emit nothing, we only support these as inputs to load_interpolated_input */
      return true;
   case nir_intrinsic_load_interpolated_input:
      return emit_load_interpolated_input(ctx, intr);
      break;

   case nir_intrinsic_vulkan_resource_index:
      return emit_vulkan_resource_index(ctx, intr);
   case nir_intrinsic_load_vulkan_descriptor:
      return emit_load_vulkan_descriptor(ctx, intr);

   case nir_intrinsic_load_sample_pos_from_id:
      return emit_load_sample_pos_from_id(ctx, intr);

   case nir_intrinsic_is_helper_invocation:
      return emit_load_unary_external_function(
         ctx, intr, "dx.op.isHelperLane", DXIL_INTR_IS_HELPER_LANE, nir_type_int);
   /* Wave (subgroup) queries — each requires the wave-ops feature flag. */
   case nir_intrinsic_elect:
      ctx->mod.feats.wave_ops = 1;
      return emit_load_unary_external_function(
         ctx, intr, "dx.op.waveIsFirstLane", DXIL_INTR_WAVE_IS_FIRST_LANE, nir_type_invalid);
   case nir_intrinsic_load_subgroup_size:
      ctx->mod.feats.wave_ops = 1;
      return emit_load_unary_external_function(
         ctx, intr, "dx.op.waveGetLaneCount", DXIL_INTR_WAVE_GET_LANE_COUNT, nir_type_invalid);
   case nir_intrinsic_load_subgroup_invocation:
      ctx->mod.feats.wave_ops = 1;
      return emit_load_unary_external_function(
         ctx, intr, "dx.op.waveGetLaneIndex", DXIL_INTR_WAVE_GET_LANE_INDEX, nir_type_invalid);

   case nir_intrinsic_vote_feq:
   case nir_intrinsic_vote_ieq:
      return emit_vote_eq(ctx, intr);
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
      return emit_vote(ctx, intr);

   case nir_intrinsic_ballot:
      return emit_ballot(ctx, intr);

   case nir_intrinsic_read_first_invocation:
      return emit_read_first_invocation(ctx, intr);
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_shuffle:
   case nir_intrinsic_quad_broadcast:
      return emit_read_invocation(ctx, intr);

   case nir_intrinsic_quad_swap_horizontal:
      return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_X);
   case nir_intrinsic_quad_swap_vertical:
      return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_Y);
   case nir_intrinsic_quad_swap_diagonal:
      return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_DIAGONAL);

   case nir_intrinsic_reduce:
   case nir_intrinsic_exclusive_scan:
      return emit_reduce(ctx, intr);

   /* Derivatives: plain ddx/ddy map to the coarse variants. */
   case nir_intrinsic_ddx:
   case nir_intrinsic_ddx_coarse: return emit_derivative(ctx, intr, DXIL_INTR_DDX_COARSE);
   case nir_intrinsic_ddx_fine: return emit_derivative(ctx, intr, DXIL_INTR_DDX_FINE);
   case nir_intrinsic_ddy:
   case nir_intrinsic_ddy_coarse: return emit_derivative(ctx, intr, DXIL_INTR_DDY_COARSE);
   case nir_intrinsic_ddy_fine: return emit_derivative(ctx, intr, DXIL_INTR_DDY_FINE);

   case nir_intrinsic_load_first_vertex:
      ctx->mod.feats.extended_command_info = true;
      return emit_load_unary_external_function(ctx, intr, "dx.op.startVertexLocation",
                                               DXIL_INTR_START_VERTEX_LOCATION, nir_type_int);
   case nir_intrinsic_load_base_instance:
      ctx->mod.feats.extended_command_info = true;
      return emit_load_unary_external_function(ctx, intr, "dx.op.startInstanceLocation",
                                               DXIL_INTR_START_INSTANCE_LOCATION, nir_type_int);

   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_workgroup_size:
   default:
      log_nir_instr_unsupported(
         ctx->logger, "Unimplemented intrinsic instruction", &intr->instr);
      return false;
   }
}
4979 
4980 static const struct dxil_type *
dxil_type_for_const(struct ntd_context * ctx,nir_def * def)4981 dxil_type_for_const(struct ntd_context *ctx, nir_def *def)
4982 {
4983    if (BITSET_TEST(ctx->int_types, def->index) ||
4984        !BITSET_TEST(ctx->float_types, def->index))
4985       return dxil_module_get_int_type(&ctx->mod, def->bit_size);
4986    return dxil_module_get_float_type(&ctx->mod, def->bit_size);
4987 }
4988 
4989 static bool
emit_load_const(struct ntd_context * ctx,nir_load_const_instr * load_const)4990 emit_load_const(struct ntd_context *ctx, nir_load_const_instr *load_const)
4991 {
4992    for (uint32_t i = 0; i < load_const->def.num_components; ++i) {
4993       const struct dxil_type *type = dxil_type_for_const(ctx, &load_const->def);
4994       store_ssa_def(ctx, &load_const->def, i, get_value_for_const(&ctx->mod, &load_const->value[i], type));
4995    }
4996    return true;
4997 }
4998 
static bool
emit_deref(struct ntd_context* ctx, nir_deref_instr* instr)
{
   /* There's two possible reasons we might be walking through derefs:
    * 1. Computing an index to be used for a texture/sampler/image binding, which
    *    can only do array indexing and should compute the indices along the way with
    *    array-of-array sizes.
    * 2. Storing an index to be used in a GEP for access to a variable.
    */
   nir_variable *var = nir_deref_instr_get_variable(instr);
   assert(var);

   /* Case 1 applies when the underlying variable (stripped of arrays) is a
    * sampler, image, or texture. */
   bool is_aoa_size =
      glsl_type_is_sampler(glsl_without_array(var->type)) ||
      glsl_type_is_image(glsl_without_array(var->type)) ||
      glsl_type_is_texture(glsl_without_array(var->type));

   if (!is_aoa_size) {
      /* Just store the values, we'll use these to build a GEP in the load or store */
      switch (instr->deref_type) {
      case nir_deref_type_var:
         store_def(ctx, &instr->def, 0, dxil_module_get_int_const(&ctx->mod, 0, instr->def.bit_size));
         return true;
      case nir_deref_type_array:
         store_def(ctx, &instr->def, 0, get_src(ctx, &instr->arr.index, 0, nir_type_int));
         return true;
      case nir_deref_type_struct:
         store_def(ctx, &instr->def, 0, dxil_module_get_int_const(&ctx->mod, instr->strct.index, 32));
         return true;
      default:
         unreachable("Other deref types not supported");
      }
   }

   /* In the CL environment, there's nothing to emit. Any references to
    * derefs will emit the necessary logic to handle scratch/shared GEP addressing
    */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_CL)
      return true;

   const struct glsl_type *type = instr->type;
   const struct dxil_value *binding;
   /* GL identifies resources by driver_location, Vulkan/GL-bindless by binding. */
   unsigned binding_val = ctx->opts->environment == DXIL_ENVIRONMENT_GL ?
      var->data.driver_location : var->data.binding;

   if (instr->deref_type == nir_deref_type_var) {
      binding = dxil_module_get_int32_const(&ctx->mod, binding_val);
   } else {
      /* Accumulate the flattened array index: parent index plus this
       * level's index, scaled by the remaining array-of-array size. */
      const struct dxil_value *base = get_src(ctx, &instr->parent, 0, nir_type_uint32);
      const struct dxil_value *offset = get_src(ctx, &instr->arr.index, 0, nir_type_uint32);
      if (!base || !offset)
         return false;

      if (glsl_type_is_array(instr->type)) {
         offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_MUL, offset,
            dxil_module_get_int32_const(&ctx->mod, glsl_get_aoa_size(instr->type)), 0);
         if (!offset)
            return false;
      }
      binding = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, base, offset, 0);
   }

   if (!binding)
      return false;

   /* Haven't finished chasing the deref chain yet, just store the value */
   if (glsl_type_is_array(type)) {
      store_def(ctx, &instr->def, 0, binding);
      return true;
   }

   /* End of the chain: create the resource handle for this binding. */
   assert(glsl_type_is_sampler(type) || glsl_type_is_image(type) || glsl_type_is_texture(type));
   enum dxil_resource_class res_class;
   if (glsl_type_is_image(type))
      res_class = DXIL_RESOURCE_CLASS_UAV;
   else if (glsl_type_is_sampler(type))
      res_class = DXIL_RESOURCE_CLASS_SAMPLER;
   else
      res_class = DXIL_RESOURCE_CLASS_SRV;

   /* Outside Vulkan, images live in register space 1 and everything else
    * in space 0. */
   unsigned descriptor_set = ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN ?
      var->data.descriptor_set : (glsl_type_is_image(type) ? 1 : 0);
   const struct dxil_value *handle = emit_createhandle_call_dynamic(ctx, res_class,
      descriptor_set, binding_val, binding, false);
   if (!handle)
      return false;

   store_ssa_def(ctx, &instr->def, 0, handle);
   return true;
}
5089 
5090 static bool
emit_cond_branch(struct ntd_context * ctx,const struct dxil_value * cond,int true_block,int false_block)5091 emit_cond_branch(struct ntd_context *ctx, const struct dxil_value *cond,
5092                  int true_block, int false_block)
5093 {
5094    assert(cond);
5095    assert(true_block >= 0);
5096    assert(false_block >= 0);
5097    return dxil_emit_branch(&ctx->mod, cond, true_block, false_block);
5098 }
5099 
5100 static bool
emit_branch(struct ntd_context * ctx,int block)5101 emit_branch(struct ntd_context *ctx, int block)
5102 {
5103    assert(block >= 0);
5104    return dxil_emit_branch(&ctx->mod, NULL, block, -1);
5105 }
5106 
5107 static bool
emit_jump(struct ntd_context * ctx,nir_jump_instr * instr)5108 emit_jump(struct ntd_context *ctx, nir_jump_instr *instr)
5109 {
5110    switch (instr->type) {
5111    case nir_jump_break:
5112    case nir_jump_continue:
5113       assert(instr->instr.block->successors[0]);
5114       assert(!instr->instr.block->successors[1]);
5115       return emit_branch(ctx, instr->instr.block->successors[0]->index);
5116 
5117    default:
5118       unreachable("Unsupported jump type\n");
5119    }
5120 }
5121 
/* Per-NIR-phi bookkeeping: one DXIL phi instruction per vector component.
 * Incoming values are attached later by fixup_phi() once every
 * predecessor block has been emitted. */
struct phi_block {
   /* Number of vector components (one DXIL phi each). */
   unsigned num_components;
   struct dxil_instr *comp[NIR_MAX_VEC_COMPONENTS];
};
5126 
5127 static bool
emit_phi(struct ntd_context * ctx,nir_phi_instr * instr)5128 emit_phi(struct ntd_context *ctx, nir_phi_instr *instr)
5129 {
5130    const struct dxil_type *type = NULL;
5131    nir_foreach_phi_src(src, instr) {
5132       /* All sources have the same type, just use the first one */
5133       type = dxil_value_get_type(ctx->defs[src->src.ssa->index].chans[0]);
5134       break;
5135    }
5136 
5137    struct phi_block *vphi = ralloc(ctx->phis, struct phi_block);
5138    vphi->num_components = instr->def.num_components;
5139 
5140    for (unsigned i = 0; i < vphi->num_components; ++i) {
5141       struct dxil_instr *phi = vphi->comp[i] = dxil_emit_phi(&ctx->mod, type);
5142       if (!phi)
5143          return false;
5144       store_ssa_def(ctx, &instr->def, i, dxil_instr_get_return_value(phi));
5145    }
5146    _mesa_hash_table_insert(ctx->phis, instr, vphi);
5147    return true;
5148 }
5149 
5150 static bool
fixup_phi(struct ntd_context * ctx,nir_phi_instr * instr,struct phi_block * vphi)5151 fixup_phi(struct ntd_context *ctx, nir_phi_instr *instr,
5152           struct phi_block *vphi)
5153 {
5154    const struct dxil_value *values[16];
5155    unsigned blocks[16];
5156    for (unsigned i = 0; i < vphi->num_components; ++i) {
5157       size_t num_incoming = 0;
5158       nir_foreach_phi_src(src, instr) {
5159          const struct dxil_value *val = get_src_ssa(ctx, src->src.ssa, i);
5160          values[num_incoming] = val;
5161          blocks[num_incoming] = src->pred->index;
5162          ++num_incoming;
5163          if (num_incoming == ARRAY_SIZE(values)) {
5164             if (!dxil_phi_add_incoming(vphi->comp[i], values, blocks,
5165                                        num_incoming))
5166                return false;
5167             num_incoming = 0;
5168          }
5169       }
5170       if (num_incoming > 0 && !dxil_phi_add_incoming(vphi->comp[i], values,
5171                                                      blocks, num_incoming))
5172          return false;
5173    }
5174    return true;
5175 }
5176 
5177 static unsigned
get_n_src(struct ntd_context * ctx,const struct dxil_value ** values,unsigned max_components,nir_tex_src * src,nir_alu_type type)5178 get_n_src(struct ntd_context *ctx, const struct dxil_value **values,
5179           unsigned max_components, nir_tex_src *src, nir_alu_type type)
5180 {
5181    unsigned num_components = nir_src_num_components(src->src);
5182    unsigned i = 0;
5183 
5184    assert(num_components <= max_components);
5185 
5186    for (i = 0; i < num_components; ++i) {
5187       values[i] = get_src(ctx, &src->src, i, type);
5188       if (!values[i])
5189          return 0;
5190    }
5191 
5192    return num_components;
5193 }
5194 
/* Fill the unused tail of a fixed-size source array with the given undef
 * value so DXIL call argument lists are fully populated. NOTE: 'array'
 * must be an actual array (ARRAY_SIZE is applied to it), and 'undef' is
 * evaluated once per padded slot — pass only simple expressions.
 */
#define PAD_SRC(ctx, array, components, undef) \
   for (unsigned i = components; i < ARRAY_SIZE(array); ++i) { \
      array[i] = undef; \
   }
5199 
5200 static const struct dxil_value *
emit_sample(struct ntd_context * ctx,struct texop_parameters * params)5201 emit_sample(struct ntd_context *ctx, struct texop_parameters *params)
5202 {
5203    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sample", params->overload);
5204    if (!func)
5205       return NULL;
5206 
5207    const struct dxil_value *args[11] = {
5208       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE),
5209       params->tex, params->sampler,
5210       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5211       params->offset[0], params->offset[1], params->offset[2],
5212       params->min_lod
5213    };
5214 
5215    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5216 }
5217 
5218 static const struct dxil_value *
emit_sample_bias(struct ntd_context * ctx,struct texop_parameters * params)5219 emit_sample_bias(struct ntd_context *ctx, struct texop_parameters *params)
5220 {
5221    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleBias", params->overload);
5222    if (!func)
5223       return NULL;
5224 
5225    assert(params->bias != NULL);
5226 
5227    const struct dxil_value *args[12] = {
5228       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_BIAS),
5229       params->tex, params->sampler,
5230       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5231       params->offset[0], params->offset[1], params->offset[2],
5232       params->bias, params->min_lod
5233    };
5234 
5235    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5236 }
5237 
5238 static const struct dxil_value *
emit_sample_level(struct ntd_context * ctx,struct texop_parameters * params)5239 emit_sample_level(struct ntd_context *ctx, struct texop_parameters *params)
5240 {
5241    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleLevel", params->overload);
5242    if (!func)
5243       return NULL;
5244 
5245    assert(params->lod_or_sample != NULL);
5246 
5247    const struct dxil_value *args[11] = {
5248       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_LEVEL),
5249       params->tex, params->sampler,
5250       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5251       params->offset[0], params->offset[1], params->offset[2],
5252       params->lod_or_sample
5253    };
5254 
5255    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5256 }
5257 
5258 static const struct dxil_value *
emit_sample_cmp(struct ntd_context * ctx,struct texop_parameters * params)5259 emit_sample_cmp(struct ntd_context *ctx, struct texop_parameters *params)
5260 {
5261    const struct dxil_func *func;
5262    enum dxil_intr opcode;
5263 
5264    func = dxil_get_function(&ctx->mod, "dx.op.sampleCmp", DXIL_F32);
5265    opcode = DXIL_INTR_SAMPLE_CMP;
5266 
5267    if (!func)
5268       return NULL;
5269 
5270    const struct dxil_value *args[12] = {
5271       dxil_module_get_int32_const(&ctx->mod, opcode),
5272       params->tex, params->sampler,
5273       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5274       params->offset[0], params->offset[1], params->offset[2],
5275       params->cmp, params->min_lod
5276    };
5277 
5278    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5279 }
5280 
5281 static const struct dxil_value *
emit_sample_cmp_level_zero(struct ntd_context * ctx,struct texop_parameters * params)5282 emit_sample_cmp_level_zero(struct ntd_context *ctx, struct texop_parameters *params)
5283 {
5284    const struct dxil_func *func;
5285    enum dxil_intr opcode;
5286 
5287    func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevelZero", DXIL_F32);
5288    opcode = DXIL_INTR_SAMPLE_CMP_LVL_ZERO;
5289 
5290    if (!func)
5291       return NULL;
5292 
5293    const struct dxil_value *args[11] = {
5294       dxil_module_get_int32_const(&ctx->mod, opcode),
5295       params->tex, params->sampler,
5296       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5297       params->offset[0], params->offset[1], params->offset[2],
5298       params->cmp
5299    };
5300 
5301    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5302 }
5303 
5304 static const struct dxil_value *
emit_sample_cmp_level(struct ntd_context * ctx,struct texop_parameters * params)5305 emit_sample_cmp_level(struct ntd_context *ctx, struct texop_parameters *params)
5306 {
5307    ctx->mod.feats.advanced_texture_ops = true;
5308    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevel", params->overload);
5309    if (!func)
5310       return NULL;
5311 
5312    assert(params->lod_or_sample != NULL);
5313 
5314    const struct dxil_value *args[12] = {
5315       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_LEVEL),
5316       params->tex, params->sampler,
5317       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5318       params->offset[0], params->offset[1], params->offset[2],
5319       params->cmp, params->lod_or_sample
5320    };
5321 
5322    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5323 }
5324 
5325 static const struct dxil_value *
emit_sample_cmp_bias(struct ntd_context * ctx,struct texop_parameters * params)5326 emit_sample_cmp_bias(struct ntd_context *ctx, struct texop_parameters *params)
5327 {
5328    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpBias", params->overload);
5329    if (!func)
5330       return NULL;
5331 
5332    assert(params->bias != NULL);
5333    ctx->mod.feats.sample_cmp_bias_gradient = 1;
5334 
5335    const struct dxil_value *args[13] = {
5336       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_BIAS),
5337       params->tex, params->sampler,
5338       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5339       params->offset[0], params->offset[1], params->offset[2],
5340       params->cmp, params->bias, params->min_lod
5341    };
5342 
5343    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5344 }
5345 
5346 static const struct dxil_value *
emit_sample_grad(struct ntd_context * ctx,struct texop_parameters * params)5347 emit_sample_grad(struct ntd_context *ctx, struct texop_parameters *params)
5348 {
5349    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleGrad", params->overload);
5350    if (!func)
5351       return false;
5352 
5353    const struct dxil_value *args[17] = {
5354       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_GRAD),
5355       params->tex, params->sampler,
5356       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5357       params->offset[0], params->offset[1], params->offset[2],
5358       params->dx[0], params->dx[1], params->dx[2],
5359       params->dy[0], params->dy[1], params->dy[2],
5360       params->min_lod
5361    };
5362 
5363    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5364 }
5365 
5366 static const struct dxil_value *
emit_sample_cmp_grad(struct ntd_context * ctx,struct texop_parameters * params)5367 emit_sample_cmp_grad(struct ntd_context *ctx, struct texop_parameters *params)
5368 {
5369    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpGrad", params->overload);
5370    if (!func)
5371       return false;
5372 
5373    ctx->mod.feats.sample_cmp_bias_gradient = 1;
5374 
5375    const struct dxil_value *args[18] = {
5376       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_GRAD),
5377       params->tex, params->sampler,
5378       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5379       params->offset[0], params->offset[1], params->offset[2],
5380       params->cmp,
5381       params->dx[0], params->dx[1], params->dx[2],
5382       params->dy[0], params->dy[1], params->dy[2],
5383       params->min_lod
5384    };
5385 
5386    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5387 }
5388 
5389 static const struct dxil_value *
emit_texel_fetch(struct ntd_context * ctx,struct texop_parameters * params)5390 emit_texel_fetch(struct ntd_context *ctx, struct texop_parameters *params)
5391 {
5392    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", params->overload);
5393    if (!func)
5394       return false;
5395 
5396    if (!params->lod_or_sample)
5397       params->lod_or_sample = dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32));
5398 
5399    const struct dxil_value *args[] = {
5400       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOAD),
5401       params->tex,
5402       params->lod_or_sample, params->coord[0], params->coord[1], params->coord[2],
5403       params->offset[0], params->offset[1], params->offset[2]
5404    };
5405 
5406    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5407 }
5408 
5409 static const struct dxil_value *
emit_texture_lod(struct ntd_context * ctx,struct texop_parameters * params,bool clamped)5410 emit_texture_lod(struct ntd_context *ctx, struct texop_parameters *params, bool clamped)
5411 {
5412    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.calculateLOD", DXIL_F32);
5413    if (!func)
5414       return false;
5415 
5416    const struct dxil_value *args[] = {
5417       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOD),
5418       params->tex,
5419       params->sampler,
5420       params->coord[0],
5421       params->coord[1],
5422       params->coord[2],
5423       dxil_module_get_int1_const(&ctx->mod, clamped ? 1 : 0)
5424    };
5425 
5426    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5427 }
5428 
5429 static const struct dxil_value *
emit_texture_gather(struct ntd_context * ctx,struct texop_parameters * params,unsigned component)5430 emit_texture_gather(struct ntd_context *ctx, struct texop_parameters *params, unsigned component)
5431 {
5432    const struct dxil_func *func = dxil_get_function(&ctx->mod,
5433       params->cmp ? "dx.op.textureGatherCmp" : "dx.op.textureGather", params->overload);
5434    if (!func)
5435       return false;
5436 
5437    const struct dxil_value *args[] = {
5438       dxil_module_get_int32_const(&ctx->mod, params->cmp ?
5439          DXIL_INTR_TEXTURE_GATHER_CMP : DXIL_INTR_TEXTURE_GATHER),
5440       params->tex,
5441       params->sampler,
5442       params->coord[0],
5443       params->coord[1],
5444       params->coord[2],
5445       params->coord[3],
5446       params->offset[0],
5447       params->offset[1],
5448       dxil_module_get_int32_const(&ctx->mod, component),
5449       params->cmp
5450    };
5451 
5452    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args) - (params->cmp ? 0 : 1));
5453 }
5454 
static bool
emit_tex(struct ntd_context *ctx, nir_tex_instr *instr)
{
   /* Translate one NIR texture instruction into the corresponding DXIL
    * intrinsic call. All source operands are first gathered into 'params',
    * then the final emit is dispatched on instr->op. Returns false on any
    * emission failure.
    */
   struct texop_parameters params;
   memset(&params, 0, sizeof(struct texop_parameters));
   /* Outside Vulkan, texture/sampler handles were pre-created and are
    * indexed directly; in the Vulkan environment they arrive as
    * deref/handle sources in the loop below.
    */
   if (ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN) {
      params.tex = ctx->srv_handles[instr->texture_index];
      params.sampler = ctx->sampler_handles[instr->sampler_index];
   }

   /* Undef values used later to pad unused coordinate/offset slots. */
   const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
   const struct dxil_type *float_type = dxil_module_get_float_type(&ctx->mod, 32);
   const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);
   const struct dxil_value *float_undef = dxil_module_get_undef(&ctx->mod, float_type);

   unsigned coord_components = 0, offset_components = 0, dx_components = 0, dy_components = 0;
   params.overload = get_overload(instr->dest_type, 32);

   bool lod_is_zero = false;
   /* Gather every NIR texture source into params. */
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      nir_alu_type type = nir_tex_instr_src_type(instr, i);

      switch (instr->src[i].src_type) {
      case nir_tex_src_coord:
         coord_components = get_n_src(ctx, params.coord, ARRAY_SIZE(params.coord),
                                      &instr->src[i], type);
         if (!coord_components)
            return false;
         break;

      case nir_tex_src_offset:
         offset_components = get_n_src(ctx, params.offset, ARRAY_SIZE(params.offset),
                                       &instr->src[i],  nir_type_int);
         if (!offset_components)
            return false;

         /* Dynamic offsets were only allowed with gather, until "advanced texture ops" in SM7 */
         if (!nir_src_is_const(instr->src[i].src) && instr->op != nir_texop_tg4)
            ctx->mod.feats.advanced_texture_ops = true;
         break;

      case nir_tex_src_bias:
         assert(instr->op == nir_texop_txb);
         assert(nir_src_num_components(instr->src[i].src) == 1);
         params.bias = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
         if (!params.bias)
            return false;
         break;

      case nir_tex_src_lod:
         assert(nir_src_num_components(instr->src[i].src) == 1);
         /* For multi-sample fetches the sample index (handled by
          * nir_tex_src_ms_index) is what matters; a LOD source here must
          * be the constant 0 and is dropped.
          */
         if (instr->op == nir_texop_txf_ms) {
            assert(nir_src_as_int(instr->src[i].src) == 0);
            break;
         }

         /* Buffers don't have a LOD */
         if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF)
            params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, type);
         else
            params.lod_or_sample = int_undef;
         if (!params.lod_or_sample)
            return false;

         /* Remember constant-zero LODs so a level-less opcode can be
          * chosen for older shader models (see nir_texop_txl below).
          */
         if (nir_src_is_const(instr->src[i].src) && nir_src_as_float(instr->src[i].src) == 0.0f)
            lod_is_zero = true;
         break;

      case nir_tex_src_min_lod:
         assert(nir_src_num_components(instr->src[i].src) == 1);
         params.min_lod = get_src(ctx, &instr->src[i].src, 0, type);
         if (!params.min_lod)
            return false;
         break;

      case nir_tex_src_comparator:
         assert(nir_src_num_components(instr->src[i].src) == 1);
         params.cmp = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
         if (!params.cmp)
            return false;
         break;

      case nir_tex_src_ddx:
         dx_components = get_n_src(ctx, params.dx, ARRAY_SIZE(params.dx),
                                   &instr->src[i], nir_type_float);
         if (!dx_components)
            return false;
         break;

      case nir_tex_src_ddy:
         dy_components = get_n_src(ctx, params.dy, ARRAY_SIZE(params.dy),
                                   &instr->src[i], nir_type_float);
         if (!dy_components)
            return false;
         break;

      case nir_tex_src_ms_index:
         /* Sample index for multi-sample fetches, stored in the shared
          * lod_or_sample slot.
          */
         params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, nir_type_int);
         if (!params.lod_or_sample)
            return false;
         break;

      case nir_tex_src_texture_deref:
         assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN);
         params.tex = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
         break;

      case nir_tex_src_sampler_deref:
         assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN);
         params.sampler = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
         break;

      /* Dynamically-indexed resource: build a handle from base index plus
       * the dynamic offset.
       */
      case nir_tex_src_texture_offset:
         params.tex = emit_createhandle_call_dynamic(ctx, DXIL_RESOURCE_CLASS_SRV,
            0, instr->texture_index,
            dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
               get_src(ctx, &instr->src[i].src, 0, nir_type_uint),
               dxil_module_get_int32_const(&ctx->mod, instr->texture_index), 0),
            instr->texture_non_uniform);
         break;

      case nir_tex_src_sampler_offset:
         if (nir_tex_instr_need_sampler(instr)) {
            params.sampler = emit_createhandle_call_dynamic(ctx, DXIL_RESOURCE_CLASS_SAMPLER,
               0, instr->sampler_index,
               dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
                  get_src(ctx, &instr->src[i].src, 0, nir_type_uint),
                  dxil_module_get_int32_const(&ctx->mod, instr->sampler_index), 0),
               instr->sampler_non_uniform);
         }
         break;

      case nir_tex_src_texture_handle:
         params.tex = create_srv_handle(ctx, instr, &instr->src[i].src);
         break;

      case nir_tex_src_sampler_handle:
         if (nir_tex_instr_need_sampler(instr))
            params.sampler = create_sampler_handle(ctx, instr->is_shadow, &instr->src[i].src);
         break;

      case nir_tex_src_projector:
         unreachable("Texture projector should have been lowered");

      default:
         fprintf(stderr, "texture source: %d\n", instr->src[i].src_type);
         unreachable("unknown texture source");
      }
   }

   assert(params.tex != NULL);
   /* Queries and plain texel fetches need no sampler; everything else does. */
   assert(instr->op == nir_texop_txf ||
          instr->op == nir_texop_txf_ms ||
          nir_tex_instr_is_query(instr) ||
          params.sampler != NULL);

   /* Pad unused components so DXIL argument lists are fully populated. */
   PAD_SRC(ctx, params.coord, coord_components, float_undef);
   PAD_SRC(ctx, params.offset, offset_components, int_undef);
   if (!params.min_lod) params.min_lod = float_undef;

   const struct dxil_value *sample = NULL;
   switch (instr->op) {
   case nir_texop_txb:
      /* The compare+bias combination is only emitted on minor version >= 8;
       * otherwise the comparator (if any) is ignored and plain sampleBias
       * is used.
       */
      if (params.cmp != NULL && ctx->mod.minor_version >= 8)
         sample = emit_sample_cmp_bias(ctx, &params);
      else
         sample = emit_sample_bias(ctx, &params);
      break;

   case nir_texop_tex:
      if (params.cmp != NULL) {
         sample = emit_sample_cmp(ctx, &params);
         break;
      } else if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) {
         sample = emit_sample(ctx, &params);
         break;
      }
      /* Implicit-derivative sampling outside a pixel shader: sample at
       * LOD 0 via the txl path instead.
       */
      params.lod_or_sample = dxil_module_get_float_const(&ctx->mod, 0);
      lod_is_zero = true;
      FALLTHROUGH;
   case nir_texop_txl:
      if (lod_is_zero && params.cmp != NULL && ctx->mod.minor_version < 7) {
         /* Prior to SM 6.7, if the level is constant 0.0, ignore the LOD argument,
          * so level-less DXIL instructions are used. This is needed to avoid emitting
          * dx.op.sampleCmpLevel, which would not be available.
          */
         sample = emit_sample_cmp_level_zero(ctx, &params);
      } else {
         if (params.cmp != NULL)
            sample = emit_sample_cmp_level(ctx, &params);
         else
            sample = emit_sample_level(ctx, &params);
      }
      break;

   case nir_texop_txd:
      /* Explicit derivatives: pad ddx/ddy to full width first. */
      PAD_SRC(ctx, params.dx, dx_components, float_undef);
      PAD_SRC(ctx, params.dy, dy_components,float_undef);
      /* compare+gradient only exists on minor version >= 8, mirroring txb. */
      if (params.cmp != NULL && ctx->mod.minor_version >= 8)
         sample = emit_sample_cmp_grad(ctx, &params);
      else
         sample = emit_sample_grad(ctx, &params);
      break;

   case nir_texop_txf:
   case nir_texop_txf_ms:
      if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
         /* Buffer loads take a single coordinate; slot 1 is an undef. */
         params.coord[1] = int_undef;
         sample = emit_bufferload_call(ctx, params.tex, params.coord, params.overload);
      } else {
         /* Fetch coordinates are integers, so pad with the int undef. */
         PAD_SRC(ctx, params.coord, coord_components, int_undef);
         sample = emit_texel_fetch(ctx, &params);
      }
      break;

   case nir_texop_txs:
      sample = emit_texture_size(ctx, &params);
      break;

   case nir_texop_tg4:
      sample = emit_texture_gather(ctx, &params, instr->component);
      break;

   case nir_texop_lod:
      /* Component 0 receives the clamped LOD, component 1 the unclamped. */
      sample = emit_texture_lod(ctx, &params, true);
      store_def(ctx, &instr->def, 0, sample);
      sample = emit_texture_lod(ctx, &params, false);
      store_def(ctx, &instr->def, 1, sample);
      return true;

   case nir_texop_query_levels: {
      params.lod_or_sample = dxil_module_get_int_const(&ctx->mod, 0, 32);
      sample = emit_texture_size(ctx, &params);
      /* Element 3 of the size query result is the level count. */
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3);
      store_def(ctx, &instr->def, 0, retval);
      return true;
   }

   case nir_texop_texture_samples: {
      params.lod_or_sample = int_undef;
      sample = emit_texture_size(ctx, &params);
      /* Element 3 of the size query result is the sample count. */
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3);
      store_def(ctx, &instr->def, 0, retval);
      return true;
   }

   default:
      fprintf(stderr, "texture op: %d\n", instr->op);
      unreachable("unknown texture op");
   }

   if (!sample)
      return false;

   /* Scatter the result vector into the NIR def, one component at a time. */
   for (unsigned i = 0; i < instr->def.num_components; ++i) {
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, i);
      store_def(ctx, &instr->def, i, retval);
   }

   return true;
}
5716 
5717 static bool
emit_undefined(struct ntd_context * ctx,nir_undef_instr * undef)5718 emit_undefined(struct ntd_context *ctx, nir_undef_instr *undef)
5719 {
5720    for (unsigned i = 0; i < undef->def.num_components; ++i)
5721       store_ssa_def(ctx, &undef->def, i, dxil_module_get_int32_const(&ctx->mod, 0));
5722    return true;
5723 }
5724 
emit_instr(struct ntd_context * ctx,struct nir_instr * instr)5725 static bool emit_instr(struct ntd_context *ctx, struct nir_instr* instr)
5726 {
5727    switch (instr->type) {
5728    case nir_instr_type_alu:
5729       return emit_alu(ctx, nir_instr_as_alu(instr));
5730    case nir_instr_type_intrinsic:
5731       return emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
5732    case nir_instr_type_load_const:
5733       return emit_load_const(ctx, nir_instr_as_load_const(instr));
5734    case nir_instr_type_deref:
5735       return emit_deref(ctx, nir_instr_as_deref(instr));
5736    case nir_instr_type_jump:
5737       return emit_jump(ctx, nir_instr_as_jump(instr));
5738    case nir_instr_type_phi:
5739       return emit_phi(ctx, nir_instr_as_phi(instr));
5740    case nir_instr_type_tex:
5741       return emit_tex(ctx, nir_instr_as_tex(instr));
5742    case nir_instr_type_undef:
5743       return emit_undefined(ctx, nir_instr_as_undef(instr));
5744    default:
5745       log_nir_instr_unsupported(ctx->logger, "Unimplemented instruction type",
5746                                 instr);
5747       return false;
5748    }
5749 }
5750 
5751 
5752 static bool
emit_block(struct ntd_context * ctx,struct nir_block * block)5753 emit_block(struct ntd_context *ctx, struct nir_block *block)
5754 {
5755    assert(block->index < ctx->mod.cur_emitting_func->num_basic_block_ids);
5756    ctx->mod.cur_emitting_func->basic_block_ids[block->index] = ctx->mod.cur_emitting_func->curr_block;
5757 
5758    nir_foreach_instr(instr, block) {
5759       TRACE_CONVERSION(instr);
5760 
5761       if (!emit_instr(ctx, instr))  {
5762          return false;
5763       }
5764    }
5765    return true;
5766 }
5767 
5768 static bool
5769 emit_cf_list(struct ntd_context *ctx, struct exec_list *list);
5770 
5771 static bool
emit_if(struct ntd_context * ctx,struct nir_if * if_stmt)5772 emit_if(struct ntd_context *ctx, struct nir_if *if_stmt)
5773 {
5774    assert(nir_src_num_components(if_stmt->condition) == 1);
5775    const struct dxil_value *cond = get_src(ctx, &if_stmt->condition, 0,
5776                                            nir_type_bool);
5777    if (!cond)
5778       return false;
5779 
5780    /* prepare blocks */
5781    nir_block *then_block = nir_if_first_then_block(if_stmt);
5782    assert(nir_if_last_then_block(if_stmt)->successors[0]);
5783    assert(!nir_if_last_then_block(if_stmt)->successors[1]);
5784    int then_succ = nir_if_last_then_block(if_stmt)->successors[0]->index;
5785 
5786    nir_block *else_block = NULL;
5787    int else_succ = -1;
5788    if (!exec_list_is_empty(&if_stmt->else_list)) {
5789       else_block = nir_if_first_else_block(if_stmt);
5790       assert(nir_if_last_else_block(if_stmt)->successors[0]);
5791       assert(!nir_if_last_else_block(if_stmt)->successors[1]);
5792       else_succ = nir_if_last_else_block(if_stmt)->successors[0]->index;
5793    }
5794 
5795    if (!emit_cond_branch(ctx, cond, then_block->index,
5796                          else_block ? else_block->index : then_succ))
5797       return false;
5798 
5799    /* handle then-block */
5800    if (!emit_cf_list(ctx, &if_stmt->then_list) ||
5801        (!nir_block_ends_in_jump(nir_if_last_then_block(if_stmt)) &&
5802         !emit_branch(ctx, then_succ)))
5803       return false;
5804 
5805    if (else_block) {
5806       /* handle else-block */
5807       if (!emit_cf_list(ctx, &if_stmt->else_list) ||
5808           (!nir_block_ends_in_jump(nir_if_last_else_block(if_stmt)) &&
5809            !emit_branch(ctx, else_succ)))
5810          return false;
5811    }
5812 
5813    return true;
5814 }
5815 
5816 static bool
emit_loop(struct ntd_context * ctx,nir_loop * loop)5817 emit_loop(struct ntd_context *ctx, nir_loop *loop)
5818 {
5819    assert(!nir_loop_has_continue_construct(loop));
5820    nir_block *first_block = nir_loop_first_block(loop);
5821    nir_block *last_block = nir_loop_last_block(loop);
5822 
5823    assert(last_block->successors[0]);
5824    assert(!last_block->successors[1]);
5825 
5826    if (!emit_branch(ctx, first_block->index))
5827       return false;
5828 
5829    if (!emit_cf_list(ctx, &loop->body))
5830       return false;
5831 
5832    /* If the loop's last block doesn't explicitly jump somewhere, then there's
5833     * an implicit continue that should take it back to the first loop block
5834     */
5835    nir_instr *last_instr = nir_block_last_instr(last_block);
5836    if ((!last_instr || last_instr->type != nir_instr_type_jump) &&
5837        !emit_branch(ctx, first_block->index))
5838       return false;
5839 
5840    return true;
5841 }
5842 
5843 static bool
emit_cf_list(struct ntd_context * ctx,struct exec_list * list)5844 emit_cf_list(struct ntd_context *ctx, struct exec_list *list)
5845 {
5846    foreach_list_typed(nir_cf_node, node, node, list) {
5847       switch (node->type) {
5848       case nir_cf_node_block:
5849          if (!emit_block(ctx, nir_cf_node_as_block(node)))
5850             return false;
5851          break;
5852 
5853       case nir_cf_node_if:
5854          if (!emit_if(ctx, nir_cf_node_as_if(node)))
5855             return false;
5856          break;
5857 
5858       case nir_cf_node_loop:
5859          if (!emit_loop(ctx, nir_cf_node_as_loop(node)))
5860             return false;
5861          break;
5862 
5863       default:
5864          unreachable("unsupported cf-list node");
5865          break;
5866       }
5867    }
5868    return true;
5869 }
5870 
5871 static void
insert_sorted_by_binding(struct exec_list * var_list,nir_variable * new_var)5872 insert_sorted_by_binding(struct exec_list *var_list, nir_variable *new_var)
5873 {
5874    nir_foreach_variable_in_list(var, var_list) {
5875       if (var->data.binding > new_var->data.binding) {
5876          exec_node_insert_node_before(&var->node, &new_var->node);
5877          return;
5878       }
5879    }
5880    exec_list_push_tail(var_list, &new_var->node);
5881 }
5882 
5883 
5884 static void
sort_uniforms_by_binding_and_remove_structs(nir_shader * s)5885 sort_uniforms_by_binding_and_remove_structs(nir_shader *s)
5886 {
5887    struct exec_list new_list;
5888    exec_list_make_empty(&new_list);
5889 
5890    nir_foreach_variable_with_modes_safe(var, s, nir_var_uniform) {
5891       exec_node_remove(&var->node);
5892       const struct glsl_type *type = glsl_without_array(var->type);
5893       if (!glsl_type_is_struct(type))
5894          insert_sorted_by_binding(&new_list, var);
5895    }
5896    exec_list_append(&s->variables, &new_list);
5897 }
5898 
5899 static bool
emit_cbvs(struct ntd_context * ctx)5900 emit_cbvs(struct ntd_context *ctx)
5901 {
5902    if (ctx->opts->environment != DXIL_ENVIRONMENT_GL) {
5903       nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ubo) {
5904          if (!emit_ubo_var(ctx, var))
5905             return false;
5906       }
5907    } else {
5908       if (ctx->shader->info.num_ubos) {
5909          const unsigned ubo_size = 16384 /*4096 vec4's*/;
5910          uint array_base = ctx->shader->info.first_ubo_is_default_ubo ? 1 : 0;
5911          bool has_ubo0 = ctx->shader->num_uniforms > 0 && ctx->shader->info.first_ubo_is_default_ubo;
5912          bool has_state_vars = ctx->opts->last_ubo_is_not_arrayed;
5913          unsigned ubo1_array_size = ctx->shader->info.num_ubos - array_base -
5914             (has_state_vars ? 1 : 0);
5915 
5916          if (has_ubo0 &&
5917              !emit_cbv(ctx, 0, 0, ubo_size, 1, "__ubo_uniforms"))
5918             return false;
5919          if (ubo1_array_size &&
5920              !emit_cbv(ctx, array_base, 0, ubo_size, ubo1_array_size, "__ubos"))
5921             return false;
5922          if (has_state_vars &&
5923              !emit_cbv(ctx, ctx->shader->info.num_ubos - 1, 0, ubo_size, 1, "__ubo_state_vars"))
5924             return false;
5925       }
5926    }
5927 
5928    return true;
5929 }
5930 
5931 static bool
emit_scratch(struct ntd_context * ctx,nir_function_impl * impl)5932 emit_scratch(struct ntd_context *ctx, nir_function_impl *impl)
5933 {
5934    uint32_t index = 0;
5935    nir_foreach_function_temp_variable(var, impl)
5936       var->data.driver_location = index++;
5937 
5938    if (ctx->scratchvars)
5939       ralloc_free((void *)ctx->scratchvars);
5940 
5941    ctx->scratchvars = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index);
5942 
5943    nir_foreach_function_temp_variable(var, impl) {
5944       const struct dxil_type *type = get_type_for_glsl_type(&ctx->mod, var->type);
5945       const struct dxil_value *length = dxil_module_get_int32_const(&ctx->mod, 1);
5946       const struct dxil_value *ptr = dxil_emit_alloca(&ctx->mod, type, length, 16);
5947       if (!ptr)
5948          return false;
5949 
5950       ctx->scratchvars[var->data.driver_location] = ptr;
5951    }
5952 
5953    return true;
5954 }
5955 
/* Translates one NIR function into a DXIL function definition: sets up the
 * fp32 denorm-mode attribute if requested, allocates per-SSA bookkeeping,
 * emits scratch allocas and static resource handles, converts the CFG, and
 * finally patches phi nodes once all blocks have been emitted. */
static bool
emit_function(struct ntd_context *ctx, nir_function *func, nir_function_impl *impl)
{
   assert(func->num_params == 0);
   nir_metadata_require(impl, nir_metadata_block_index);

   /* At most one attribute key/value pair (fp32-denorm-mode) plus the
    * NULL terminator. */
   const char *attr_keys[2] = { NULL };
   const char *attr_values[2] = { NULL };
   if (ctx->shader->info.float_controls_execution_mode &
       (FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32 | FLOAT_CONTROLS_DENORM_PRESERVE_FP32))
      attr_keys[0] = "fp32-denorm-mode";
   if (ctx->shader->info.float_controls_execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32)
      attr_values[0] = "ftz";
   else if (ctx->shader->info.float_controls_execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP32)
      attr_values[0] = "preserve";

   /* DXIL entry points take no parameters and return void. */
   const struct dxil_type *void_type = dxil_module_get_void_type(&ctx->mod);
   const struct dxil_type *func_type = dxil_module_add_function_type(&ctx->mod, void_type, NULL, 0);
   struct dxil_func_def *func_def = dxil_add_function_def(&ctx->mod, func->name, func_type, impl->num_blocks, attr_keys, attr_values);
   if (!func_def)
      return false;

   /* Remember the special functions for later metadata emission. */
   if (func->is_entrypoint)
      ctx->main_func_def = func_def;
   else if (func == ctx->tess_ctrl_patch_constant_func)
      ctx->tess_ctrl_patch_constant_func_def = func_def;

   /* Per-SSA-def value table plus float/int type bitsets for this impl. */
   ctx->defs = rzalloc_array(ctx->ralloc_ctx, struct dxil_def, impl->ssa_alloc);
   ctx->float_types = rzalloc_array(ctx->ralloc_ctx, BITSET_WORD, BITSET_WORDS(impl->ssa_alloc));
   ctx->int_types = rzalloc_array(ctx->ralloc_ctx, BITSET_WORD, BITSET_WORDS(impl->ssa_alloc));
   if (!ctx->defs || !ctx->float_types || !ctx->int_types)
      return false;
   ctx->num_defs = impl->ssa_alloc;

   ctx->phis = _mesa_pointer_hash_table_create(ctx->ralloc_ctx);
   if (!ctx->phis)
      return false;

   nir_gather_types(impl, ctx->float_types, ctx->int_types);

   if (!emit_scratch(ctx, impl))
      return false;

   if (!emit_static_indexing_handles(ctx))
      return false;

   if (!emit_cf_list(ctx, &impl->body))
      return false;

   /* Phi sources could only be recorded once all blocks were emitted;
    * resolve them now. */
   hash_table_foreach(ctx->phis, entry) {
      if (!fixup_phi(ctx, (nir_phi_instr *)entry->key,
                     (struct phi_block *)entry->data))
         return false;
   }

   if (!dxil_emit_ret_void(&ctx->mod))
      return false;

   /* Per-function state; freed here so the next function starts clean. */
   ralloc_free(ctx->defs);
   ctx->defs = NULL;
   _mesa_hash_table_destroy(ctx->phis, NULL);
   return true;
}
6019 
/* Emits all module-level content: resource bindings (in the order the DXIL
 * validator requires), shared/constant variables, per-sample input state,
 * I/O signatures, every function body, and stage feature flags, finishing
 * with metadata emission and module serialization. */
static bool
emit_module(struct ntd_context *ctx, const struct nir_to_dxil_options *opts)
{
   /* The validator forces us to emit resources in a specific order:
    * CBVs, Samplers, SRVs, UAVs. While we are at it also remove
    * stale struct uniforms, they are lowered but might not have been removed */
   sort_uniforms_by_binding_and_remove_structs(ctx->shader);

   /* CBVs */
   if (!emit_cbvs(ctx))
      return false;

   /* Samplers */
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
      unsigned count = glsl_type_get_sampler_count(var->type);
      assert(count == 0 || glsl_type_is_bare_sampler(glsl_without_array(var->type)));
      if (count > 0 && !emit_sampler(ctx, var, count))
         return false;
   }

   /* SRVs */
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
      unsigned count = glsl_type_get_texture_count(var->type);
      assert(count == 0 || glsl_type_is_texture(glsl_without_array(var->type)));
      if (count > 0 && !emit_srv(ctx, var, count))
         return false;
   }

   /* Handle read-only SSBOs as SRVs */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
         if ((var->data.access & ACCESS_NON_WRITEABLE) != 0) {
            unsigned count = 1;
            if (glsl_type_is_array(var->type))
               count = glsl_get_length(var->type);
            if (!emit_srv(ctx, var, count))
               return false;
         }
      }
   }

   if (!emit_shared_vars(ctx))
      return false;
   if (!emit_global_consts(ctx))
      return false;

   /* UAVs */
   if (ctx->shader->info.stage == MESA_SHADER_KERNEL) {
      /* CL kernels access global memory through dedicated UAVs. */
      if (!emit_globals(ctx, opts->num_kernel_globals))
         return false;

   } else if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      /* Handle read/write SSBOs as UAVs */
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
         if ((var->data.access & ACCESS_NON_WRITEABLE) == 0) {
            unsigned count = 1;
            if (glsl_type_is_array(var->type))
               count = glsl_get_length(var->type);
            if (!emit_uav(ctx, var->data.binding, var->data.descriptor_set,
                        count, DXIL_COMP_TYPE_INVALID, 1,
                        DXIL_RESOURCE_KIND_RAW_BUFFER, var->data.access, var->name))
               return false;

         }
      }
   } else {
      /* GL/CL-style: one raw-buffer UAV per SSBO slot in space 0. */
      for (unsigned i = 0; i < ctx->shader->info.num_ssbos; ++i) {
         char name[64];
         snprintf(name, sizeof(name), "__ssbo%d", i);
         if (!emit_uav(ctx, i, 0, 1, DXIL_COMP_TYPE_INVALID, 1,
                       DXIL_RESOURCE_KIND_RAW_BUFFER, 0, name))
            return false;
      }
      /* To work around a WARP bug, bind these descriptors a second time in descriptor
       * space 2. Space 0 will be used for static indexing, while space 2 will be used
       * for dynamic indexing. Space 0 will be individual SSBOs in the DXIL shader, while
       * space 2 will be a single array.
       */
      if (ctx->shader->info.num_ssbos &&
          !emit_uav(ctx, 0, 2, ctx->shader->info.num_ssbos, DXIL_COMP_TYPE_INVALID, 1,
                    DXIL_RESOURCE_KIND_RAW_BUFFER, 0, "__ssbo_dynamic"))
         return false;
   }

   nir_foreach_image_variable(var, ctx->shader) {
      if (!emit_uav_var(ctx, var, glsl_type_get_image_count(var->type)))
         return false;
   }

   /* Per-sample execution is implied by reading SV_SampleIndex or by
    * explicit sample shading / sample-qualified inputs. */
   ctx->mod.info.has_per_sample_input =
      BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
      ctx->shader->info.fs.uses_sample_shading ||
      ctx->shader->info.fs.uses_sample_qualifier;
   if (!ctx->mod.info.has_per_sample_input && ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in | nir_var_system_value) {
         if (var->data.sample) {
            ctx->mod.info.has_per_sample_input = true;
            break;
         }
      }
   }

   /* From the Vulkan spec 1.3.238, section 15.8:
    * When Sample Shading is enabled, the x and y components of FragCoord reflect the location
    * of one of the samples corresponding to the shader invocation.
    *
    * In other words, if the fragment shader is executing per-sample, then the position variable
    * should always be per-sample,
    *
    * Also:
    * The Centroid interpolation decoration is ignored, but allowed, on FragCoord.
    */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      nir_variable *pos_var = nir_find_variable_with_location(ctx->shader, nir_var_shader_in, VARYING_SLOT_POS);
      if (pos_var) {
         if (ctx->mod.info.has_per_sample_input)
            pos_var->data.sample = true;
         pos_var->data.centroid = false;
      }
   }

   /* Pixel shaders clip against their own clip-distance inputs; other
    * stages take the size from the options. */
   unsigned input_clip_size = ctx->mod.shader_kind == DXIL_PIXEL_SHADER ?
      ctx->shader->info.clip_distance_array_size : ctx->opts->input_clip_size;
   preprocess_signatures(&ctx->mod, ctx->shader, input_clip_size);

   nir_foreach_function_with_impl(func, impl, ctx->shader) {
      if (!emit_function(ctx, func, impl))
         return false;
   }

   /* Stage-dependent feature flags derived from the I/O actually used. */
   if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_out) {
         if (var->data.location == FRAG_RESULT_STENCIL) {
            ctx->mod.feats.stencil_ref = true;
         }
      }
   } else if (ctx->shader->info.stage == MESA_SHADER_VERTEX ||
              ctx->shader->info.stage == MESA_SHADER_TESS_EVAL) {
      if (ctx->shader->info.outputs_written &
          (VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER))
         ctx->mod.feats.array_layer_from_vs_or_ds = true;
   } else if (ctx->shader->info.stage == MESA_SHADER_GEOMETRY ||
              ctx->shader->info.stage == MESA_SHADER_TESS_CTRL) {
      if (ctx->shader->info.inputs_read &
          (VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER))
         ctx->mod.feats.array_layer_from_vs_or_ds = true;
   }

   /* Native 16-bit types require shader model 6.2. */
   if (ctx->mod.feats.native_low_precision && ctx->mod.minor_version < 2) {
      ctx->logger->log(ctx->logger->priv,
                       "Shader uses 16bit, which requires shader model 6.2, but 6.2 is unsupported\n");
      return false;
   }

   return emit_metadata(ctx) &&
          dxil_emit_module(&ctx->mod);
}
6177 
6178 static unsigned int
get_dxil_shader_kind(struct nir_shader * s)6179 get_dxil_shader_kind(struct nir_shader *s)
6180 {
6181    switch (s->info.stage) {
6182    case MESA_SHADER_VERTEX:
6183       return DXIL_VERTEX_SHADER;
6184    case MESA_SHADER_TESS_CTRL:
6185       return DXIL_HULL_SHADER;
6186    case MESA_SHADER_TESS_EVAL:
6187       return DXIL_DOMAIN_SHADER;
6188    case MESA_SHADER_GEOMETRY:
6189       return DXIL_GEOMETRY_SHADER;
6190    case MESA_SHADER_FRAGMENT:
6191       return DXIL_PIXEL_SHADER;
6192    case MESA_SHADER_KERNEL:
6193    case MESA_SHADER_COMPUTE:
6194       return DXIL_COMPUTE_SHADER;
6195    default:
6196       unreachable("unknown shader stage in nir_to_dxil");
6197       return DXIL_COMPUTE_SHADER;
6198    }
6199 }
6200 
6201 static unsigned
lower_bit_size_callback(const nir_instr * instr,void * data)6202 lower_bit_size_callback(const nir_instr* instr, void *data)
6203 {
6204    if (instr->type != nir_instr_type_alu)
6205       return 0;
6206    nir_alu_instr *alu = nir_instr_as_alu(instr);
6207 
6208    if (nir_op_infos[alu->op].is_conversion)
6209       return 0;
6210 
6211    if (nir_op_is_vec_or_mov(alu->op))
6212       return 0;
6213 
6214    unsigned num_inputs = nir_op_infos[alu->op].num_inputs;
6215    const struct nir_to_dxil_options *opts = (const struct nir_to_dxil_options*)data;
6216    unsigned min_bit_size = opts->lower_int16 ? 32 : 16;
6217 
6218    unsigned ret = 0;
6219    for (unsigned i = 0; i < num_inputs; i++) {
6220       unsigned bit_size = nir_src_bit_size(alu->src[i].src);
6221       if (bit_size != 1 && bit_size < min_bit_size)
6222          ret = min_bit_size;
6223    }
6224 
6225    return ret;
6226 }
6227 
6228 static bool
vectorize_filter(unsigned align_mul,unsigned align_offset,unsigned bit_size,unsigned num_components,int64_t hole_size,nir_intrinsic_instr * low,nir_intrinsic_instr * high,void * data)6229 vectorize_filter(
6230    unsigned align_mul,
6231    unsigned align_offset,
6232    unsigned bit_size,
6233    unsigned num_components,
6234    int64_t hole_size,
6235    nir_intrinsic_instr *low, nir_intrinsic_instr *high,
6236    void *data)
6237 {
6238    return hole_size <= 0 && util_is_power_of_two_nonzero(num_components);
6239 }
6240 
/* Callback data for lower_mem_access_bit_sizes_cb: bundles the NIR compiler
 * options with the DXIL-specific options so the callback can consult both. */
struct lower_mem_bit_sizes_data {
   const nir_shader_compiler_options *nir_options;
   const struct nir_to_dxil_options *dxil_options;
};
6245 
/* nir_lower_mem_access_bit_sizes callback: picks the bit size, component
 * count, and alignment for a single UBO/SSBO access. Supported bit sizes
 * range from 16 (32 when 16-bit ints are lowered) up to 32. */
static nir_mem_access_size_align
lower_mem_access_bit_sizes_cb(nir_intrinsic_op intrin,
                              uint8_t bytes,
                              uint8_t bit_size_in,
                              uint32_t align_mul,
                              uint32_t align_offset,
                              bool offset_is_const,
                              enum gl_access_qualifier access,
                              const void *cb_data)
{
   const struct lower_mem_bit_sizes_data *data = cb_data;
   unsigned max_bit_size = 32;
   unsigned min_bit_size = data->dxil_options->lower_int16 ? 32 : 16;
   /* Clamp the requested bit size to the supported [min, max] range. */
   unsigned closest_bit_size = MAX2(min_bit_size, MIN2(max_bit_size, bit_size_in));
   if (intrin == nir_intrinsic_load_ubo) {
      /* UBO loads can be done at whatever (supported) bit size, but require 16 byte
       * alignment and can load up to 16 bytes per instruction. However this pass requires
       * loading 16 bytes of data to get 16-byte alignment. We're going to run lower_ubo_vec4
       * which can deal with unaligned vec4s, so for this pass let's just deal with bit size
       * and total size restrictions. */
      return (nir_mem_access_size_align) {
         .align = closest_bit_size / 8,
         .bit_size = closest_bit_size,
         .num_components = DIV_ROUND_UP(MIN2(bytes, 16) * 8, closest_bit_size),
         .shift = nir_mem_access_shift_method_scalar,
      };
   }

   assert(intrin == nir_intrinsic_load_ssbo || intrin == nir_intrinsic_store_ssbo);
   uint32_t align = nir_combined_align(align_mul, align_offset);
   if (align < min_bit_size / 8) {
      /* Unaligned load/store, use the minimum bit size, up to 4 components */
      unsigned ideal_num_components = intrin == nir_intrinsic_load_ssbo ?
         DIV_ROUND_UP(bytes * 8, min_bit_size) :
         (32 / min_bit_size);
      return (nir_mem_access_size_align) {
         .align = min_bit_size / 8,
         .bit_size = min_bit_size,
         .num_components = MIN2(4, ideal_num_components),
         .shift = nir_mem_access_shift_method_scalar,
      };
   }

   /* Increase/decrease bit size to try to get closer to the requested byte size/align */
   unsigned bit_size = closest_bit_size;
   unsigned target = MIN2(bytes, align);
   /* Shrink while a whole element would overshoot the target... */
   while (target < bit_size / 8 && bit_size > min_bit_size)
      bit_size /= 2;
   /* ...grow while even 4 components would still undershoot it. */
   while (target > bit_size / 8 * 4 && bit_size < max_bit_size)
      bit_size *= 2;

   /* This is the best we can do */
   unsigned num_components = intrin == nir_intrinsic_load_ssbo ?
      DIV_ROUND_UP(bytes * 8, bit_size) :
      MAX2(1, (bytes * 8 / bit_size));
   return (nir_mem_access_size_align) {
      .align = bit_size / 8,
      .bit_size = bit_size,
      .num_components = MIN2(4, num_components),
      .shift = nir_mem_access_shift_method_scalar,
   };
}
6308 
/* Runs the NIR optimization loop to a fixed point, then a late-algebraic
 * loop, and finally replaces undefs with zero. The pass order within the
 * loop is deliberate; progress from any pass re-runs the whole loop. */
static void
optimize_nir(struct nir_shader *s, const struct nir_to_dxil_options *opts)
{
   bool progress;
   do {
      progress = false;
      NIR_PASS_V(s, nir_lower_vars_to_ssa);
      NIR_PASS(progress, s, nir_lower_indirect_derefs, nir_var_function_temp, 4);
      NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, nir_opt_copy_prop_vars);
      /* Widen sub-minimum ALU bit sizes (see lower_bit_size_callback). */
      NIR_PASS(progress, s, nir_lower_bit_size, lower_bit_size_callback, (void*)opts);
      NIR_PASS(progress, s, dxil_nir_lower_8bit_conv);
      if (opts->lower_int16)
         NIR_PASS(progress, s, dxil_nir_lower_16bit_conv);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_if,
               nir_opt_if_optimize_phi_true_false | nir_opt_if_avoid_64bit_phis);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, dxil_nir_algebraic);
      if (s->options->lower_int64_options)
         NIR_PASS(progress, s, nir_lower_int64);
      NIR_PASS(progress, s, nir_lower_alu);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_undef);
      NIR_PASS(progress, s, nir_opt_deref);
      NIR_PASS(progress, s, dxil_nir_lower_upcast_phis, opts->lower_int16 ? 32 : 16);
      NIR_PASS(progress, s, nir_lower_64bit_phis);
      NIR_PASS(progress, s, nir_lower_phis_to_scalar, true);
      NIR_PASS(progress, s, nir_opt_loop_unroll);
      NIR_PASS(progress, s, nir_lower_pack);
      NIR_PASS(progress, s, dxil_nir_remove_oob_array_accesses);
      NIR_PASS_V(s, nir_lower_system_values);
   } while (progress);

   /* Late algebraic rules can enable each other; iterate to a fixed point. */
   do {
      progress = false;
      NIR_PASS(progress, s, nir_opt_algebraic_late);
   } while (progress);

   NIR_PASS_V(s, nir_lower_undef_to_zero);
}
6355 
/* Fills the validation state (resource list plus pipeline-state-validation
 * record) that accompanies the DXIL blob, including the stage-specific PSV
 * fields for the current shader kind. */
static
void dxil_fill_validation_state(struct ntd_context *ctx,
                                struct dxil_validation_state *state)
{
   /* Validator 1.6+ uses the larger v1 resource records. */
   unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
      sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
   state->num_resources = ctx->resources.size / resource_element_size;
   state->resources.v0 = (struct dxil_resource_v0*)ctx->resources.data;
   if (ctx->shader->info.subgroup_size >= SUBGROUP_SIZE_REQUIRE_4) {
      /* A required subgroup size pins both wave-lane-count bounds. */
      state->state.psv1.psv0.max_expected_wave_lane_count = ctx->shader->info.subgroup_size;
      state->state.psv1.psv0.min_expected_wave_lane_count = ctx->shader->info.subgroup_size;
   } else {
      state->state.psv1.psv0.max_expected_wave_lane_count = UINT_MAX;
   }
   state->state.psv1.shader_stage = (uint8_t)ctx->mod.shader_kind;
   state->state.psv1.uses_view_id = (uint8_t)ctx->mod.feats.view_id;
   state->state.psv1.sig_input_elements = (uint8_t)ctx->mod.num_sig_inputs;
   state->state.psv1.sig_output_elements = (uint8_t)ctx->mod.num_sig_outputs;
   state->state.psv1.sig_patch_const_or_prim_elements = (uint8_t)ctx->mod.num_sig_patch_consts;

   /* Stage-specific PSV fields. */
   switch (ctx->mod.shader_kind) {
   case DXIL_VERTEX_SHADER:
      state->state.psv1.psv0.vs.output_position_present = ctx->mod.info.has_out_position;
      break;
   case DXIL_PIXEL_SHADER:
      /* TODO: handle depth outputs */
      state->state.psv1.psv0.ps.depth_output = ctx->mod.info.has_out_depth;
      state->state.psv1.psv0.ps.sample_frequency =
         ctx->mod.info.has_per_sample_input;
      break;
   case DXIL_COMPUTE_SHADER:
      /* Workgroup dimensions must be at least 1 in each axis. */
      state->state.num_threads_x = MAX2(ctx->shader->info.workgroup_size[0], 1);
      state->state.num_threads_y = MAX2(ctx->shader->info.workgroup_size[1], 1);
      state->state.num_threads_z = MAX2(ctx->shader->info.workgroup_size[2], 1);
      break;
   case DXIL_GEOMETRY_SHADER:
      state->state.psv1.max_vertex_count = ctx->shader->info.gs.vertices_out;
      state->state.psv1.psv0.gs.input_primitive = dxil_get_input_primitive(ctx->shader->info.gs.input_primitive);
      state->state.psv1.psv0.gs.output_toplology = dxil_get_primitive_topology(ctx->shader->info.gs.output_primitive);
      state->state.psv1.psv0.gs.output_stream_mask = MAX2(ctx->shader->info.gs.active_stream_mask, 1);
      state->state.psv1.psv0.gs.output_position_present = ctx->mod.info.has_out_position;
      break;
   case DXIL_HULL_SHADER:
      state->state.psv1.psv0.hs.input_control_point_count = ctx->tess_input_control_point_count;
      state->state.psv1.psv0.hs.output_control_point_count = ctx->shader->info.tess.tcs_vertices_out;
      state->state.psv1.psv0.hs.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode);
      state->state.psv1.psv0.hs.tessellator_output_primitive = get_tessellator_output_primitive(&ctx->shader->info);
      state->state.psv1.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts;
      break;
   case DXIL_DOMAIN_SHADER:
      state->state.psv1.psv0.ds.input_control_point_count = ctx->shader->info.tess.tcs_vertices_out;
      state->state.psv1.psv0.ds.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode);
      state->state.psv1.psv0.ds.output_position_present = ctx->mod.info.has_out_position;
      state->state.psv1.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts;
      break;
   default:
      assert(0 && "Shader type not (yet) supported");
   }
}
6415 
6416 static nir_variable *
add_sysvalue(struct ntd_context * ctx,uint8_t value,char * name,int driver_location)6417 add_sysvalue(struct ntd_context *ctx,
6418               uint8_t value, char *name,
6419               int driver_location)
6420 {
6421 
6422    nir_variable *var = rzalloc(ctx->shader, nir_variable);
6423    if (!var)
6424       return NULL;
6425    var->data.driver_location = driver_location;
6426    var->data.location = value;
6427    var->type = glsl_uint_type();
6428    var->name = name;
6429    var->data.mode = nir_var_system_value;
6430    var->data.interpolation = INTERP_MODE_FLAT;
6431    return var;
6432 }
6433 
6434 static bool
append_input_or_sysvalue(struct ntd_context * ctx,int input_loc,int sv_slot,char * name,int driver_location)6435 append_input_or_sysvalue(struct ntd_context *ctx,
6436                          int input_loc,  int sv_slot,
6437                          char *name, int driver_location)
6438 {
6439    if (input_loc >= 0) {
6440       /* Check inputs whether a variable is available the corresponds
6441        * to the sysvalue */
6442       nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
6443          if (var->data.location == input_loc) {
6444             ctx->system_value[sv_slot] = var;
6445             return true;
6446          }
6447       }
6448    }
6449 
6450    ctx->system_value[sv_slot] = add_sysvalue(ctx, sv_slot, name, driver_location);
6451    if (!ctx->system_value[sv_slot])
6452       return false;
6453 
6454    nir_shader_add_variable(ctx->shader, ctx->system_value[sv_slot]);
6455    return true;
6456 }
6457 
/* Table of system values that may need input/sysvalue variables allocated
 * (see allocate_sysvalues). */
struct sysvalue_name {
   gl_system_value value;          /* NIR system value to look for */
   int slot;                       /* varying slot of a matching input, or -1 */
   char *name;                     /* DXIL semantic name */
   gl_shader_stage only_in_shader; /* restrict to this stage; MESA_SHADER_NONE = any */
} possible_sysvalues[] = {
   {SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, -1, "SV_VertexID", MESA_SHADER_NONE},
   {SYSTEM_VALUE_INSTANCE_ID, -1, "SV_InstanceID", MESA_SHADER_NONE},
   {SYSTEM_VALUE_FRONT_FACE, VARYING_SLOT_FACE, "SV_IsFrontFace", MESA_SHADER_NONE},
   {SYSTEM_VALUE_PRIMITIVE_ID, VARYING_SLOT_PRIMITIVE_ID, "SV_PrimitiveID", MESA_SHADER_GEOMETRY},
   {SYSTEM_VALUE_SAMPLE_ID, -1, "SV_SampleIndex", MESA_SHADER_NONE},
};
6470 
/* Creates variables for every system value the shader reads (per the
 * possible_sysvalues table), assigning them driver locations after all
 * existing inputs. For fragment shaders, may also force SYSTEM_VALUE_SAMPLE_ID
 * into system_values_read to satisfy DXIL validation (see comment below). */
static bool
allocate_sysvalues(struct ntd_context *ctx)
{
   /* Start new driver locations one past the highest one already in use. */
   unsigned driver_location = 0;
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in)
      driver_location = MAX2(driver_location, var->data.driver_location + 1);
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_system_value)
      driver_location = MAX2(driver_location, var->data.driver_location + 1);

   if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT &&
       !BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID)) {
      bool need_sample_id = ctx->shader->info.fs.uses_sample_shading;

      /* "var->data.sample = true" sometimes just mean, "I want per-sample
       * shading", which explains why we can end up with vars having flat
       * interpolation with the per-sample bit set. If there's only such
       * type of variables, we need to tell DXIL that we read SV_SampleIndex
       * to make DXIL validation happy.
       */
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
         bool var_can_be_sample_rate = !var->data.centroid && var->data.interpolation != INTERP_MODE_FLAT;
         /* If there's an input that will actually force sample-rate shading, then we don't
          * need SV_SampleIndex. */
         if (var->data.sample && var_can_be_sample_rate) {
            need_sample_id = false;
            break;
         }
         /* If there's an input that wants to be sample-rate, but can't be, then we might
          * need SV_SampleIndex. */
         if (var->data.sample && !var_can_be_sample_rate)
            need_sample_id = true;
      }

      if (need_sample_id)
         BITSET_SET(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
   }

   /* Allocate a variable for each system value this stage actually reads. */
   for (unsigned i = 0; i < ARRAY_SIZE(possible_sysvalues); ++i) {
      struct sysvalue_name *info = &possible_sysvalues[i];
      if (info->only_in_shader != MESA_SHADER_NONE &&
          info->only_in_shader != ctx->shader->info.stage)
         continue;
      if (BITSET_TEST(ctx->shader->info.system_values_read, info->value)) {
         if (!append_input_or_sysvalue(ctx, info->slot,
                                       info->value, info->name,
                                       driver_location++))
            return false;
      }
   }
   return true;
}
6522 
/* nir_lower_io type-size callback: number of vec4 slots a type occupies.
 * The `bindless` flag is unused; attribute slots are counted the same way
 * either way (vertex-input counting is disabled via `false`). */
static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
   return glsl_count_attribute_slots(type, false);
}
6528 
/* Range of DXIL validator versions and shader models this backend knows how
 * to emit; requests outside these bounds are rejected or clamped in
 * nir_to_dxil(). */
static const unsigned dxil_validator_min_capable_version = DXIL_VALIDATOR_1_4;
static const unsigned dxil_validator_max_capable_version = DXIL_VALIDATOR_1_8;
static const unsigned dxil_min_shader_model = SHADER_MODEL_6_0;
static const unsigned dxil_max_shader_model = SHADER_MODEL_6_8;
6533 
6534 bool
nir_to_dxil(struct nir_shader * s,const struct nir_to_dxil_options * opts,const struct dxil_logger * logger,struct blob * blob)6535 nir_to_dxil(struct nir_shader *s, const struct nir_to_dxil_options *opts,
6536             const struct dxil_logger *logger, struct blob *blob)
6537 {
6538    assert(opts);
6539    bool retval = true;
6540    debug_dxil = (int)debug_get_option_debug_dxil();
6541    blob_init(blob);
6542 
6543    if (opts->shader_model_max < dxil_min_shader_model) {
6544       debug_printf("D3D12: cannot support emitting shader models lower than %d.%d\n",
6545                    dxil_min_shader_model >> 16,
6546                    dxil_min_shader_model & 0xffff);
6547       return false;
6548    }
6549 
6550    if (opts->shader_model_max > dxil_max_shader_model) {
6551       debug_printf("D3D12: cannot support emitting higher than shader model %d.%d\n",
6552                    dxil_max_shader_model >> 16,
6553                    dxil_max_shader_model & 0xffff);
6554       return false;
6555    }
6556 
6557    if (opts->validator_version_max != NO_DXIL_VALIDATION &&
6558        opts->validator_version_max < dxil_validator_min_capable_version) {
6559       debug_printf("D3D12: Invalid validator version %d.%d, must be 1.4 or greater\n",
6560          opts->validator_version_max >> 16,
6561          opts->validator_version_max & 0xffff);
6562       return false;
6563    }
6564 
6565    /* If no validation, write a blob as if it was going to be validated by the newest understood validator.
6566     * Same if the validator is newer than we know how to write for.
6567     */
6568    uint32_t validator_version =
6569       opts->validator_version_max == NO_DXIL_VALIDATION ||
6570       opts->validator_version_max > dxil_validator_max_capable_version ?
6571       dxil_validator_max_capable_version : opts->validator_version_max;
6572 
6573    struct ntd_context *ctx = calloc(1, sizeof(*ctx));
6574    if (!ctx)
6575       return false;
6576 
6577    ctx->opts = opts;
6578    ctx->shader = s;
6579    ctx->logger = logger ? logger : &default_logger;
6580 
6581    ctx->ralloc_ctx = ralloc_context(NULL);
6582    if (!ctx->ralloc_ctx) {
6583       retval = false;
6584       goto out;
6585    }
6586 
6587    util_dynarray_init(&ctx->srv_metadata_nodes, ctx->ralloc_ctx);
6588    util_dynarray_init(&ctx->uav_metadata_nodes, ctx->ralloc_ctx);
6589    util_dynarray_init(&ctx->cbv_metadata_nodes, ctx->ralloc_ctx);
6590    util_dynarray_init(&ctx->sampler_metadata_nodes, ctx->ralloc_ctx);
6591    util_dynarray_init(&ctx->resources, ctx->ralloc_ctx);
6592    dxil_module_init(&ctx->mod, ctx->ralloc_ctx);
6593    ctx->mod.shader_kind = get_dxil_shader_kind(s);
6594    ctx->mod.major_version = 6;
6595    /* Use the highest shader model that's supported and can be validated */
6596    ctx->mod.minor_version =
6597       MIN2(opts->shader_model_max & 0xffff, validator_version & 0xffff);
6598    ctx->mod.major_validator = validator_version >> 16;
6599    ctx->mod.minor_validator = validator_version & 0xffff;
6600 
6601    if (s->info.stage <= MESA_SHADER_FRAGMENT) {
6602       uint64_t in_mask =
6603          s->info.stage == MESA_SHADER_VERTEX ?
6604          0 : (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER);
6605       uint64_t out_mask =
6606          s->info.stage == MESA_SHADER_FRAGMENT ?
6607          ((1ull << FRAG_RESULT_STENCIL) | (1ull << FRAG_RESULT_SAMPLE_MASK)) :
6608          (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER);
6609 
6610       NIR_PASS_V(s, dxil_nir_fix_io_uint_type, in_mask, out_mask);
6611    }
6612 
6613    NIR_PASS_V(s, dxil_nir_lower_fquantize2f16);
6614    NIR_PASS_V(s, nir_lower_frexp);
6615    NIR_PASS_V(s, nir_lower_flrp, 16 | 32 | 64, true);
6616    NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4, nir_lower_io_lower_64bit_to_32);
6617    NIR_PASS_V(s, dxil_nir_ensure_position_writes);
6618    NIR_PASS_V(s, dxil_nir_lower_system_values);
6619    NIR_PASS_V(s, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_system_value | nir_var_shader_out, NULL, NULL);
6620 
6621    /* Do a round of optimization to try to vectorize loads/stores. Otherwise the addresses used for loads
6622     * might be too opaque for the pass to see that they're next to each other. */
6623    optimize_nir(s, opts);
6624 
6625    /* Vectorize UBO/SSBO accesses aggressively. This can help increase alignment to enable us to do better
6626     * chunking of loads and stores after lowering bit sizes. Ignore load/store size limitations here, we'll
6627     * address them with lower_mem_access_bit_sizes */
6628    nir_load_store_vectorize_options vectorize_opts = {
6629       .callback = vectorize_filter,
6630       .modes = nir_var_mem_ubo | nir_var_mem_ssbo,
6631    };
6632    NIR_PASS_V(s, nir_opt_load_store_vectorize, &vectorize_opts);
6633 
6634    /* Now that they're bloated to the max, address bit size restrictions and overall size limitations for
6635     * a single load/store op. */
6636    struct lower_mem_bit_sizes_data mem_size_data = { s->options, opts };
6637    nir_lower_mem_access_bit_sizes_options mem_size_options = {
6638       .modes = nir_var_mem_ubo | nir_var_mem_ssbo,
6639       .callback = lower_mem_access_bit_sizes_cb,
6640       .may_lower_unaligned_stores_to_atomics = true,
6641       .cb_data = &mem_size_data
6642    };
6643    NIR_PASS_V(s, nir_lower_mem_access_bit_sizes, &mem_size_options);
6644 
6645    /* Lastly, convert byte-addressed UBO loads to vec-addressed. This pass can also deal with selecting sub-
6646     * components from the load and dealing with vec-straddling loads. */
6647    NIR_PASS_V(s, nir_lower_ubo_vec4);
6648 
6649    if (opts->shader_model_max < SHADER_MODEL_6_6) {
6650       /* In a later pass, load_helper_invocation will be lowered to sample mask based fallback,
6651        * so both load- and is- will be emulated eventually.
6652        */
6653       NIR_PASS_V(s, nir_lower_is_helper_invocation);
6654    }
6655 
6656    if (ctx->mod.shader_kind == DXIL_HULL_SHADER)
6657       NIR_PASS_V(s, dxil_nir_split_tess_ctrl, &ctx->tess_ctrl_patch_constant_func);
6658 
6659    if (ctx->mod.shader_kind == DXIL_HULL_SHADER ||
6660        ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) {
6661       /* Make sure any derefs are gone after lower_io before updating tess level vars */
6662       NIR_PASS_V(s, nir_opt_dce);
6663       NIR_PASS_V(s, dxil_nir_fixup_tess_level_for_domain);
6664    }
6665 
6666    optimize_nir(s, opts);
6667 
6668    NIR_PASS_V(s, nir_remove_dead_variables,
6669               nir_var_function_temp | nir_var_mem_constant | nir_var_mem_shared, NULL);
6670 
6671    if (!allocate_sysvalues(ctx))
6672       return false;
6673 
6674    NIR_PASS_V(s, dxil_nir_lower_sysval_to_load_input, ctx->system_value);
6675    NIR_PASS_V(s, nir_opt_dce);
6676 
6677    /* This needs to be after any copy prop is done to prevent these movs from being erased */
6678    NIR_PASS_V(s, dxil_nir_move_consts);
6679    NIR_PASS_V(s, nir_opt_dce);
6680 
6681    NIR_PASS_V(s, dxil_nir_guess_image_formats);
6682 
6683    if (debug_dxil & DXIL_DEBUG_VERBOSE)
6684       nir_print_shader(s, stderr);
6685 
6686    if (!emit_module(ctx, opts)) {
6687       debug_printf("D3D12: dxil_container_add_module failed\n");
6688       retval = false;
6689       goto out;
6690    }
6691 
6692    if (debug_dxil & DXIL_DEBUG_DUMP_MODULE) {
6693       struct dxil_dumper *dumper = dxil_dump_create();
6694       dxil_dump_module(dumper, &ctx->mod);
6695       fprintf(stderr, "\n");
6696       dxil_dump_buf_to_file(dumper, stderr);
6697       fprintf(stderr, "\n\n");
6698       dxil_dump_free(dumper);
6699    }
6700 
6701    struct dxil_container container;
6702    dxil_container_init(&container);
6703    /* Native low precision disables min-precision */
6704    if (ctx->mod.feats.native_low_precision)
6705       ctx->mod.feats.min_precision = false;
6706    if (!dxil_container_add_features(&container, &ctx->mod.feats)) {
6707       debug_printf("D3D12: dxil_container_add_features failed\n");
6708       retval = false;
6709       goto out;
6710    }
6711 
6712    if (!dxil_container_add_io_signature(&container,
6713                                         DXIL_ISG1,
6714                                         ctx->mod.num_sig_inputs,
6715                                         ctx->mod.inputs,
6716                                         ctx->mod.minor_validator >= 7)) {
6717       debug_printf("D3D12: failed to write input signature\n");
6718       retval = false;
6719       goto out;
6720    }
6721 
6722    if (!dxil_container_add_io_signature(&container,
6723                                         DXIL_OSG1,
6724                                         ctx->mod.num_sig_outputs,
6725                                         ctx->mod.outputs,
6726                                         ctx->mod.minor_validator >= 7)) {
6727       debug_printf("D3D12: failed to write output signature\n");
6728       retval = false;
6729       goto out;
6730    }
6731 
6732    if ((ctx->mod.shader_kind == DXIL_HULL_SHADER ||
6733         ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) &&
6734        !dxil_container_add_io_signature(&container,
6735                                         DXIL_PSG1,
6736                                         ctx->mod.num_sig_patch_consts,
6737                                         ctx->mod.patch_consts,
6738                                         ctx->mod.minor_validator >= 7)) {
6739       debug_printf("D3D12: failed to write patch constant signature\n");
6740       retval = false;
6741       goto out;
6742    }
6743 
6744    struct dxil_validation_state validation_state;
6745    memset(&validation_state, 0, sizeof(validation_state));
6746    dxil_fill_validation_state(ctx, &validation_state);
6747 
6748    if (!dxil_container_add_state_validation(&container,&ctx->mod,
6749                                             &validation_state)) {
6750       debug_printf("D3D12: failed to write state-validation\n");
6751       retval = false;
6752       goto out;
6753    }
6754 
6755    if (!dxil_container_add_module(&container, &ctx->mod)) {
6756       debug_printf("D3D12: failed to write module\n");
6757       retval = false;
6758       goto out;
6759    }
6760 
6761    if (!dxil_container_write(&container, blob)) {
6762       debug_printf("D3D12: dxil_container_write failed\n");
6763       retval = false;
6764       goto out;
6765    }
6766    dxil_container_finish(&container);
6767 
6768    if (debug_dxil & DXIL_DEBUG_DUMP_BLOB) {
6769       static int shader_id = 0;
6770       char buffer[64];
6771       snprintf(buffer, sizeof(buffer), "shader_%s_%d.blob",
6772                get_shader_kind_str(ctx->mod.shader_kind), shader_id++);
6773       debug_printf("Try to write blob to %s\n", buffer);
6774       FILE *f = fopen(buffer, "wb");
6775       if (f) {
6776          fwrite(blob->data, 1, blob->size, f);
6777          fclose(f);
6778       }
6779    }
6780 
6781 out:
6782    dxil_module_release(&ctx->mod);
6783    ralloc_free(ctx->ralloc_ctx);
6784    free(ctx);
6785    return retval;
6786 }
6787