/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_to_dxil.h"

#include "dxil_container.h"
#include "dxil_dump.h"
#include "dxil_enums.h"
#include "dxil_function.h"
#include "dxil_module.h"
#include "dxil_nir.h"
#include "dxil_signature.h"

#include "nir/nir_builder.h"
#include "nir_deref.h"
#include "util/ralloc.h"
#include "util/u_debug.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"

#include "git_sha1.h"

#include "vulkan/vulkan_core.h"

#include <stdint.h>

int debug_dxil = 0;

static const struct debug_named_value
dxil_debug_options[] = {
   { "verbose",     DXIL_DEBUG_VERBOSE,     NULL },
   { "dump_blob",   DXIL_DEBUG_DUMP_BLOB,   "Write shader blobs" },
   { "trace",       DXIL_DEBUG_TRACE,       "Trace instruction conversion" },
   { "dump_module", DXIL_DEBUG_DUMP_MODULE, "Dump module tree to stderr" },
   DEBUG_NAMED_VALUE_END
};

DEBUG_GET_ONCE_FLAGS_OPTION(debug_dxil, "DXIL_DEBUG", dxil_debug_options, 0)
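/* The flags are read once from the DXIL_DEBUG environment variable, which
 * takes a comma-separated list of the option names above, e.g.
 * DXIL_DEBUG=trace,dump_module.
 */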

static void
log_nir_instr_unsupported(const struct dxil_logger *logger,
                          const char *message_prefix, const nir_instr *instr)
{
   char *msg = NULL;
   char *instr_str = nir_instr_as_str(instr, NULL);
   asprintf(&msg, "%s: %s\n", message_prefix, instr_str);
   ralloc_free(instr_str);
   assert(msg);
   logger->log(logger->priv, msg);
   free(msg);
}

static void
default_logger_func(void *priv, const char *msg)
{
   fprintf(stderr, "%s", msg);
   unreachable("Unhandled error");
}

static const struct dxil_logger default_logger = { .priv = NULL, .log = default_logger_func };

/* Wrap the whole macro in do/while so it behaves as a single statement and
 * the inner 'if' cannot capture a following 'else'.
 */
#define TRACE_CONVERSION(instr) \
   do { \
      if (debug_dxil & DXIL_DEBUG_TRACE) { \
         fprintf(stderr, "Convert '"); \
         nir_print_instr(instr, stderr); \
         fprintf(stderr, "'\n"); \
      } \
   } while (0)

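/* Baseline NIR compiler options: request lowering for every operation that
 * has no direct DXIL equivalent, so the NIR reaching the emission code
 * below only contains constructs this backend can translate.
 */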
static const nir_shader_compiler_options
nir_options = {
   .lower_ineg = true,
   .lower_fneg = true,
   .lower_ffma16 = true,
   .lower_ffma32 = true,
   .lower_isign = true,
   .lower_fsign = true,
   .lower_iabs = true,
   .lower_fmod = true,
   .lower_fpow = true,
   .lower_scmp = true,
   .lower_ldexp = true,
   .lower_flrp16 = true,
   .lower_flrp32 = true,
   .lower_flrp64 = true,
   .lower_bitfield_extract = true,
   .lower_ifind_msb = true,
   .lower_ufind_msb = true,
   .lower_extract_word = true,
   .lower_extract_byte = true,
   .lower_insert_word = true,
   .lower_insert_byte = true,
   .lower_all_io_to_elements = true,
   .lower_hadd = true,
   .lower_uadd_sat = true,
   .lower_usub_sat = true,
   .lower_iadd_sat = true,
   .lower_uadd_carry = true,
   .lower_usub_borrow = true,
   .lower_mul_high = true,
   .lower_pack_half_2x16 = true,
   .lower_pack_unorm_4x8 = true,
   .lower_pack_snorm_4x8 = true,
   .lower_pack_snorm_2x16 = true,
   .lower_pack_unorm_2x16 = true,
   .lower_pack_64_2x32_split = true,
   .lower_pack_32_2x16_split = true,
   .lower_pack_64_4x16 = true,
   .lower_unpack_64_2x32_split = true,
   .lower_unpack_32_2x16_split = true,
   .lower_unpack_half_2x16 = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_interpolate_at = true,
   .has_fsub = true,
   .has_isub = true,
   .has_bfe = true,
   .has_find_msb_rev = true,
   .vertex_id_zero_based = true,
   .lower_base_vertex = true,
   .lower_helper_invocation = true,
   .has_cs_global_id = true,
   .lower_mul_2x32_64 = true,
   .lower_doubles_options =
      nir_lower_drcp |
      nir_lower_dsqrt |
      nir_lower_drsq |
      nir_lower_dfract |
      nir_lower_dtrunc |
      nir_lower_dfloor |
      nir_lower_dceil |
      nir_lower_dround_even,
   .max_unroll_iterations = 32, /* arbitrary */
   .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out),
   .lower_device_index_to_zero = true,
   .linker_ignore_precision = true,
   .support_16bit_alu = true,
   .preserve_mediump = true,
};

const nir_shader_compiler_options*
dxil_get_base_nir_compiler_options(void)
{
   return &nir_options;
}

void
dxil_get_nir_compiler_options(nir_shader_compiler_options *options,
                              enum dxil_shader_model shader_model_max,
                              unsigned supported_int_sizes,
                              unsigned supported_float_sizes)
{
   *options = nir_options;
   if (!(supported_int_sizes & 64)) {
      options->lower_pack_64_2x32_split = false;
      options->lower_unpack_64_2x32_split = false;
      options->lower_int64_options = ~0;
   }
   if (!(supported_float_sizes & 64))
      options->lower_doubles_options = ~0;
   if (shader_model_max >= SHADER_MODEL_6_4) {
      options->has_sdot_4x8 = true;
      options->has_udot_4x8 = true;
   }
}

static bool
emit_llvm_ident(struct dxil_module *m)
{
   const struct dxil_mdnode *compiler = dxil_get_metadata_string(m, "Mesa version " PACKAGE_VERSION MESA_GIT_SHA1);
   if (!compiler)
      return false;

   const struct dxil_mdnode *llvm_ident = dxil_get_metadata_node(m, &compiler, 1);
   return llvm_ident &&
          dxil_add_metadata_named_node(m, "llvm.ident", &llvm_ident, 1);
}

static bool
emit_named_version(struct dxil_module *m, const char *name,
                   int major, int minor)
{
   const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, major);
   const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, minor);
   const struct dxil_mdnode *version_nodes[] = { major_node, minor_node };
   const struct dxil_mdnode *version = dxil_get_metadata_node(m, version_nodes,
                                                     ARRAY_SIZE(version_nodes));
   return dxil_add_metadata_named_node(m, name, &version, 1);
}

static const char *
get_shader_kind_str(enum dxil_shader_kind kind)
{
   switch (kind) {
   case DXIL_PIXEL_SHADER:
      return "ps";
   case DXIL_VERTEX_SHADER:
      return "vs";
   case DXIL_GEOMETRY_SHADER:
      return "gs";
   case DXIL_HULL_SHADER:
      return "hs";
   case DXIL_DOMAIN_SHADER:
      return "ds";
   case DXIL_COMPUTE_SHADER:
      return "cs";
   default:
      unreachable("invalid shader kind");
   }
}

static bool
emit_dx_shader_model(struct dxil_module *m)
{
   const struct dxil_mdnode *type_node = dxil_get_metadata_string(m, get_shader_kind_str(m->shader_kind));
   const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, m->major_version);
   const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, m->minor_version);
   const struct dxil_mdnode *shader_model[] = { type_node, major_node,
                                                minor_node };
   const struct dxil_mdnode *dx_shader_model = dxil_get_metadata_node(m, shader_model, ARRAY_SIZE(shader_model));

   return dxil_add_metadata_named_node(m, "dx.shaderModel",
                                       &dx_shader_model, 1);
}

enum {
   DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG = 0,
   DXIL_STRUCTURED_BUFFER_ELEMENT_STRIDE_TAG = 1
};

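/* Opcode numbers of the dx.op.* DXIL intrinsics emitted by this backend,
 * as assigned by the DXIL specification.
 */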
enum dxil_intr {
   DXIL_INTR_LOAD_INPUT = 4,
   DXIL_INTR_STORE_OUTPUT = 5,
   DXIL_INTR_FABS = 6,
   DXIL_INTR_SATURATE = 7,

   DXIL_INTR_ISFINITE = 10,
   DXIL_INTR_ISNORMAL = 11,

   DXIL_INTR_FCOS = 12,
   DXIL_INTR_FSIN = 13,

   DXIL_INTR_FEXP2 = 21,
   DXIL_INTR_FRC = 22,
   DXIL_INTR_FLOG2 = 23,

   DXIL_INTR_SQRT = 24,
   DXIL_INTR_RSQRT = 25,
   DXIL_INTR_ROUND_NE = 26,
   DXIL_INTR_ROUND_NI = 27,
   DXIL_INTR_ROUND_PI = 28,
   DXIL_INTR_ROUND_Z = 29,

   DXIL_INTR_BFREV = 30,
   DXIL_INTR_COUNTBITS = 31,
   DXIL_INTR_FIRSTBIT_LO = 32,
   DXIL_INTR_FIRSTBIT_HI = 33,
   DXIL_INTR_FIRSTBIT_SHI = 34,

   DXIL_INTR_FMAX = 35,
   DXIL_INTR_FMIN = 36,
   DXIL_INTR_IMAX = 37,
   DXIL_INTR_IMIN = 38,
   DXIL_INTR_UMAX = 39,
   DXIL_INTR_UMIN = 40,

   DXIL_INTR_FMA = 47,

   DXIL_INTR_IBFE = 51,
   DXIL_INTR_UBFE = 52,
   DXIL_INTR_BFI = 53,

   DXIL_INTR_CREATE_HANDLE = 57,
   DXIL_INTR_CBUFFER_LOAD_LEGACY = 59,

   DXIL_INTR_SAMPLE = 60,
   DXIL_INTR_SAMPLE_BIAS = 61,
   DXIL_INTR_SAMPLE_LEVEL = 62,
   DXIL_INTR_SAMPLE_GRAD = 63,
   DXIL_INTR_SAMPLE_CMP = 64,
   DXIL_INTR_SAMPLE_CMP_LVL_ZERO = 65,

   DXIL_INTR_TEXTURE_LOAD = 66,
   DXIL_INTR_TEXTURE_STORE = 67,

   DXIL_INTR_BUFFER_LOAD = 68,
   DXIL_INTR_BUFFER_STORE = 69,

   DXIL_INTR_TEXTURE_SIZE = 72,
   DXIL_INTR_TEXTURE_GATHER = 73,
   DXIL_INTR_TEXTURE_GATHER_CMP = 74,

   DXIL_INTR_TEXTURE2DMS_GET_SAMPLE_POSITION = 75,
   DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION = 76,
   DXIL_INTR_RENDER_TARGET_GET_SAMPLE_COUNT = 77,

   DXIL_INTR_ATOMIC_BINOP = 78,
   DXIL_INTR_ATOMIC_CMPXCHG = 79,
   DXIL_INTR_BARRIER = 80,
   DXIL_INTR_TEXTURE_LOD = 81,

   DXIL_INTR_DISCARD = 82,
   DXIL_INTR_DDX_COARSE = 83,
   DXIL_INTR_DDY_COARSE = 84,
   DXIL_INTR_DDX_FINE = 85,
   DXIL_INTR_DDY_FINE = 86,

   DXIL_INTR_EVAL_SNAPPED = 87,
   DXIL_INTR_EVAL_SAMPLE_INDEX = 88,
   DXIL_INTR_EVAL_CENTROID = 89,

   DXIL_INTR_SAMPLE_INDEX = 90,
   DXIL_INTR_COVERAGE = 91,

   DXIL_INTR_THREAD_ID = 93,
   DXIL_INTR_GROUP_ID = 94,
   DXIL_INTR_THREAD_ID_IN_GROUP = 95,
   DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP = 96,

   DXIL_INTR_EMIT_STREAM = 97,
   DXIL_INTR_CUT_STREAM = 98,

   DXIL_INTR_GS_INSTANCE_ID = 100,

   DXIL_INTR_MAKE_DOUBLE = 101,
   DXIL_INTR_SPLIT_DOUBLE = 102,

   DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT = 103,
   DXIL_INTR_LOAD_PATCH_CONSTANT = 104,
   DXIL_INTR_DOMAIN_LOCATION = 105,
   DXIL_INTR_STORE_PATCH_CONSTANT = 106,
   DXIL_INTR_OUTPUT_CONTROL_POINT_ID = 107,
   DXIL_INTR_PRIMITIVE_ID = 108,

   DXIL_INTR_WAVE_IS_FIRST_LANE = 110,
   DXIL_INTR_WAVE_GET_LANE_INDEX = 111,
   DXIL_INTR_WAVE_GET_LANE_COUNT = 112,
   DXIL_INTR_WAVE_ANY_TRUE = 113,
   DXIL_INTR_WAVE_ALL_TRUE = 114,
   DXIL_INTR_WAVE_ACTIVE_ALL_EQUAL = 115,
   DXIL_INTR_WAVE_ACTIVE_BALLOT = 116,
   DXIL_INTR_WAVE_READ_LANE_AT = 117,
   DXIL_INTR_WAVE_READ_LANE_FIRST = 118,
   DXIL_INTR_WAVE_ACTIVE_OP = 119,
   DXIL_INTR_WAVE_ACTIVE_BIT = 120,
   DXIL_INTR_WAVE_PREFIX_OP = 121,
   DXIL_INTR_QUAD_READ_LANE_AT = 122,
   DXIL_INTR_QUAD_OP = 123,

   DXIL_INTR_LEGACY_F32TOF16 = 130,
   DXIL_INTR_LEGACY_F16TOF32 = 131,

   DXIL_INTR_ATTRIBUTE_AT_VERTEX = 137,
   DXIL_INTR_VIEW_ID = 138,

   DXIL_INTR_RAW_BUFFER_LOAD = 139,
   DXIL_INTR_RAW_BUFFER_STORE = 140,

   DXIL_INTR_DOT4_ADD_I8_PACKED = 163,
   DXIL_INTR_DOT4_ADD_U8_PACKED = 164,

   DXIL_INTR_ANNOTATE_HANDLE = 216,
   DXIL_INTR_CREATE_HANDLE_FROM_BINDING = 217,
   DXIL_INTR_CREATE_HANDLE_FROM_HEAP = 218,

   DXIL_INTR_IS_HELPER_LANE = 221,
   DXIL_INTR_SAMPLE_CMP_LEVEL = 224,
   DXIL_INTR_SAMPLE_CMP_GRAD = 254,
   DXIL_INTR_SAMPLE_CMP_BIAS = 255,
};

enum dxil_atomic_op {
   DXIL_ATOMIC_ADD = 0,
   DXIL_ATOMIC_AND = 1,
   DXIL_ATOMIC_OR = 2,
   DXIL_ATOMIC_XOR = 3,
   DXIL_ATOMIC_IMIN = 4,
   DXIL_ATOMIC_IMAX = 5,
   DXIL_ATOMIC_UMIN = 6,
   DXIL_ATOMIC_UMAX = 7,
   DXIL_ATOMIC_EXCHANGE = 8,
};

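/* Map NIR atomic opcodes onto the operation codes of dx.op.atomicBinOp. */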
static enum dxil_atomic_op
nir_atomic_to_dxil_atomic(nir_atomic_op op)
{
   switch (op) {
   case nir_atomic_op_iadd: return DXIL_ATOMIC_ADD;
   case nir_atomic_op_iand: return DXIL_ATOMIC_AND;
   case nir_atomic_op_ior: return DXIL_ATOMIC_OR;
   case nir_atomic_op_ixor: return DXIL_ATOMIC_XOR;
   case nir_atomic_op_imin: return DXIL_ATOMIC_IMIN;
   case nir_atomic_op_imax: return DXIL_ATOMIC_IMAX;
   case nir_atomic_op_umin: return DXIL_ATOMIC_UMIN;
   case nir_atomic_op_umax: return DXIL_ATOMIC_UMAX;
   case nir_atomic_op_xchg: return DXIL_ATOMIC_EXCHANGE;
   default: unreachable("Unsupported atomic op");
   }
}

static enum dxil_rmw_op
nir_atomic_to_dxil_rmw(nir_atomic_op op)
{
   switch (op) {
   case nir_atomic_op_iadd: return DXIL_RMWOP_ADD;
   case nir_atomic_op_iand: return DXIL_RMWOP_AND;
   case nir_atomic_op_ior: return DXIL_RMWOP_OR;
   case nir_atomic_op_ixor: return DXIL_RMWOP_XOR;
   case nir_atomic_op_imin: return DXIL_RMWOP_MIN;
   case nir_atomic_op_imax: return DXIL_RMWOP_MAX;
   case nir_atomic_op_umin: return DXIL_RMWOP_UMIN;
   case nir_atomic_op_umax: return DXIL_RMWOP_UMAX;
   case nir_atomic_op_xchg: return DXIL_RMWOP_XCHG;
   default: unreachable("Unsupported atomic op");
   }
}

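/* One contiguous range of a resource array: its metadata record ID, base
 * binding, number of bindings, and register space.
 */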
typedef struct {
   unsigned id;
   unsigned binding;
   unsigned size;
   unsigned space;
} resource_array_layout;

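/* Fills the six leading metadata fields common to all resource classes:
 * record ID, symbol, name, register space, lower bound, and range size.
 */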
static void
fill_resource_metadata(struct dxil_module *m, const struct dxil_mdnode **fields,
                       const struct dxil_type *struct_type,
                       const char *name, const resource_array_layout *layout)
{
   const struct dxil_type *pointer_type = dxil_module_get_pointer_type(m, struct_type);
   const struct dxil_value *pointer_undef = dxil_module_get_undef(m, pointer_type);

   fields[0] = dxil_get_metadata_int32(m, layout->id); // resource ID
   fields[1] = dxil_get_metadata_value(m, pointer_type, pointer_undef); // global constant symbol
   fields[2] = dxil_get_metadata_string(m, name ? name : ""); // name
   fields[3] = dxil_get_metadata_int32(m, layout->space); // space ID
   fields[4] = dxil_get_metadata_int32(m, layout->binding); // lower bound
   fields[5] = dxil_get_metadata_int32(m, layout->size); // range size
}

static const struct dxil_mdnode *
emit_srv_metadata(struct dxil_module *m, const struct dxil_type *elem_type,
                  const char *name, const resource_array_layout *layout,
                  enum dxil_component_type comp_type,
                  enum dxil_resource_kind res_kind)
{
   const struct dxil_mdnode *fields[9];

   const struct dxil_mdnode *metadata_tag_nodes[2];

   fill_resource_metadata(m, fields, elem_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
   fields[7] = dxil_get_metadata_int1(m, 0); // sample count
   if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
       res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
      metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
      metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
      fields[8] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
   } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      fields[8] = NULL;
   else
      unreachable("Structured buffers not supported yet");

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_uav_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                  const char *name, const resource_array_layout *layout,
                  enum dxil_component_type comp_type,
                  enum dxil_resource_kind res_kind,
                  enum gl_access_qualifier access)
{
   const struct dxil_mdnode *fields[11];

   const struct dxil_mdnode *metadata_tag_nodes[2];

   fill_resource_metadata(m, fields, struct_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
   fields[7] = dxil_get_metadata_int1(m, (access & ACCESS_COHERENT) != 0); // globally-coherent
   fields[8] = dxil_get_metadata_int1(m, false); // has counter
   fields[9] = dxil_get_metadata_int1(m, false); // is ROV
   if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
       res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
      metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
      metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
      fields[10] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
   } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      fields[10] = NULL;
   else
      unreachable("Structured buffers not supported yet");

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_cbv_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                  const char *name, const resource_array_layout *layout,
                  unsigned size)
{
   const struct dxil_mdnode *fields[8];

   fill_resource_metadata(m, fields, struct_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, size); // constant buffer size
   fields[7] = NULL; // metadata

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_sampler_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                      nir_variable *var, const resource_array_layout *layout)
{
   const struct dxil_mdnode *fields[8];
   const struct glsl_type *type = glsl_without_array(var->type);

   fill_resource_metadata(m, fields, struct_type, var->name, layout);
   enum dxil_sampler_kind sampler_kind = glsl_sampler_type_is_shadow(type) ?
          DXIL_SAMPLER_KIND_COMPARISON : DXIL_SAMPLER_KIND_DEFAULT;
   fields[6] = dxil_get_metadata_int32(m, sampler_kind); // sampler kind
   fields[7] = NULL; // metadata

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}


#define MAX_SRVS 128
#define MAX_UAVS 64
#define MAX_CBVS 64 // ??
#define MAX_SAMPLERS 64 // ??

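/* One DXIL value per component of a NIR SSA def. */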
struct dxil_def {
   const struct dxil_value *chans[NIR_MAX_VEC_COMPONENTS];
};

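/* All state for one NIR-to-DXIL conversion: the module being built, the
 * metadata nodes and handles of every declared resource, per-SSA-def
 * values, and per-stage bookkeeping.
 */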
struct ntd_context {
   void *ralloc_ctx;
   const struct nir_to_dxil_options *opts;
   struct nir_shader *shader;

   struct dxil_module mod;

   struct util_dynarray srv_metadata_nodes;
   const struct dxil_value *srv_handles[MAX_SRVS];

   struct util_dynarray uav_metadata_nodes;
   const struct dxil_value *ssbo_handles[MAX_UAVS];
   const struct dxil_value *image_handles[MAX_UAVS];
   uint32_t num_uavs;

   struct util_dynarray cbv_metadata_nodes;
   const struct dxil_value *cbv_handles[MAX_CBVS];

   struct util_dynarray sampler_metadata_nodes;
   const struct dxil_value *sampler_handles[MAX_SAMPLERS];

   struct util_dynarray resources;

   const struct dxil_mdnode *shader_property_nodes[6];
   size_t num_shader_property_nodes;

   struct dxil_def *defs;
   unsigned num_defs;
   struct hash_table *phis;

   const struct dxil_value **sharedvars;
   const struct dxil_value **scratchvars;
   const struct dxil_value **consts;

   nir_variable *ps_front_face;
   nir_variable *system_value[SYSTEM_VALUE_MAX];

   nir_function *tess_ctrl_patch_constant_func;
   unsigned tess_input_control_point_count;

   struct dxil_func_def *main_func_def;
   struct dxil_func_def *tess_ctrl_patch_constant_func_def;
   unsigned unnamed_ubo_count;

   BITSET_WORD *float_types;
   BITSET_WORD *int_types;

   const struct dxil_logger *logger;
};

static const char*
unary_func_name(enum dxil_intr intr)
{
   switch (intr) {
   case DXIL_INTR_COUNTBITS:
   case DXIL_INTR_FIRSTBIT_HI:
   case DXIL_INTR_FIRSTBIT_SHI:
   case DXIL_INTR_FIRSTBIT_LO:
      return "dx.op.unaryBits";
   case DXIL_INTR_ISFINITE:
   case DXIL_INTR_ISNORMAL:
      return "dx.op.isSpecialFloat";
   default:
      return "dx.op.unary";
   }
}

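/* The emit_{unary,binary,tertiary,quaternary}_call helpers below share one
 * pattern: look up the dx.op.* function for the requested overload, turn
 * the DXIL opcode into an i32 constant, and emit a call with the operands
 * appended after the opcode.
 */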
static const struct dxil_value *
emit_unary_call(struct ntd_context *ctx, enum overload_type overload,
                enum dxil_intr intr,
                const struct dxil_value *op0)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod,
                                                    unary_func_name(intr),
                                                    overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
     opcode,
     op0
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_binary_call(struct ntd_context *ctx, enum overload_type overload,
                 enum dxil_intr intr,
                 const struct dxil_value *op0, const struct dxil_value *op1)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.binary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
     opcode,
     op0,
     op1
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_tertiary_call(struct ntd_context *ctx, enum overload_type overload,
                   enum dxil_intr intr,
                   const struct dxil_value *op0,
                   const struct dxil_value *op1,
                   const struct dxil_value *op2)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.tertiary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
     opcode,
     op0,
     op1,
     op2
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_quaternary_call(struct ntd_context *ctx, enum overload_type overload,
                     enum dxil_intr intr,
                     const struct dxil_value *op0,
                     const struct dxil_value *op1,
                     const struct dxil_value *op2,
                     const struct dxil_value *op3)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.quaternary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
     opcode,
     op0,
     op1,
     op2,
     op3
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_threadid_call(struct ntd_context *ctx, const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadId", DXIL_I32);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
       DXIL_INTR_THREAD_ID);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
     opcode,
     comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_threadidingroup_call(struct ntd_context *ctx,
                          const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadIdInGroup", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
       DXIL_INTR_THREAD_ID_IN_GROUP);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
     opcode,
     comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_flattenedthreadidingroup_call(struct ntd_context *ctx)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.flattenedThreadIdInGroup", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
     opcode
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_groupid_call(struct ntd_context *ctx, const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.groupId", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
       DXIL_INTR_GROUP_ID);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
     opcode,
     comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

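/* rawBufferLoad takes a component mask and an alignment in addition to the
 * two address coordinates; (1 << component_count) - 1 selects the first
 * component_count components.
 */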
static const struct dxil_value *
emit_raw_bufferload_call(struct ntd_context *ctx,
                         const struct dxil_value *handle,
                         const struct dxil_value *coord[2],
                         enum overload_type overload,
                         unsigned component_count,
                         unsigned alignment)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.rawBufferLoad", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_RAW_BUFFER_LOAD);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1],
      dxil_module_get_int8_const(&ctx->mod, (1 << component_count) - 1),
      dxil_module_get_int32_const(&ctx->mod, alignment),
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_bufferload_call(struct ntd_context *ctx,
                     const struct dxil_value *handle,
                     const struct dxil_value *coord[2],
                     enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferLoad", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_BUFFER_LOAD);
   const struct dxil_value *args[] = { opcode, handle, coord[0], coord[1] };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_raw_bufferstore_call(struct ntd_context *ctx,
                          const struct dxil_value *handle,
                          const struct dxil_value *coord[2],
                          const struct dxil_value *value[4],
                          const struct dxil_value *write_mask,
                          enum overload_type overload,
                          unsigned alignment)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.rawBufferStore", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_RAW_BUFFER_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1],
      value[0], value[1], value[2], value[3],
      write_mask,
      dxil_module_get_int32_const(&ctx->mod, alignment),
   };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static bool
emit_bufferstore_call(struct ntd_context *ctx,
                      const struct dxil_value *handle,
                      const struct dxil_value *coord[2],
                      const struct dxil_value *value[4],
                      const struct dxil_value *write_mask,
                      enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferStore", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_BUFFER_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1],
      value[0], value[1], value[2], value[3],
      write_mask
   };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_textureload_call(struct ntd_context *ctx,
                      const struct dxil_value *handle,
                      const struct dxil_value *coord[3],
                      enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", overload);
   if (!func)
      return NULL;
   const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
   const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_TEXTURE_LOAD);
   const struct dxil_value *args[] = { opcode, handle,
      /*lod_or_sample*/ int_undef,
      coord[0], coord[1], coord[2],
      /* offsets */ int_undef, int_undef, int_undef};

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_texturestore_call(struct ntd_context *ctx,
                       const struct dxil_value *handle,
                       const struct dxil_value *coord[3],
                       const struct dxil_value *value[4],
                       const struct dxil_value *write_mask,
                       enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureStore", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_TEXTURE_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1], coord[2],
      value[0], value[1], value[2], value[3],
      write_mask
   };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_atomic_binop(struct ntd_context *ctx,
                  const struct dxil_value *handle,
                  enum dxil_atomic_op atomic_op,
                  const struct dxil_value *coord[3],
                  const struct dxil_value *value)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.atomicBinOp", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_BINOP);
   const struct dxil_value *atomic_op_value =
      dxil_module_get_int32_const(&ctx->mod, atomic_op);
   const struct dxil_value *args[] = {
      opcode, handle, atomic_op_value,
      coord[0], coord[1], coord[2], value
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_atomic_cmpxchg(struct ntd_context *ctx,
                    const struct dxil_value *handle,
                    const struct dxil_value *coord[3],
                    const struct dxil_value *cmpval,
                    const struct dxil_value *newval)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.atomicCompareExchange", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_CMPXCHG);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1], coord[2], cmpval, newval
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

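/* Resource handle creation changed in Shader Model 6.6: earlier models use
 * dx.op.createHandle, which refers to the resource metadata, while 6.6+
 * creates a handle from an explicit binding and then annotates it with the
 * resource properties (see emit_createhandle_call for the dispatch).
 */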
static const struct dxil_value *
emit_createhandle_call_pre_6_6(struct ntd_context *ctx,
                               enum dxil_resource_class resource_class,
                               unsigned lower_bound,
                               unsigned upper_bound,
                               unsigned space,
                               unsigned resource_range_id,
                               const struct dxil_value *resource_range_index,
                               bool non_uniform_resource_index)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE);
   const struct dxil_value *resource_class_value = dxil_module_get_int8_const(&ctx->mod, resource_class);
   const struct dxil_value *resource_range_id_value = dxil_module_get_int32_const(&ctx->mod, resource_range_id);
   const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
   if (!opcode || !resource_class_value || !resource_range_id_value ||
       !non_uniform_resource_index_value)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      resource_class_value,
      resource_range_id_value,
      resource_range_index,
      non_uniform_resource_index_value
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.createHandle", DXIL_NONE);

   if (!func)
      return NULL;

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_annotate_handle(struct ntd_context *ctx,
                     const struct dxil_value *unannotated_handle,
                     const struct dxil_value *res_props)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ANNOTATE_HANDLE);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      unannotated_handle,
      res_props
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.annotateHandle", DXIL_NONE);

   if (!func)
      return NULL;

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_annotate_handle_from_metadata(struct ntd_context *ctx,
                                   enum dxil_resource_class resource_class,
                                   unsigned resource_range_id,
                                   const struct dxil_value *unannotated_handle)
{
   const struct util_dynarray *mdnodes;
   switch (resource_class) {
   case DXIL_RESOURCE_CLASS_SRV:
      mdnodes = &ctx->srv_metadata_nodes;
      break;
   case DXIL_RESOURCE_CLASS_UAV:
      mdnodes = &ctx->uav_metadata_nodes;
      break;
   case DXIL_RESOURCE_CLASS_CBV:
      mdnodes = &ctx->cbv_metadata_nodes;
      break;
   case DXIL_RESOURCE_CLASS_SAMPLER:
      mdnodes = &ctx->sampler_metadata_nodes;
      break;
   default:
      unreachable("Invalid resource class");
   }

   const struct dxil_mdnode *mdnode = *util_dynarray_element(mdnodes, const struct dxil_mdnode *, resource_range_id);
   const struct dxil_value *res_props = dxil_module_get_res_props_const(&ctx->mod, resource_class, mdnode);
   if (!res_props)
      return NULL;

   return emit_annotate_handle(ctx, unannotated_handle, res_props);
}

static const struct dxil_value *
emit_createhandle_and_annotate(struct ntd_context *ctx,
                               enum dxil_resource_class resource_class,
                               unsigned lower_bound,
                               unsigned upper_bound,
                               unsigned space,
                               unsigned resource_range_id,
                               const struct dxil_value *resource_range_index,
                               bool non_uniform_resource_index)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE_FROM_BINDING);
   const struct dxil_value *res_bind = dxil_module_get_res_bind_const(&ctx->mod, lower_bound, upper_bound, space, resource_class);
   const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
   if (!opcode || !res_bind || !non_uniform_resource_index_value)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      res_bind,
      resource_range_index,
      non_uniform_resource_index_value
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.createHandleFromBinding", DXIL_NONE);

   if (!func)
      return NULL;

   const struct dxil_value *unannotated_handle = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!unannotated_handle)
      return NULL;

   return emit_annotate_handle_from_metadata(ctx, resource_class, resource_range_id, unannotated_handle);
}

static const struct dxil_value *
emit_createhandle_call(struct ntd_context *ctx,
                       enum dxil_resource_class resource_class,
                       unsigned lower_bound,
                       unsigned upper_bound,
                       unsigned space,
                       unsigned resource_range_id,
                       const struct dxil_value *resource_range_index,
                       bool non_uniform_resource_index)
{
   if (ctx->mod.minor_version < 6)
      return emit_createhandle_call_pre_6_6(ctx, resource_class, lower_bound, upper_bound, space, resource_range_id, resource_range_index, non_uniform_resource_index);
   else
      return emit_createhandle_and_annotate(ctx, resource_class, lower_bound, upper_bound, space, resource_range_id, resource_range_index, non_uniform_resource_index);
}

static const struct dxil_value *
emit_createhandle_call_const_index(struct ntd_context *ctx,
                                   enum dxil_resource_class resource_class,
                                   unsigned lower_bound,
                                   unsigned upper_bound,
                                   unsigned space,
                                   unsigned resource_range_id,
                                   unsigned resource_range_index,
                                   bool non_uniform_resource_index)
{
   const struct dxil_value *resource_range_index_value = dxil_module_get_int32_const(&ctx->mod, resource_range_index);
   if (!resource_range_index_value)
      return NULL;

   return emit_createhandle_call(ctx, resource_class, lower_bound, upper_bound, space,
                                 resource_range_id, resource_range_index_value,
                                 non_uniform_resource_index);
}

static const struct dxil_value *
emit_createhandle_heap(struct ntd_context *ctx,
                       const struct dxil_value *resource_range_index,
                       bool is_sampler,
                       bool non_uniform_resource_index)
{
   if (is_sampler)
      ctx->mod.feats.sampler_descriptor_heap_indexing = true;
   else
      ctx->mod.feats.resource_descriptor_heap_indexing = true;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE_FROM_HEAP);
   const struct dxil_value *sampler = dxil_module_get_int1_const(&ctx->mod, is_sampler);
   const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
   if (!opcode || !sampler || !non_uniform_resource_index_value)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      resource_range_index,
      sampler,
      non_uniform_resource_index_value
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.createHandleFromHeap", DXIL_NONE);

   if (!func)
      return NULL;

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

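/* Records a resource range in the module's resource table; validator 1.6+
 * uses the v1 record, which additionally stores the resource kind and
 * flags.
 */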
static void
add_resource(struct ntd_context *ctx, enum dxil_resource_type type,
             enum dxil_resource_kind kind,
             const resource_array_layout *layout)
{
   struct dxil_resource_v0 *resource_v0 = NULL;
   struct dxil_resource_v1 *resource_v1 = NULL;
   if (ctx->mod.minor_validator >= 6) {
      resource_v1 = util_dynarray_grow(&ctx->resources, struct dxil_resource_v1, 1);
      resource_v0 = &resource_v1->v0;
   } else {
      resource_v0 = util_dynarray_grow(&ctx->resources, struct dxil_resource_v0, 1);
   }
   resource_v0->resource_type = type;
   resource_v0->space = layout->space;
   resource_v0->lower_bound = layout->binding;
   if (layout->size == 0 || (uint64_t)layout->size + layout->binding >= UINT_MAX)
      resource_v0->upper_bound = UINT_MAX;
   else
      resource_v0->upper_bound = layout->binding + layout->size - 1;
   if (type == DXIL_RES_UAV_TYPED ||
       type == DXIL_RES_UAV_RAW ||
       type == DXIL_RES_UAV_STRUCTURED) {
      uint32_t new_uav_count = ctx->num_uavs + layout->size;
      if (layout->size == 0 || new_uav_count < ctx->num_uavs)
         ctx->num_uavs = UINT_MAX;
      else
         ctx->num_uavs = new_uav_count;
      if (ctx->mod.minor_validator >= 6 && ctx->num_uavs > 8)
         ctx->mod.feats.use_64uavs = 1;
   }

   if (resource_v1) {
      resource_v1->resource_kind = kind;
      /* No flags supported yet */
      resource_v1->resource_flags = 0;
   }
}

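/* Resolves a (space, binding) pair against the declared resource ranges.
 * The ranges are stored grouped by class in the order CBVs, samplers,
 * SRVs, UAVs, which is what the offsets computed below rely on.
 */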
static const struct dxil_value *
emit_createhandle_call_dynamic(struct ntd_context *ctx,
                               enum dxil_resource_class resource_class,
                               unsigned space,
                               unsigned binding,
                               const struct dxil_value *resource_range_index,
                               bool non_uniform_resource_index)
{
   unsigned offset = 0;
   unsigned count = 0;

   unsigned num_srvs = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_uavs = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_cbvs = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_samplers = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);

   switch (resource_class) {
   case DXIL_RESOURCE_CLASS_UAV:
      offset = num_srvs + num_samplers + num_cbvs;
      count = num_uavs;
      break;
   case DXIL_RESOURCE_CLASS_SRV:
      offset = num_samplers + num_cbvs;
      count = num_srvs;
      break;
   case DXIL_RESOURCE_CLASS_SAMPLER:
      offset = num_cbvs;
      count = num_samplers;
      break;
   case DXIL_RESOURCE_CLASS_CBV:
      offset = 0;
      count = num_cbvs;
      break;
   }

   unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
      sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
   assert(offset + count <= ctx->resources.size / resource_element_size);
   for (unsigned i = offset; i < offset + count; ++i) {
      const struct dxil_resource_v0 *resource = (const struct dxil_resource_v0 *)((const char *)ctx->resources.data + resource_element_size * i);
      if (resource->space == space &&
          resource->lower_bound <= binding &&
          resource->upper_bound >= binding) {
         return emit_createhandle_call(ctx, resource_class, resource->lower_bound,
                                       resource->upper_bound, space,
                                       i - offset,
                                       resource_range_index,
                                       non_uniform_resource_index);
      }
   }

   unreachable("Resource access for undeclared range");
}

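/* Declares an SRV for a NIR variable: SSBO variables become raw buffers,
 * anything else becomes a typed resource whose shape and component type
 * are derived from the GLSL type.
 */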
static bool
emit_srv(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   unsigned id = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
   unsigned binding = var->data.binding;
   resource_array_layout layout = {id, binding, count, var->data.descriptor_set};

   enum dxil_component_type comp_type;
   enum dxil_resource_kind res_kind;
   enum dxil_resource_type res_type;
   if (var->data.mode == nir_var_mem_ssbo) {
      comp_type = DXIL_COMP_TYPE_INVALID;
      res_kind = DXIL_RESOURCE_KIND_RAW_BUFFER;
      res_type = DXIL_RES_SRV_RAW;
   } else {
      comp_type = dxil_get_comp_type(var->type);
      res_kind = dxil_get_resource_kind(var->type);
      res_type = DXIL_RES_SRV_TYPED;
   }
   const struct dxil_type *res_type_as_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, 4, false /* readwrite */);

   if (glsl_type_is_array(var->type))
      res_type_as_type = dxil_module_get_array_type(&ctx->mod, res_type_as_type, count);

   const struct dxil_mdnode *srv_meta = emit_srv_metadata(&ctx->mod, res_type_as_type, var->name,
                                                          &layout, comp_type, res_kind);

   if (!srv_meta)
      return false;

   util_dynarray_append(&ctx->srv_metadata_nodes, const struct dxil_mdnode *, srv_meta);
   add_resource(ctx, res_type, res_kind, &layout);
   if (res_type == DXIL_RES_SRV_RAW)
      ctx->mod.raw_and_structured_buffers = true;

   return true;
}

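/* Kernel globals are exposed as a single range of raw-buffer UAVs in
 * space 0 starting at binding 0, with one extra entry per SSBO variable.
 */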
1318 static bool
emit_globals(struct ntd_context * ctx,unsigned size)1319 emit_globals(struct ntd_context *ctx, unsigned size)
1320 {
1321    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo)
1322       size++;
1323 
1324    if (!size)
1325       return true;
1326 
1327    const struct dxil_type *struct_type = dxil_module_get_res_type(&ctx->mod,
1328       DXIL_RESOURCE_KIND_RAW_BUFFER, DXIL_COMP_TYPE_INVALID, 1, true /* readwrite */);
1329    if (!struct_type)
1330       return false;
1331 
1332    const struct dxil_type *array_type =
1333       dxil_module_get_array_type(&ctx->mod, struct_type, size);
1334    if (!array_type)
1335       return false;
1336 
1337    resource_array_layout layout = {0, 0, size, 0};
1338    const struct dxil_mdnode *uav_meta =
1339       emit_uav_metadata(&ctx->mod, array_type,
1340                                    "globals", &layout,
1341                                    DXIL_COMP_TYPE_INVALID,
1342                                    DXIL_RESOURCE_KIND_RAW_BUFFER, 0);
1343    if (!uav_meta)
1344       return false;
1345 
1346    util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta);
1347    if (ctx->mod.minor_validator < 6 &&
1348        util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8)
1349       ctx->mod.feats.use_64uavs = 1;
1350    /* Handles to UAVs used for kernel globals are created on-demand */
1351    add_resource(ctx, DXIL_RES_UAV_RAW, DXIL_RESOURCE_KIND_RAW_BUFFER, &layout);
1352    ctx->mod.raw_and_structured_buffers = true;
1353    return true;
1354 }
1355 
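/* Declare a UAV binding range and update the feature bits that depend on
 * UAV usage: pre-1.6 validators need the 64-UAV capability once more than 8
 * UAV metadata nodes exist, and any UAV outside of pixel or compute shaders
 * needs "UAVs at every stage".
 */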
1356 static bool
1357 emit_uav(struct ntd_context *ctx, unsigned binding, unsigned space, unsigned count,
1358          enum dxil_component_type comp_type, unsigned num_comps, enum dxil_resource_kind res_kind,
1359          enum gl_access_qualifier access, const char *name)
1360 {
1361    unsigned id = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
1362    resource_array_layout layout = { id, binding, count, space };
1363 
1364    const struct dxil_type *res_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, num_comps, true /* readwrite */);
1365    res_type = dxil_module_get_array_type(&ctx->mod, res_type, count);
1366    const struct dxil_mdnode *uav_meta = emit_uav_metadata(&ctx->mod, res_type, name,
1367                                                           &layout, comp_type, res_kind, access);
1368 
1369    if (!uav_meta)
1370       return false;
1371 
1372    util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta);
1373    if (ctx->mod.minor_validator < 6 &&
1374        util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8)
1375       ctx->mod.feats.use_64uavs = 1;
1376 
1377    add_resource(ctx, res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER ? DXIL_RES_UAV_RAW : DXIL_RES_UAV_TYPED, res_kind, &layout);
1378    if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
1379       ctx->mod.raw_and_structured_buffers = true;
1380    if (ctx->mod.shader_kind != DXIL_PIXEL_SHADER &&
1381        ctx->mod.shader_kind != DXIL_COMPUTE_SHADER)
1382       ctx->mod.feats.uavs_at_every_stage = true;
1383 
1384    return true;
1385 }
1386 
1387 static bool
1388 emit_uav_var(struct ntd_context *ctx, nir_variable *var, unsigned count)
1389 {
1390    unsigned binding, space;
1391    if (ctx->opts->environment == DXIL_ENVIRONMENT_GL) {
1392       /* For GL, the image intrinsics are already lowered, using driver_location
1393        * as the 0-based image index. Use space 1 so that we can keep using these
1394        * NIR constants without having to remap them, and so they don't overlap
1395        * SSBOs, which are also 0-based UAV bindings.
1396        */
1397       binding = var->data.driver_location;
1398       space = 1;
1399    } else {
1400       binding = var->data.binding;
1401       space = var->data.descriptor_set;
1402    }
1403    enum dxil_component_type comp_type = dxil_get_comp_type(var->type);
1404    enum dxil_resource_kind res_kind = dxil_get_resource_kind(var->type);
1405    const char *name = var->name;
1406 
1407    return emit_uav(ctx, binding, space, count, comp_type,
1408                    util_format_get_nr_components(var->data.image.format),
1409                    res_kind, var->data.access, name);
1410 }
1411 
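/* Translate a scalar NIR constant into a DXIL immediate of the requested
 * type, setting the feature bits (min-precision, int64 ops, doubles) implied
 * by 16- and 64-bit immediates.
 */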
1412 static const struct dxil_value *
1413 get_value_for_const(struct dxil_module *mod, nir_const_value *c, const struct dxil_type *type)
1414 {
1415    if (type == mod->int1_type) return dxil_module_get_int1_const(mod, c->b);
1416    if (type == mod->float32_type) return dxil_module_get_float_const(mod, c->f32);
1417    if (type == mod->int32_type) return dxil_module_get_int32_const(mod, c->i32);
1418    if (type == mod->int16_type) {
1419       mod->feats.min_precision = true;
1420       return dxil_module_get_int16_const(mod, c->i16);
1421    }
1422    if (type == mod->int64_type) {
1423       mod->feats.int64_ops = true;
1424       return dxil_module_get_int64_const(mod, c->i64);
1425    }
1426    if (type == mod->float16_type) {
1427       mod->feats.min_precision = true;
1428       return dxil_module_get_float16_const(mod, c->u16);
1429    }
1430    if (type == mod->float64_type) {
1431       mod->feats.doubles = true;
1432       return dxil_module_get_double_const(mod, c->f64);
1433    }
1434    unreachable("Invalid type");
1435 }
1436 
1437 static const struct dxil_type *
1438 get_type_for_glsl_base_type(struct dxil_module *mod, enum glsl_base_type type)
1439 {
1440    uint32_t bit_size = glsl_base_type_bit_size(type);
1441    if (nir_alu_type_get_base_type(nir_get_nir_type_for_glsl_base_type(type)) == nir_type_float)
1442       return dxil_module_get_float_type(mod, bit_size);
1443    return dxil_module_get_int_type(mod, bit_size);
1444 }
1445 
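/* Recursively translate a GLSL type into the corresponding DXIL type, e.g.
 * vec4 -> <4 x float>, float[3] -> [3 x float], and structs field by field.
 */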
1446 static const struct dxil_type *
1447 get_type_for_glsl_type(struct dxil_module *mod, const struct glsl_type *type)
1448 {
1449    if (glsl_type_is_scalar(type))
1450       return get_type_for_glsl_base_type(mod, glsl_get_base_type(type));
1451 
1452    if (glsl_type_is_vector(type))
1453       return dxil_module_get_vector_type(mod, get_type_for_glsl_base_type(mod, glsl_get_base_type(type)),
1454                                          glsl_get_vector_elements(type));
1455 
1456    if (glsl_type_is_array(type))
1457       return dxil_module_get_array_type(mod, get_type_for_glsl_type(mod, glsl_get_array_element(type)),
1458                                         glsl_array_size(type));
1459 
1460    assert(glsl_type_is_struct(type));
1461    uint32_t size = glsl_get_length(type);
1462    const struct dxil_type **fields = calloc(sizeof(const struct dxil_type *), size);
1463    for (uint32_t i = 0; i < size; ++i)
1464       fields[i] = get_type_for_glsl_type(mod, glsl_get_struct_field(type, i));
1465    const struct dxil_type *ret = dxil_module_get_struct_type(mod, glsl_get_type_name(type), fields, size);
1466    free((void *)fields);
1467    return ret;
1468 }
1469 
1470 static const struct dxil_value *
1471 get_value_for_const_aggregate(struct dxil_module *mod, nir_constant *c, const struct glsl_type *type)
1472 {
1473    const struct dxil_type *dxil_type = get_type_for_glsl_type(mod, type);
1474    if (glsl_type_is_vector_or_scalar(type)) {
1475       const struct dxil_type *element_type = get_type_for_glsl_base_type(mod, glsl_get_base_type(type));
1476       const struct dxil_value *elements[NIR_MAX_VEC_COMPONENTS];
1477       for (uint32_t i = 0; i < glsl_get_vector_elements(type); ++i)
1478          elements[i] = get_value_for_const(mod, &c->values[i], element_type);
1479       if (glsl_type_is_scalar(type))
1480          return elements[0];
1481       return dxil_module_get_vector_const(mod, dxil_type, elements);
1482    }
1483 
1484    uint32_t num_values = glsl_get_length(type);
1485    assert(num_values == c->num_elements);
1486    const struct dxil_value **values = calloc(sizeof(const struct dxil_value *), num_values);
1487    const struct dxil_value *ret;
1488    if (glsl_type_is_array(type)) {
1489       const struct glsl_type *element_type = glsl_get_array_element(type);
1490       for (uint32_t i = 0; i < num_values; ++i)
1491          values[i] = get_value_for_const_aggregate(mod, c->elements[i], element_type);
1492       ret = dxil_module_get_array_const(mod, dxil_type, values);
1493    } else {
1494       for (uint32_t i = 0; i < num_values; ++i)
1495          values[i] = get_value_for_const_aggregate(mod, c->elements[i], glsl_get_struct_field(type, i));
1496       ret = dxil_module_get_struct_const(mod, dxil_type, values);
1497    }
1498    free((void *)values);
1499    return ret;
1500 }
1501 
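/* Materialize constant-initialized variables as DXIL global variables: each
 * one gets a sequential driver_location that indexes ctx->consts.
 */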
1502 static bool
1503 emit_global_consts(struct ntd_context *ctx)
1504 {
1505    uint32_t index = 0;
1506    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_constant) {
1507       assert(var->constant_initializer);
1508       var->data.driver_location = index++;
1509    }
1510 
1511    ctx->consts = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index);
1512 
1513    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_constant) {
1514       if (!var->name)
1515          var->name = ralloc_asprintf(var, "const_%d", var->data.driver_location);
1516 
1517       const struct dxil_value *agg_vals =
1518          get_value_for_const_aggregate(&ctx->mod, var->constant_initializer, var->type);
1519       if (!agg_vals)
1520          return false;
1521 
1522       const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name,
1523                                                               dxil_value_get_type(agg_vals),
1524                                                               DXIL_AS_DEFAULT, 16,
1525                                                               agg_vals);
1526       if (!gvar)
1527          return false;
1528 
1529       ctx->consts[var->data.driver_location] = gvar;
1530    }
1531 
1532    return true;
1533 }
1534 
1535 static bool
1536 emit_shared_vars(struct ntd_context *ctx)
1537 {
1538    uint32_t index = 0;
1539    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_shared)
1540       var->data.driver_location = index++;
1541 
1542    ctx->sharedvars = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index);
1543 
1544    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_shared) {
1545       if (!var->name)
1546          var->name = ralloc_asprintf(var, "shared_%d", var->data.driver_location);
1547       const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name,
1548                                                               get_type_for_glsl_type(&ctx->mod, var->type),
1549                                                               DXIL_AS_GROUPSHARED, 16,
1550                                                               NULL);
1551       if (!gvar)
1552          return false;
1553 
1554       ctx->sharedvars[var->data.driver_location] = gvar;
1555    }
1556 
1557    return true;
1558 }
1559 
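/* Declare a constant buffer view. The buffer is modeled as a struct
 * wrapping float[size], where size is in dwords; the metadata records the
 * size in bytes, hence 4 * size.
 */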
1560 static bool
1561 emit_cbv(struct ntd_context *ctx, unsigned binding, unsigned space,
1562          unsigned size, unsigned count, char *name)
1563 {
1564    assert(count != 0);
1565 
1566    unsigned idx = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);
1567 
1568    const struct dxil_type *float32 = dxil_module_get_float_type(&ctx->mod, 32);
1569    const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, float32, size);
1570    const struct dxil_type *buffer_type = dxil_module_get_struct_type(&ctx->mod, name,
1571                                                                      &array_type, 1);
1572    // All ubo[1]s should have been lowered to ubo with static indexing
1573    const struct dxil_type *final_type = count != 1 ? dxil_module_get_array_type(&ctx->mod, buffer_type, count) : buffer_type;
1574    resource_array_layout layout = {idx, binding, count, space};
1575    const struct dxil_mdnode *cbv_meta = emit_cbv_metadata(&ctx->mod, final_type,
1576                                                           name, &layout, 4 * size);
1577 
1578    if (!cbv_meta)
1579       return false;
1580 
1581    util_dynarray_append(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *, cbv_meta);
1582    add_resource(ctx, DXIL_RES_CBV, DXIL_RESOURCE_KIND_CBUFFER, &layout);
1583 
1584    return true;
1585 }
1586 
1587 static bool
1588 emit_ubo_var(struct ntd_context *ctx, nir_variable *var)
1589 {
1590    unsigned count = 1;
1591    if (glsl_type_is_array(var->type))
1592       count = glsl_get_length(var->type);
1593 
1594    char *name = var->name;
1595    char temp_name[30];
1596    if (name && strlen(name) == 0) {
1597       snprintf(temp_name, sizeof(temp_name), "__unnamed_ubo_%d",
1598                ctx->unnamed_ubo_count++);
1599       name = temp_name;
1600    }
1601 
1602    const struct glsl_type *type = glsl_without_array(var->type);
1603    assert(glsl_type_is_struct(type) || glsl_type_is_interface(type));
1604    unsigned dwords = ALIGN_POT(glsl_get_explicit_size(type, false), 16) / 4;
1605 
1606    return emit_cbv(ctx, var->data.binding, var->data.descriptor_set,
1607                    dwords, count, name);
1608 }
1609 
1610 static bool
1611 emit_sampler(struct ntd_context *ctx, nir_variable *var, unsigned count)
1612 {
1613    unsigned id = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);
1614    unsigned binding = var->data.binding;
1615    resource_array_layout layout = {id, binding, count, var->data.descriptor_set};
1616    const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
1617    const struct dxil_type *sampler_type = dxil_module_get_struct_type(&ctx->mod, "struct.SamplerState", &int32_type, 1);
1618 
1619    if (glsl_type_is_array(var->type))
1620       sampler_type = dxil_module_get_array_type(&ctx->mod, sampler_type, count);
1621 
1622    const struct dxil_mdnode *sampler_meta = emit_sampler_metadata(&ctx->mod, sampler_type, var, &layout);
1623 
1624    if (!sampler_meta)
1625       return false;
1626 
1627    util_dynarray_append(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *, sampler_meta);
1628    add_resource(ctx, DXIL_RES_SAMPLER, DXIL_RESOURCE_KIND_SAMPLER, &layout);
1629 
1630    return true;
1631 }
1632 
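/* For environments with static bindings (GL, CL), create one handle per
 * declared binding up front so loads/stores can simply index the per-class
 * handle arrays; Vulkan and the CL "globals" UAV array keep using dynamic
 * handles instead.
 */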
1633 static bool
1634 emit_static_indexing_handles(struct ntd_context *ctx)
1635 {
1636    /* Vulkan always uses dynamic handles, from instructions in the NIR */
1637    if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN)
1638       return true;
1639 
1640    unsigned last_res_class = -1;
1641    unsigned id = 0;
1642 
1643    unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
1644       sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
1645    for (struct dxil_resource_v0 *res = (struct dxil_resource_v0 *)ctx->resources.data;
1646         res < (struct dxil_resource_v0 *)((char *)ctx->resources.data + ctx->resources.size);
1647         res = (struct dxil_resource_v0 *)((char *)res + resource_element_size)) {
1648       enum dxil_resource_class res_class;
1649       const struct dxil_value **handle_array;
1650       switch (res->resource_type) {
1651       case DXIL_RES_SRV_TYPED:
1652       case DXIL_RES_SRV_RAW:
1653       case DXIL_RES_SRV_STRUCTURED:
1654          res_class = DXIL_RESOURCE_CLASS_SRV;
1655          handle_array = ctx->srv_handles;
1656          break;
1657       case DXIL_RES_CBV:
1658          res_class = DXIL_RESOURCE_CLASS_CBV;
1659          handle_array = ctx->cbv_handles;
1660          break;
1661       case DXIL_RES_SAMPLER:
1662          res_class = DXIL_RESOURCE_CLASS_SAMPLER;
1663          handle_array = ctx->sampler_handles;
1664          break;
1665       case DXIL_RES_UAV_RAW:
1666          res_class = DXIL_RESOURCE_CLASS_UAV;
1667          handle_array = ctx->ssbo_handles;
1668          break;
1669       case DXIL_RES_UAV_TYPED:
1670       case DXIL_RES_UAV_STRUCTURED:
1671       case DXIL_RES_UAV_STRUCTURED_WITH_COUNTER:
1672          res_class = DXIL_RESOURCE_CLASS_UAV;
1673          handle_array = ctx->image_handles;
1674          break;
1675       default:
1676          unreachable("Unexpected resource type");
1677       }
1678 
1679       if (last_res_class != res_class)
1680          id = 0;
1681       else
1682          id++;
1683       last_res_class = res_class;
1684 
1685       if (res->space > 1)
1686          continue;
1687       assert(res->space == 0 ||
1688          (res->space == 1 &&
1689             res->resource_type != DXIL_RES_UAV_RAW &&
1690             ctx->opts->environment == DXIL_ENVIRONMENT_GL));
1691 
1692       /* CL uses dynamic handles for the "globals" UAV array, but uses static
1693        * handles for UBOs, textures, and samplers.
1694        */
1695       if (ctx->opts->environment == DXIL_ENVIRONMENT_CL &&
1696           res->resource_type == DXIL_RES_UAV_RAW)
1697          continue;
1698 
1699       for (unsigned i = res->lower_bound; i <= res->upper_bound; ++i) {
1700          handle_array[i] = emit_createhandle_call_const_index(ctx,
1701                                                               res_class,
1702                                                               res->lower_bound,
1703                                                               res->upper_bound,
1704                                                               res->space,
1705                                                               id,
1706                                                               i,
1707                                                               false);
1708          if (!handle_array[i])
1709             return false;
1710       }
1711    }
1712    return true;
1713 }
1714 
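/* GS state metadata: input primitive, max output vertex count, active
 * stream mask, output topology, and invocation count, in that order.
 */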
1715 static const struct dxil_mdnode *
1716 emit_gs_state(struct ntd_context *ctx)
1717 {
1718    const struct dxil_mdnode *gs_state_nodes[5];
1719    const nir_shader *s = ctx->shader;
1720 
1721    gs_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, dxil_get_input_primitive(s->info.gs.input_primitive));
1722    gs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.vertices_out);
1723    gs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.gs.active_stream_mask, 1));
1724    gs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, dxil_get_primitive_topology(s->info.gs.output_primitive));
1725    gs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.invocations);
1726 
1727    for (unsigned i = 0; i < ARRAY_SIZE(gs_state_nodes); ++i) {
1728       if (!gs_state_nodes[i])
1729          return NULL;
1730    }
1731 
1732    return dxil_get_metadata_node(&ctx->mod, gs_state_nodes, ARRAY_SIZE(gs_state_nodes));
1733 }
1734 
1735 static enum dxil_tessellator_domain
1736 get_tessellator_domain(enum tess_primitive_mode primitive_mode)
1737 {
1738    switch (primitive_mode) {
1739    case TESS_PRIMITIVE_QUADS: return DXIL_TESSELLATOR_DOMAIN_QUAD;
1740    case TESS_PRIMITIVE_TRIANGLES: return DXIL_TESSELLATOR_DOMAIN_TRI;
1741    case TESS_PRIMITIVE_ISOLINES: return DXIL_TESSELLATOR_DOMAIN_ISOLINE;
1742    default:
1743       unreachable("Invalid tessellator primitive mode");
1744    }
1745 }
1746 
1747 static enum dxil_tessellator_partitioning
1748 get_tessellator_partitioning(enum gl_tess_spacing spacing)
1749 {
1750    switch (spacing) {
1751    default:
1752    case TESS_SPACING_EQUAL:
1753       return DXIL_TESSELLATOR_PARTITIONING_INTEGER;
1754    case TESS_SPACING_FRACTIONAL_EVEN:
1755       return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN;
1756    case TESS_SPACING_FRACTIONAL_ODD:
1757       return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD;
1758    }
1759 }
1760 
1761 static enum dxil_tessellator_output_primitive
1762 get_tessellator_output_primitive(const struct shader_info *info)
1763 {
1764    if (info->tess.point_mode)
1765       return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_POINT;
1766    if (info->tess._primitive_mode == TESS_PRIMITIVE_ISOLINES)
1767       return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_LINE;
1768    /* Note: GL tessellation domain is inverted from D3D, which means triangle
1769     * winding needs to be inverted.
1770     */
1771    if (info->tess.ccw)
1772       return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CW;
1773    return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CCW;
1774 }
1775 
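/* HS state metadata: patch-constant function, input and output control
 * point counts, domain, partitioning, output primitive, and the maximum
 * tessellation factor (64.0).
 */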
1776 static const struct dxil_mdnode *
1777 emit_hs_state(struct ntd_context *ctx)
1778 {
1779    const struct dxil_mdnode *hs_state_nodes[7];
1780 
1781    hs_state_nodes[0] = dxil_get_metadata_func(&ctx->mod, ctx->tess_ctrl_patch_constant_func_def->func);
1782    hs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->tess_input_control_point_count);
1783    hs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out);
1784    hs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode));
1785    hs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_partitioning(ctx->shader->info.tess.spacing));
1786    hs_state_nodes[5] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_output_primitive(&ctx->shader->info));
1787    hs_state_nodes[6] = dxil_get_metadata_float32(&ctx->mod, 64.0f);
1788 
1789    return dxil_get_metadata_node(&ctx->mod, hs_state_nodes, ARRAY_SIZE(hs_state_nodes));
1790 }
1791 
1792 static const struct dxil_mdnode *
1793 emit_ds_state(struct ntd_context *ctx)
1794 {
1795    const struct dxil_mdnode *ds_state_nodes[2];
1796 
1797    ds_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode));
1798    ds_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out);
1799 
1800    return dxil_get_metadata_node(&ctx->mod, ds_state_nodes, ARRAY_SIZE(ds_state_nodes));
1801 }
1802 
1803 static const struct dxil_mdnode *
1804 emit_threads(struct ntd_context *ctx)
1805 {
1806    const nir_shader *s = ctx->shader;
1807    const struct dxil_mdnode *threads_x = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[0], 1));
1808    const struct dxil_mdnode *threads_y = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[1], 1));
1809    const struct dxil_mdnode *threads_z = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[2], 1));
1810    if (!threads_x || !threads_y || !threads_z)
1811       return NULL;
1812 
1813    const struct dxil_mdnode *threads_nodes[] = { threads_x, threads_y, threads_z };
1814    return dxil_get_metadata_node(&ctx->mod, threads_nodes, ARRAY_SIZE(threads_nodes));
1815 }
1816 
1817 static const struct dxil_mdnode *
1818 emit_wave_size(struct ntd_context *ctx)
1819 {
1820    const nir_shader *s = ctx->shader;
1821    const struct dxil_mdnode *wave_size_node = dxil_get_metadata_int32(&ctx->mod, s->info.subgroup_size);
1822    return dxil_get_metadata_node(&ctx->mod, &wave_size_node, 1);
1823 }
1824 
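/* SM6.8 replaces the single wave-size tag with a three-entry range node;
 * NIR only carries one subgroup size, so all three entries are the same.
 */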
1825 static const struct dxil_mdnode *
1826 emit_wave_size_range(struct ntd_context *ctx)
1827 {
1828    const nir_shader *s = ctx->shader;
1829    const struct dxil_mdnode *wave_size_nodes[3];
1830    wave_size_nodes[0] = dxil_get_metadata_int32(&ctx->mod, s->info.subgroup_size);
1831    wave_size_nodes[1] = wave_size_nodes[0];
1832    wave_size_nodes[2] = wave_size_nodes[0];
1833    return dxil_get_metadata_node(&ctx->mod, wave_size_nodes, ARRAY_SIZE(wave_size_nodes));
1834 }
1835 
1836 static int64_t
1837 get_module_flags(struct ntd_context *ctx)
1838 {
1839    /* See the DXIL documentation for the definition of these flags:
1840     *
1841     * https://github.com/Microsoft/DirectXShaderCompiler/blob/master/docs/DXIL.rst#shader-flags
1842     */
1843 
1844    uint64_t flags = 0;
1845    if (ctx->mod.feats.doubles)
1846       flags |= (1 << 2);
1847    if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT &&
1848        ctx->shader->info.fs.early_fragment_tests)
1849       flags |= (1 << 3);
1850    if (ctx->mod.raw_and_structured_buffers)
1851       flags |= (1 << 4);
1852    if (ctx->mod.feats.min_precision)
1853       flags |= (1 << 5);
1854    if (ctx->mod.feats.dx11_1_double_extensions)
1855       flags |= (1 << 6);
1856    if (ctx->mod.feats.array_layer_from_vs_or_ds)
1857       flags |= (1 << 9);
1858    if (ctx->mod.feats.inner_coverage)
1859       flags |= (1 << 10);
1860    if (ctx->mod.feats.stencil_ref)
1861       flags |= (1 << 11);
1862    if (ctx->mod.feats.tiled_resources)
1863       flags |= (1 << 12);
1864    if (ctx->mod.feats.typed_uav_load_additional_formats)
1865       flags |= (1 << 13);
1866    if (ctx->mod.feats.use_64uavs)
1867       flags |= (1 << 15);
1868    if (ctx->mod.feats.uavs_at_every_stage)
1869       flags |= (1 << 16);
1870    if (ctx->mod.feats.cs_4x_raw_sb)
1871       flags |= (1 << 17);
1872    if (ctx->mod.feats.rovs)
1873       flags |= (1 << 18);
1874    if (ctx->mod.feats.wave_ops)
1875       flags |= (1 << 19);
1876    if (ctx->mod.feats.int64_ops)
1877       flags |= (1 << 20);
1878    if (ctx->mod.feats.view_id)
1879       flags |= (1 << 21);
1880    if (ctx->mod.feats.barycentrics)
1881       flags |= (1 << 22);
1882    if (ctx->mod.feats.native_low_precision)
1883       flags |= (1 << 23) | (1 << 5);
1884    if (ctx->mod.feats.shading_rate)
1885       flags |= (1 << 24);
1886    if (ctx->mod.feats.raytracing_tier_1_1)
1887       flags |= (1 << 25);
1888    if (ctx->mod.feats.sampler_feedback)
1889       flags |= (1 << 26);
1890    if (ctx->mod.feats.atomic_int64_typed)
1891       flags |= (1 << 27);
1892    if (ctx->mod.feats.atomic_int64_tgsm)
1893       flags |= (1 << 28);
1894    if (ctx->mod.feats.derivatives_in_mesh_or_amp)
1895       flags |= (1 << 29);
1896    if (ctx->mod.feats.resource_descriptor_heap_indexing)
1897       flags |= (1 << 30);
1898    if (ctx->mod.feats.sampler_descriptor_heap_indexing)
1899       flags |= (1ull << 31);
1900    if (ctx->mod.feats.atomic_int64_heap_resource)
1901       flags |= (1ull << 32);
1902    if (ctx->mod.feats.advanced_texture_ops)
1903       flags |= (1ull << 34);
1904    if (ctx->mod.feats.writable_msaa)
1905       flags |= (1ull << 35);
1906    // Bit 36 is wave MMA
1907    if (ctx->mod.feats.sample_cmp_bias_gradient)
1908       flags |= (1ull << 37);
1909    if (ctx->mod.feats.extended_command_info)
1910       flags |= (1ull << 38);
1911 
1912    if (ctx->opts->disable_math_refactoring)
1913       flags |= (1 << 1);
1914 
1915    /* Work around https://github.com/microsoft/DirectXShaderCompiler/issues/4616:
1916     * when targeting SM6.7 with at least one UAV and no other flags set, set the
1917     * resources-may-not-alias flag, otherwise the DXIL validator may read an
1918     * uninitialized flags value and fail validation.
1919     */
1920    if (flags == 0 && ctx->mod.minor_version >= 7 && ctx->num_uavs > 0)
1921       flags |= (1ull << 33);
1922 
1923    return flags;
1924 }
1925 
1926 static const struct dxil_mdnode *
1927 emit_entrypoint(struct ntd_context *ctx,
1928                 const struct dxil_func *func, const char *name,
1929                 const struct dxil_mdnode *signatures,
1930                 const struct dxil_mdnode *resources,
1931                 const struct dxil_mdnode *shader_props)
1932 {
1933    char truncated_name[254] = { 0 };
1934    strncpy(truncated_name, name, ARRAY_SIZE(truncated_name) - 1);
1935 
1936    const struct dxil_mdnode *func_md = dxil_get_metadata_func(&ctx->mod, func);
1937    const struct dxil_mdnode *name_md = dxil_get_metadata_string(&ctx->mod, truncated_name);
1938    const struct dxil_mdnode *nodes[] = {
1939       func_md,
1940       name_md,
1941       signatures,
1942       resources,
1943       shader_props
1944    };
1945    return dxil_get_metadata_node(&ctx->mod, nodes,
1946                                  ARRAY_SIZE(nodes));
1947 }
1948 
1949 static const struct dxil_mdnode *
1950 emit_resources(struct ntd_context *ctx)
1951 {
1952    bool emit_resources = false;
1953    const struct dxil_mdnode *resources_nodes[] = {
1954       NULL, NULL, NULL, NULL
1955    };
1956 
1957 #define ARRAY_AND_SIZE(arr) arr.data, util_dynarray_num_elements(&arr, const struct dxil_mdnode *)
1958 
1959    if (ctx->srv_metadata_nodes.size) {
1960       resources_nodes[0] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->srv_metadata_nodes));
1961       emit_resources = true;
1962    }
1963 
1964    if (ctx->uav_metadata_nodes.size) {
1965       resources_nodes[1] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->uav_metadata_nodes));
1966       emit_resources = true;
1967    }
1968 
1969    if (ctx->cbv_metadata_nodes.size) {
1970       resources_nodes[2] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->cbv_metadata_nodes));
1971       emit_resources = true;
1972    }
1973 
1974    if (ctx->sampler_metadata_nodes.size) {
1975       resources_nodes[3] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->sampler_metadata_nodes));
1976       emit_resources = true;
1977    }
1978 
1979 #undef ARRAY_AND_SIZE
1980 
1981    return emit_resources ?
1982       dxil_get_metadata_node(&ctx->mod, resources_nodes, ARRAY_SIZE(resources_nodes)): NULL;
1983 }
1984 
1985 static bool
1986 emit_tag(struct ntd_context *ctx, enum dxil_shader_tag tag,
1987          const struct dxil_mdnode *value_node)
1988 {
1989    const struct dxil_mdnode *tag_node = dxil_get_metadata_int32(&ctx->mod, tag);
1990    if (!tag_node || !value_node)
1991       return false;
1992    assert(ctx->num_shader_property_nodes <= ARRAY_SIZE(ctx->shader_property_nodes) - 2);
1993    ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = tag_node;
1994    ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = value_node;
1995 
1996    return true;
1997 }
1998 
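/* Emit the module-level named metadata: llvm.ident, dx.version, dx.valver,
 * the shader model, dx.resources, dx.viewIdState (when I/O dependency
 * analysis succeeds), dx.typeAnnotations, and dx.entryPoints.
 */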
1999 static bool
2000 emit_metadata(struct ntd_context *ctx)
2001 {
2002    /* DXIL versions are 1.x for shader model 6.x */
2003    assert(ctx->mod.major_version == 6);
2004    unsigned dxilMajor = 1;
2005    unsigned dxilMinor = ctx->mod.minor_version;
2006    unsigned valMajor = ctx->mod.major_validator;
2007    unsigned valMinor = ctx->mod.minor_validator;
2008    if (!emit_llvm_ident(&ctx->mod) ||
2009        !emit_named_version(&ctx->mod, "dx.version", dxilMajor, dxilMinor) ||
2010        !emit_named_version(&ctx->mod, "dx.valver", valMajor, valMinor) ||
2011        !emit_dx_shader_model(&ctx->mod))
2012       return false;
2013 
2014    const struct dxil_func_def *main_func_def = ctx->main_func_def;
2015    if (!main_func_def)
2016       return false;
2017    const struct dxil_func *main_func = main_func_def->func;
2018 
2019    const struct dxil_mdnode *resources_node = emit_resources(ctx);
2020 
2021    const struct dxil_mdnode *main_entrypoint = dxil_get_metadata_func(&ctx->mod, main_func);
2022    const struct dxil_mdnode *node27 = dxil_get_metadata_node(&ctx->mod, NULL, 0);
2023 
2024    const struct dxil_mdnode *node4 = dxil_get_metadata_int32(&ctx->mod, 0);
2025    const struct dxil_mdnode *nodes_4_27_27[] = {
2026       node4, node27, node27
2027    };
2028    const struct dxil_mdnode *node28 = dxil_get_metadata_node(&ctx->mod, nodes_4_27_27,
2029                                                       ARRAY_SIZE(nodes_4_27_27));
2030 
2031    const struct dxil_mdnode *node29 = dxil_get_metadata_node(&ctx->mod, &node28, 1);
2032 
2033    const struct dxil_mdnode *node3 = dxil_get_metadata_int32(&ctx->mod, 1);
2034    const struct dxil_mdnode *main_type_annotation_nodes[] = {
2035       node3, main_entrypoint, node29
2036    };
2037    const struct dxil_mdnode *main_type_annotation = dxil_get_metadata_node(&ctx->mod, main_type_annotation_nodes,
2038                                                                            ARRAY_SIZE(main_type_annotation_nodes));
2039 
2040    if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) {
2041       if (!emit_tag(ctx, DXIL_SHADER_TAG_GS_STATE, emit_gs_state(ctx)))
2042          return false;
2043    } else if (ctx->mod.shader_kind == DXIL_HULL_SHADER) {
2044       ctx->tess_input_control_point_count = 32;
2045       nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
2046          if (nir_is_arrayed_io(var, MESA_SHADER_TESS_CTRL)) {
2047             ctx->tess_input_control_point_count = glsl_array_size(var->type);
2048             break;
2049          }
2050       }
2051 
2052       if (!emit_tag(ctx, DXIL_SHADER_TAG_HS_STATE, emit_hs_state(ctx)))
2053          return false;
2054    } else if (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) {
2055       if (!emit_tag(ctx, DXIL_SHADER_TAG_DS_STATE, emit_ds_state(ctx)))
2056          return false;
2057    } else if (ctx->mod.shader_kind == DXIL_COMPUTE_SHADER) {
2058       if (!emit_tag(ctx, DXIL_SHADER_TAG_NUM_THREADS, emit_threads(ctx)))
2059          return false;
2060       if (ctx->mod.minor_version >= 6 &&
2061           ctx->shader->info.subgroup_size >= SUBGROUP_SIZE_REQUIRE_8) {
2062          if (ctx->mod.minor_version < 8) {
2063             if (!emit_tag(ctx, DXIL_SHADER_TAG_WAVE_SIZE, emit_wave_size(ctx)))
2064                return false;
2065          } else {
2066             if (!emit_tag(ctx, DXIL_SHADER_TAG_WAVE_SIZE_RANGE, emit_wave_size_range(ctx)))
2067                return false;
2068          }
2069       }
2070    }
2071 
2072    uint64_t flags = get_module_flags(ctx);
2073    if (flags != 0) {
2074       if (!emit_tag(ctx, DXIL_SHADER_TAG_FLAGS, dxil_get_metadata_int64(&ctx->mod, flags)))
2075          return false;
2076    }
2077    const struct dxil_mdnode *shader_properties = NULL;
2078    if (ctx->num_shader_property_nodes > 0) {
2079       shader_properties = dxil_get_metadata_node(&ctx->mod, ctx->shader_property_nodes,
2080                                                  ctx->num_shader_property_nodes);
2081       if (!shader_properties)
2082          return false;
2083    }
2084 
2085    nir_function_impl *entry_func_impl = nir_shader_get_entrypoint(ctx->shader);
2086    const struct dxil_mdnode *dx_entry_point = emit_entrypoint(ctx, main_func,
2087        entry_func_impl->function->name, get_signatures(&ctx->mod), resources_node, shader_properties);
2088    if (!dx_entry_point)
2089       return false;
2090 
2091    if (resources_node) {
2092       const struct dxil_mdnode *dx_resources = resources_node;
2093       dxil_add_metadata_named_node(&ctx->mod, "dx.resources",
2094                                        &dx_resources, 1);
2095    }
2096 
2097    if (ctx->mod.minor_version >= 2 &&
2098        dxil_nir_analyze_io_dependencies(&ctx->mod, ctx->shader)) {
2099       const struct dxil_type *i32_type = dxil_module_get_int_type(&ctx->mod, 32);
2100       if (!i32_type)
2101          return false;
2102 
2103       const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, i32_type, ctx->mod.serialized_dependency_table_size);
2104       if (!array_type)
2105          return false;
2106 
2107       const struct dxil_value **array_entries = malloc(sizeof(const struct dxil_value *) * ctx->mod.serialized_dependency_table_size);
2108       if (!array_entries)
2109          return false;
2110 
2111       for (uint32_t i = 0; i < ctx->mod.serialized_dependency_table_size; ++i)
2112          array_entries[i] = dxil_module_get_int32_const(&ctx->mod, ctx->mod.serialized_dependency_table[i]);
2113       const struct dxil_value *array_val = dxil_module_get_array_const(&ctx->mod, array_type, array_entries);
2114       free((void *)array_entries);
2115 
2116       const struct dxil_mdnode *view_id_state_val = dxil_get_metadata_value(&ctx->mod, array_type, array_val);
2117       if (!view_id_state_val)
2118          return false;
2119 
2120       const struct dxil_mdnode *view_id_state_node = dxil_get_metadata_node(&ctx->mod, &view_id_state_val, 1);
2121 
2122       dxil_add_metadata_named_node(&ctx->mod, "dx.viewIdState", &view_id_state_node, 1);
2123    }
2124 
2125    const struct dxil_mdnode *dx_type_annotations[] = { main_type_annotation };
2126    return dxil_add_metadata_named_node(&ctx->mod, "dx.typeAnnotations",
2127                                        dx_type_annotations,
2128                                        ARRAY_SIZE(dx_type_annotations)) &&
2129           dxil_add_metadata_named_node(&ctx->mod, "dx.entryPoints",
2130                                        &dx_entry_point, 1);
2131 }
2132 
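/* DXIL values carry a concrete int or float type; these helpers reinterpret
 * the bits when a consumer expects the other flavor of the same bit size.
 */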
2133 static const struct dxil_value *
2134 bitcast_to_int(struct ntd_context *ctx, unsigned bit_size,
2135                const struct dxil_value *value)
2136 {
2137    const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod, bit_size);
2138    if (!type)
2139       return NULL;
2140 
2141    return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
2142 }
2143 
2144 static const struct dxil_value *
2145 bitcast_to_float(struct ntd_context *ctx, unsigned bit_size,
2146                  const struct dxil_value *value)
2147 {
2148    const struct dxil_type *type = dxil_module_get_float_type(&ctx->mod, bit_size);
2149    if (!type)
2150       return NULL;
2151 
2152    return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
2153 }
2154 
2155 static bool
2156 is_phi_src(nir_def *ssa)
2157 {
2158    nir_foreach_use(src, ssa)
2159       if (nir_src_parent_instr(src)->type == nir_instr_type_phi)
2160          return true;
2161    return false;
2162 }
2163 
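/* Store one channel of an SSA def. Values feeding phis are normalized to
 * the type recorded in int_types/float_types so that every predecessor
 * hands the phi the same DXIL type.
 */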
2164 static void
2165 store_ssa_def(struct ntd_context *ctx, nir_def *ssa, unsigned chan,
2166               const struct dxil_value *value)
2167 {
2168    assert(ssa->index < ctx->num_defs);
2169    assert(chan < ssa->num_components);
2170    /* Insert bitcasts for phi srcs in the parent block */
2171    if (is_phi_src(ssa)) {
2172       /* Prefer ints over floats if it could be both or if we have no type info */
2173       nir_alu_type expect_type =
2174          BITSET_TEST(ctx->int_types, ssa->index) ? nir_type_int :
2175          (BITSET_TEST(ctx->float_types, ssa->index) ? nir_type_float :
2176           nir_type_int);
2177       assert(ssa->bit_size != 1 || expect_type == nir_type_int);
2178       if (ssa->bit_size != 1 && expect_type != dxil_type_to_nir_type(dxil_value_get_type(value)))
2179          value = dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST,
2180                                 expect_type == nir_type_int ?
2181                                  dxil_module_get_int_type(&ctx->mod, ssa->bit_size) :
2182                                  dxil_module_get_float_type(&ctx->mod, ssa->bit_size), value);
2183       if (ssa->bit_size == 64) {
2184          if (expect_type == nir_type_int)
2185             ctx->mod.feats.int64_ops = true;
2186          if (expect_type == nir_type_float)
2187             ctx->mod.feats.doubles = true;
2188       }
2189    }
2190    ctx->defs[ssa->index].chans[chan] = value;
2191 }
2192 
2193 static void
2194 store_def(struct ntd_context *ctx, nir_def *def, unsigned chan,
2195            const struct dxil_value *value)
2196 {
2197    const struct dxil_type *type = dxil_value_get_type(value);
2198    if (type == ctx->mod.float64_type)
2199       ctx->mod.feats.doubles = true;
2200    if (type == ctx->mod.float16_type ||
2201        type == ctx->mod.int16_type)
2202       ctx->mod.feats.min_precision = true;
2203    if (type == ctx->mod.int64_type)
2204       ctx->mod.feats.int64_ops = true;
2205    store_ssa_def(ctx, def, chan, value);
2206 }
2207 
2208 static void
2209 store_alu_dest(struct ntd_context *ctx, nir_alu_instr *alu, unsigned chan,
2210                const struct dxil_value *value)
2211 {
2212    store_def(ctx, &alu->def, chan, value);
2213 }
2214 
2215 static const struct dxil_value *
2216 get_src_ssa(struct ntd_context *ctx, const nir_def *ssa, unsigned chan)
2217 {
2218    assert(ssa->index < ctx->num_defs);
2219    assert(chan < ssa->num_components);
2220    assert(ctx->defs[ssa->index].chans[chan]);
2221    return ctx->defs[ssa->index].chans[chan];
2222 }
2223 
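/* Fetch one channel of a NIR source and bitcast it to the requested ALU
 * type; booleans wider than one bit are truncated to i1.
 */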
2224 static const struct dxil_value *
2225 get_src(struct ntd_context *ctx, nir_src *src, unsigned chan,
2226         nir_alu_type type)
2227 {
2228    const struct dxil_value *value = get_src_ssa(ctx, src->ssa, chan);
2229 
2230    const int bit_size = nir_src_bit_size(*src);
2231 
2232    switch (nir_alu_type_get_base_type(type)) {
2233    case nir_type_int:
2234    case nir_type_uint: {
2235       const struct dxil_type *expect_type = dxil_module_get_int_type(&ctx->mod, bit_size);
2236       /* nothing to do */
2237       if (dxil_value_type_equal_to(value, expect_type)) {
2238          assert(bit_size != 64 || ctx->mod.feats.int64_ops);
2239          return value;
2240       }
2241       if (bit_size == 64) {
2242          assert(ctx->mod.feats.doubles);
2243          ctx->mod.feats.int64_ops = true;
2244       }
2245       if (bit_size == 16)
2246          ctx->mod.feats.native_low_precision = true;
2247       assert(dxil_value_type_bitsize_equal_to(value, bit_size));
2248       return bitcast_to_int(ctx, bit_size, value);
2249       }
2250 
2251    case nir_type_float:
2252       assert(nir_src_bit_size(*src) >= 16);
2253       if (dxil_value_type_equal_to(value, dxil_module_get_float_type(&ctx->mod, bit_size))) {
2254          assert(nir_src_bit_size(*src) != 64 || ctx->mod.feats.doubles);
2255          return value;
2256       }
2257       if (bit_size == 64) {
2258          assert(ctx->mod.feats.int64_ops);
2259          ctx->mod.feats.doubles = true;
2260       }
2261       if (bit_size == 16)
2262          ctx->mod.feats.native_low_precision = true;
2263       assert(dxil_value_type_bitsize_equal_to(value, bit_size));
2264       return bitcast_to_float(ctx, bit_size, value);
2265 
2266    case nir_type_bool:
2267       if (!dxil_value_type_bitsize_equal_to(value, 1)) {
2268          return dxil_emit_cast(&ctx->mod, DXIL_CAST_TRUNC,
2269                                dxil_module_get_int_type(&ctx->mod, 1), value);
2270       }
2271       return value;
2272 
2273    default:
2274       unreachable("unexpected nir_alu_type");
2275    }
2276 }
2277 
2278 static const struct dxil_value *
2279 get_alu_src(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src)
2280 {
2281    unsigned chan = alu->src[src].swizzle[0];
2282    return get_src(ctx, &alu->src[src].src, chan,
2283                   nir_op_infos[alu->op].input_types[src]);
2284 }
2285 
2286 static bool
2287 emit_binop(struct ntd_context *ctx, nir_alu_instr *alu,
2288            enum dxil_bin_opcode opcode,
2289            const struct dxil_value *op0, const struct dxil_value *op1)
2290 {
2291    bool is_float_op = nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) == nir_type_float;
2292 
2293    enum dxil_opt_flags flags = 0;
2294    if (is_float_op && !alu->exact)
2295       flags |= DXIL_UNSAFE_ALGEBRA;
2296 
2297    const struct dxil_value *v = dxil_emit_binop(&ctx->mod, opcode, op0, op1, flags);
2298    if (!v)
2299       return false;
2300    store_alu_dest(ctx, alu, 0, v);
2301    return true;
2302 }
2303 
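/* Mask the shift amount with bit_size - 1, since LLVM/DXIL shifts by
 * bit_size or more produce undefined results; constant amounts are masked
 * and folded directly.
 */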
2304 static bool
2305 emit_shift(struct ntd_context *ctx, nir_alu_instr *alu,
2306            enum dxil_bin_opcode opcode,
2307            const struct dxil_value *op0, const struct dxil_value *op1)
2308 {
2309    unsigned op0_bit_size = nir_src_bit_size(alu->src[0].src);
2310    unsigned op1_bit_size = nir_src_bit_size(alu->src[1].src);
2311 
2312    uint64_t shift_mask = op0_bit_size - 1;
2313    if (!nir_src_is_const(alu->src[1].src)) {
2314       if (op0_bit_size != op1_bit_size) {
2315          const struct dxil_type *type =
2316             dxil_module_get_int_type(&ctx->mod, op0_bit_size);
2317          enum dxil_cast_opcode cast_op =
2318             op1_bit_size < op0_bit_size ? DXIL_CAST_ZEXT : DXIL_CAST_TRUNC;
2319          op1 = dxil_emit_cast(&ctx->mod, cast_op, type, op1);
2320       }
2321       op1 = dxil_emit_binop(&ctx->mod, DXIL_BINOP_AND,
2322                             op1,
2323                             dxil_module_get_int_const(&ctx->mod, shift_mask, op0_bit_size),
2324                             0);
2325    } else {
2326       uint64_t val = nir_scalar_as_uint(
2327          nir_scalar_chase_alu_src(nir_get_scalar(&alu->def, 0), 1));
2328       op1 = dxil_module_get_int_const(&ctx->mod, val & shift_mask, op0_bit_size);
2329    }
2330 
2331    const struct dxil_value *v =
2332       dxil_emit_binop(&ctx->mod, opcode, op0, op1, 0);
2333    if (!v)
2334       return false;
2335    store_alu_dest(ctx, alu, 0, v);
2336    return true;
2337 }
2338 
2339 static bool
2340 emit_cmp(struct ntd_context *ctx, nir_alu_instr *alu,
2341          enum dxil_cmp_pred pred,
2342          const struct dxil_value *op0, const struct dxil_value *op1)
2343 {
2344    const struct dxil_value *v = dxil_emit_cmp(&ctx->mod, pred, op0, op1);
2345    if (!v)
2346       return false;
2347    store_alu_dest(ctx, alu, 0, v);
2348    return true;
2349 }
2350 
2351 static enum dxil_cast_opcode
2352 get_cast_op(nir_alu_instr *alu)
2353 {
2354    unsigned dst_bits = alu->def.bit_size;
2355    unsigned src_bits = nir_src_bit_size(alu->src[0].src);
2356 
2357    switch (alu->op) {
2358    /* bool -> int */
2359    case nir_op_b2i16:
2360    case nir_op_b2i32:
2361    case nir_op_b2i64:
2362       return DXIL_CAST_ZEXT;
2363 
2364    /* float -> float */
2365    case nir_op_f2f16_rtz:
2366    case nir_op_f2f16:
2367    case nir_op_f2fmp:
2368    case nir_op_f2f32:
2369    case nir_op_f2f64:
2370       assert(dst_bits != src_bits);
2371       if (dst_bits < src_bits)
2372          return DXIL_CAST_FPTRUNC;
2373       else
2374          return DXIL_CAST_FPEXT;
2375 
2376    /* int -> int */
2377    case nir_op_i2i1:
2378    case nir_op_i2i16:
2379    case nir_op_i2imp:
2380    case nir_op_i2i32:
2381    case nir_op_i2i64:
2382       assert(dst_bits != src_bits);
2383       if (dst_bits < src_bits)
2384          return DXIL_CAST_TRUNC;
2385       else
2386          return DXIL_CAST_SEXT;
2387 
2388    /* uint -> uint */
2389    case nir_op_u2u1:
2390    case nir_op_u2u16:
2391    case nir_op_u2u32:
2392    case nir_op_u2u64:
2393       assert(dst_bits != src_bits);
2394       if (dst_bits < src_bits)
2395          return DXIL_CAST_TRUNC;
2396       else
2397          return DXIL_CAST_ZEXT;
2398 
2399    /* float -> int */
2400    case nir_op_f2i16:
2401    case nir_op_f2imp:
2402    case nir_op_f2i32:
2403    case nir_op_f2i64:
2404       return DXIL_CAST_FPTOSI;
2405 
2406    /* float -> uint */
2407    case nir_op_f2u16:
2408    case nir_op_f2ump:
2409    case nir_op_f2u32:
2410    case nir_op_f2u64:
2411       return DXIL_CAST_FPTOUI;
2412 
2413    /* int -> float */
2414    case nir_op_i2f16:
2415    case nir_op_i2fmp:
2416    case nir_op_i2f32:
2417    case nir_op_i2f64:
2418       return DXIL_CAST_SITOFP;
2419 
2420    /* uint -> float */
2421    case nir_op_u2f16:
2422    case nir_op_u2fmp:
2423    case nir_op_u2f32:
2424    case nir_op_u2f64:
2425       return DXIL_CAST_UITOFP;
2426 
2427    default:
2428       unreachable("unexpected cast op");
2429    }
2430 }
2431 
2432 static const struct dxil_type *
2433 get_cast_dest_type(struct ntd_context *ctx, nir_alu_instr *alu)
2434 {
2435    unsigned dst_bits = alu->def.bit_size;
2436    switch (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type)) {
2437    case nir_type_bool:
2438       assert(dst_bits == 1);
2439       FALLTHROUGH;
2440    case nir_type_int:
2441    case nir_type_uint:
2442       return dxil_module_get_int_type(&ctx->mod, dst_bits);
2443 
2444    case nir_type_float:
2445       return dxil_module_get_float_type(&ctx->mod, dst_bits);
2446 
2447    default:
2448       unreachable("unknown nir_alu_type");
2449    }
2450 }
2451 
2452 static bool
2453 is_double(nir_alu_type alu_type, unsigned bit_size)
2454 {
2455    return nir_alu_type_get_base_type(alu_type) == nir_type_float &&
2456           bit_size == 64;
2457 }
2458 
2459 static bool
2460 emit_cast(struct ntd_context *ctx, nir_alu_instr *alu,
2461           const struct dxil_value *value)
2462 {
2463    enum dxil_cast_opcode opcode = get_cast_op(alu);
2464    const struct dxil_type *type = get_cast_dest_type(ctx, alu);
2465    if (!type)
2466       return false;
2467 
2468    const nir_op_info *info = &nir_op_infos[alu->op];
2469    switch (opcode) {
2470    case DXIL_CAST_UITOFP:
2471    case DXIL_CAST_SITOFP:
2472       if (is_double(info->output_type, alu->def.bit_size))
2473          ctx->mod.feats.dx11_1_double_extensions = true;
2474       break;
2475    case DXIL_CAST_FPTOUI:
2476    case DXIL_CAST_FPTOSI:
2477       if (is_double(info->input_types[0], nir_src_bit_size(alu->src[0].src)))
2478          ctx->mod.feats.dx11_1_double_extensions = true;
2479       break;
2480    default:
2481       break;
2482    }
2483 
2484    if (alu->def.bit_size == 16) {
2485       switch (alu->op) {
2486       case nir_op_f2fmp:
2487       case nir_op_i2imp:
2488       case nir_op_f2imp:
2489       case nir_op_f2ump:
2490       case nir_op_i2fmp:
2491       case nir_op_u2fmp:
2492          break;
2493       default:
2494          ctx->mod.feats.native_low_precision = true;
2495       }
2496    }
2497 
2498    const struct dxil_value *v = dxil_emit_cast(&ctx->mod, opcode, type,
2499                                                value);
2500    if (!v)
2501       return false;
2502    store_alu_dest(ctx, alu, 0, v);
2503    return true;
2504 }
2505 
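/* Map an ALU type and bit size to the DXIL overload used to mangle
 * dx.op.* intrinsic names (e.g. f32, i64).
 */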
2506 static enum overload_type
2507 get_overload(nir_alu_type alu_type, unsigned bit_size)
2508 {
2509    switch (nir_alu_type_get_base_type(alu_type)) {
2510    case nir_type_int:
2511    case nir_type_uint:
2512       switch (bit_size) {
2513       case 1: return DXIL_I1;
2514       case 16: return DXIL_I16;
2515       case 32: return DXIL_I32;
2516       case 64: return DXIL_I64;
2517       default:
2518          unreachable("unexpected bit_size");
2519       }
2520    case nir_type_float:
2521       switch (bit_size) {
2522       case 16: return DXIL_F16;
2523       case 32: return DXIL_F32;
2524       case 64: return DXIL_F64;
2525       default:
2526          unreachable("unexpected bit_size");
2527       }
2528    case nir_type_invalid:
2529       return DXIL_NONE;
2530    default:
2531       unreachable("unexpected output type");
2532    }
2533 }
2534 
2535 static enum overload_type
2536 get_ambiguous_overload(struct ntd_context *ctx, nir_intrinsic_instr *intr,
2537                        enum overload_type default_type)
2538 {
2539    if (BITSET_TEST(ctx->int_types, intr->def.index))
2540       return get_overload(nir_type_int, intr->def.bit_size);
2541    if (BITSET_TEST(ctx->float_types, intr->def.index))
2542       return get_overload(nir_type_float, intr->def.bit_size);
2543    return default_type;
2544 }
2545 
2546 static enum overload_type
2547 get_ambiguous_overload_alu_type(struct ntd_context *ctx, nir_intrinsic_instr *intr,
2548                                 nir_alu_type alu_type)
2549 {
2550    return get_ambiguous_overload(ctx, intr, get_overload(alu_type, intr->def.bit_size));
2551 }
2552 
2553 static bool
2554 emit_unary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
2555                  enum dxil_intr intr, const struct dxil_value *op)
2556 {
2557    const nir_op_info *info = &nir_op_infos[alu->op];
2558    unsigned src_bits = nir_src_bit_size(alu->src[0].src);
2559    enum overload_type overload = get_overload(info->input_types[0], src_bits);
2560 
2561    const struct dxil_value *v = emit_unary_call(ctx, overload, intr, op);
2562    if (!v)
2563       return false;
2564    store_alu_dest(ctx, alu, 0, v);
2565    return true;
2566 }
2567 
2568 static bool
2569 emit_binary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
2570                   enum dxil_intr intr,
2571                   const struct dxil_value *op0, const struct dxil_value *op1)
2572 {
2573    const nir_op_info *info = &nir_op_infos[alu->op];
2574    assert(info->output_type == info->input_types[0]);
2575    assert(info->output_type == info->input_types[1]);
2576    unsigned dst_bits = alu->def.bit_size;
2577    assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
2578    assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
2579    enum overload_type overload = get_overload(info->output_type, dst_bits);
2580 
2581    const struct dxil_value *v = emit_binary_call(ctx, overload, intr,
2582                                                  op0, op1);
2583    if (!v)
2584       return false;
2585    store_alu_dest(ctx, alu, 0, v);
2586    return true;
2587 }
2588 
2589 static bool
2590 emit_tertiary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
2591                     enum dxil_intr intr,
2592                     const struct dxil_value *op0,
2593                     const struct dxil_value *op1,
2594                     const struct dxil_value *op2)
2595 {
2596    const nir_op_info *info = &nir_op_infos[alu->op];
2597    unsigned dst_bits = alu->def.bit_size;
2598    assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
2599    assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
2600    assert(nir_src_bit_size(alu->src[2].src) == dst_bits);
2601 
2602    assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[0], dst_bits));
2603    assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[1], dst_bits));
2604    assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[2], dst_bits));
2605 
2606    enum overload_type overload = get_overload(info->output_type, dst_bits);
2607 
2608    const struct dxil_value *v = emit_tertiary_call(ctx, overload, intr,
2609                                                    op0, op1, op2);
2610    if (!v)
2611       return false;
2612    store_alu_dest(ctx, alu, 0, v);
2613    return true;
2614 }
2615 
2616 static bool
2617 emit_bitfield_insert(struct ntd_context *ctx, nir_alu_instr *alu,
2618                      const struct dxil_value *base,
2619                      const struct dxil_value *insert,
2620                      const struct dxil_value *offset,
2621                      const struct dxil_value *width)
2622 {
2623    /* DXIL wants width, offset, insert, base; NIR gives base, insert, offset, width */
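   /* e.g. NIR bitfield_insert(base, insert, offset=4, width=8) becomes
    * BFI(8, 4, insert, base): bits [11:4] of the result come from insert,
    * the rest from base. */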
2624    const struct dxil_value *v = emit_quaternary_call(ctx, DXIL_I32, DXIL_INTR_BFI,
2625                                                      width, offset, insert, base);
2626    if (!v)
2627       return false;
2628 
2629    /* DXIL uses only the 5 LSBs of width/offset. Special-case width >= 32 to copy insert through unchanged. */
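   /* BFI only sees bits [4:0] of width, so a width of exactly 32 would wrap
    * to 0 and leave base unmodified; the select below returns insert
    * wholesale in that case instead. */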
2630    const struct dxil_value *compare_width = dxil_emit_cmp(&ctx->mod, DXIL_ICMP_SGE,
2631       width, dxil_module_get_int32_const(&ctx->mod, 32));
2632    v = dxil_emit_select(&ctx->mod, compare_width, insert, v);
2633    store_alu_dest(ctx, alu, 0, v);
2634    return true;
2635 }
2636 
2637 static bool
2638 emit_dot4add_packed(struct ntd_context *ctx, nir_alu_instr *alu,
2639                     enum dxil_intr intr,
2640                     const struct dxil_value *src0,
2641                     const struct dxil_value *src1,
2642                     const struct dxil_value *accum)
2643 {
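   /* dx.op.dot4AddPacked takes the accumulator before the two packed i8x4
    * operands; the argument array below follows that order. */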
2644    const struct dxil_func *f = dxil_get_function(&ctx->mod, "dx.op.dot4AddPacked", DXIL_I32);
2645    if (!f)
2646       return false;
2647    const struct dxil_value *srcs[] = { dxil_module_get_int32_const(&ctx->mod, intr), accum, src0, src1 };
2648    const struct dxil_value *v = dxil_emit_call(&ctx->mod, f, srcs, ARRAY_SIZE(srcs));
2649    if (!v)
2650       return false;
2651 
2652    store_alu_dest(ctx, alu, 0, v);
2653    return true;
2654 }
2655 
2656 static bool emit_select(struct ntd_context *ctx, nir_alu_instr *alu,
2657                         const struct dxil_value *sel,
2658                         const struct dxil_value *val_true,
2659                         const struct dxil_value *val_false)
2660 {
2661    assert(sel);
2662    assert(val_true);
2663    assert(val_false);
2664 
2665    const struct dxil_value *v = dxil_emit_select(&ctx->mod, sel, val_true, val_false);
2666    if (!v)
2667       return false;
2668 
2669    store_alu_dest(ctx, alu, 0, v);
2670    return true;
2671 }
2672 
2673 static bool
2674 emit_b2f16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
2675 {
2676    assert(val);
2677 
2678    struct dxil_module *m = &ctx->mod;
2679 
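   /* 0x3C00 is the IEEE 754 binary16 bit pattern for 1.0; the float16
    * constant helper takes raw bits rather than a float. */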
2680    const struct dxil_value *c1 = dxil_module_get_float16_const(m, 0x3C00);
2681    const struct dxil_value *c0 = dxil_module_get_float16_const(m, 0);
2682 
2683    if (!c0 || !c1)
2684       return false;
2685 
2686    return emit_select(ctx, alu, val, c1, c0);
2687 }
2688 
2689 static bool
2690 emit_b2f32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
2691 {
2692    assert(val);
2693 
2694    struct dxil_module *m = &ctx->mod;
2695 
2696    const struct dxil_value *c1 = dxil_module_get_float_const(m, 1.0f);
2697    const struct dxil_value *c0 = dxil_module_get_float_const(m, 0.0f);
2698 
2699    if (!c0 || !c1)
2700       return false;
2701 
2702    return emit_select(ctx, alu, val, c1, c0);
2703 }
2704 
2705 static bool
2706 emit_b2f64(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
2707 {
2708    assert(val);
2709 
2710    struct dxil_module *m = &ctx->mod;
2711 
2712    const struct dxil_value *c1 = dxil_module_get_double_const(m, 1.0);
2713    const struct dxil_value *c0 = dxil_module_get_double_const(m, 0.0);
2714 
2715    if (!c0 || !c1)
2716       return false;
2717 
2718    ctx->mod.feats.doubles = 1;
2719    return emit_select(ctx, alu, val, c1, c0);
2720 }
2721 
2722 static bool
2723 emit_f16tof32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val, bool shift)
2724 {
2725    if (shift) {
2726       val = dxil_emit_binop(&ctx->mod, DXIL_BINOP_LSHR, val,
2727          dxil_module_get_int32_const(&ctx->mod, 16), 0);
2728       if (!val)
2729          return false;
2730    }
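   /* legacyF16ToF32 always converts the low 16 bits of its i32 operand; the
    * shift above moves the high half down for unpack_half_2x16_split_y. */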
2731 
2732    const struct dxil_func *func = dxil_get_function(&ctx->mod,
2733                                                     "dx.op.legacyF16ToF32",
2734                                                     DXIL_NONE);
2735    if (!func)
2736       return false;
2737 
2738    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F16TOF32);
2739    if (!opcode)
2740       return false;
2741 
2742    const struct dxil_value *args[] = {
2743      opcode,
2744      val
2745    };
2746 
2747    const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2748    if (!v)
2749       return false;
2750    store_alu_dest(ctx, alu, 0, v);
2751    return true;
2752 }
2753 
2754 static bool
2755 emit_f32tof16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val0, const struct dxil_value *val1)
2756 {
2757    const struct dxil_func *func = dxil_get_function(&ctx->mod,
2758                                                     "dx.op.legacyF32ToF16",
2759                                                     DXIL_NONE);
2760    if (!func)
2761       return false;
2762 
2763    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F32TOF16);
2764    if (!opcode)
2765       return false;
2766 
2767    const struct dxil_value *args[] = {
2768      opcode,
2769      val0
2770    };
2771 
2772    const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2773    if (!v)
2774       return false;
2775 
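   /* The low half is done. If the high half is known to be zero it can be
    * skipped entirely; otherwise convert it too, shift it into bits [31:16],
    * and OR the two halves together. */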
2776    if (!nir_src_is_const(alu->src[1].src) || nir_src_as_int(alu->src[1].src) != 0) {
2777       args[1] = val1;
2778       const struct dxil_value *v_high = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2779       if (!v_high)
2780          return false;
2781 
2782       v_high = dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL, v_high,
2783          dxil_module_get_int32_const(&ctx->mod, 16), 0);
2784       if (!v_high)
2785          return false;
2786 
2787       v = dxil_emit_binop(&ctx->mod, DXIL_BINOP_OR, v, v_high, 0);
2788       if (!v)
2789          return false;
2790    }
2791 
2792    store_alu_dest(ctx, alu, 0, v);
2793    return true;
2794 }
2795 
2796 static bool
2797 emit_vec(struct ntd_context *ctx, nir_alu_instr *alu, unsigned num_inputs)
2798 {
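   /* Everything is scalar in DXIL: a vecN emits no instruction at all, it
    * just records each scalar source as component i of the destination. */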
2799    for (unsigned i = 0; i < num_inputs; i++) {
2800       const struct dxil_value *src =
2801          get_src_ssa(ctx, alu->src[i].src.ssa, alu->src[i].swizzle[0]);
2802       if (!src)
2803          return false;
2804 
2805       store_alu_dest(ctx, alu, i, src);
2806    }
2807    return true;
2808 }
2809 
2810 static bool
2811 emit_make_double(struct ntd_context *ctx, nir_alu_instr *alu)
2812 {
2813    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.makeDouble", DXIL_F64);
2814    if (!func)
2815       return false;
2816 
2817    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_MAKE_DOUBLE);
2818    if (!opcode)
2819       return false;
2820 
2821    const struct dxil_value *args[3] = {
2822       opcode,
2823       get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_uint32),
2824       get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[1], nir_type_uint32),
2825    };
2826    if (!args[1] || !args[2])
2827       return false;
2828 
2829    const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2830    if (!v)
2831       return false;
2832    store_def(ctx, &alu->def, 0, v);
2833    return true;
2834 }
2835 
2836 static bool
2837 emit_split_double(struct ntd_context *ctx, nir_alu_instr *alu)
2838 {
2839    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.splitDouble", DXIL_F64);
2840    if (!func)
2841       return false;
2842 
2843    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SPLIT_DOUBLE);
2844    if (!opcode)
2845       return false;
2846 
2847    const struct dxil_value *args[] = {
2848       opcode,
2849       get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_float64)
2850    };
2851    if (!args[1])
2852       return false;
2853 
2854    const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2855    if (!v)
2856       return false;
2857 
2858    const struct dxil_value *hi = dxil_emit_extractval(&ctx->mod, v, 0);
2859    const struct dxil_value *lo = dxil_emit_extractval(&ctx->mod, v, 1);
2860    if (!hi || !lo)
2861       return false;
2862 
2863    store_def(ctx, &alu->def, 0, hi);
2864    store_def(ctx, &alu->def, 1, lo);
2865    return true;
2866 }
2867 
2868 static bool
2869 emit_alu(struct ntd_context *ctx, nir_alu_instr *alu)
2870 {
2871    /* handle vec-instructions first; they are the only ones that produce
2872     * vector results.
2873     */
2874    switch (alu->op) {
2875    case nir_op_vec2:
2876    case nir_op_vec3:
2877    case nir_op_vec4:
2878    case nir_op_vec8:
2879    case nir_op_vec16:
2880       return emit_vec(ctx, alu, nir_op_infos[alu->op].num_inputs);
2881    case nir_op_mov: {
2882          assert(alu->def.num_components == 1);
2883          store_ssa_def(ctx, &alu->def, 0, get_src_ssa(ctx,
2884                         alu->src->src.ssa, alu->src->swizzle[0]));
2885          return true;
2886       }
2887    case nir_op_pack_double_2x32_dxil:
2888       return emit_make_double(ctx, alu);
2889    case nir_op_unpack_double_2x32_dxil:
2890       return emit_split_double(ctx, alu);
2891    case nir_op_bcsel: {
2892       /* Handled here to avoid a type-forced bitcast to int, since bcsel is used for both ints and floats.
2893        * Ideally the back-typing pass got both sources to match; if it didn't, explicitly fetch src2 with src1's type. */
2894       const struct dxil_value *src1 = get_src_ssa(ctx, alu->src[1].src.ssa, alu->src[1].swizzle[0]);
2895       nir_alu_type src1_type = dxil_type_to_nir_type(dxil_value_get_type(src1));
2896       return emit_select(ctx, alu,
2897                          get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_bool),
2898                          src1,
2899                          get_src(ctx, &alu->src[2].src, alu->src[2].swizzle[0], src1_type));
2900    }
2901    default:
2902       /* silence warnings */
2903       ;
2904    }
2905 
2906    /* other ops should be scalar */
2907    const struct dxil_value *src[4];
2908    assert(nir_op_infos[alu->op].num_inputs <= 4);
2909    for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
2910       src[i] = get_alu_src(ctx, alu, i);
2911       if (!src[i])
2912          return false;
2913    }
2914 
2915    switch (alu->op) {
2916    case nir_op_iadd:
2917    case nir_op_fadd: return emit_binop(ctx, alu, DXIL_BINOP_ADD, src[0], src[1]);
2918 
2919    case nir_op_isub:
2920    case nir_op_fsub: return emit_binop(ctx, alu, DXIL_BINOP_SUB, src[0], src[1]);
2921 
2922    case nir_op_imul:
2923    case nir_op_fmul: return emit_binop(ctx, alu, DXIL_BINOP_MUL, src[0], src[1]);
2924 
2925    case nir_op_fdiv:
2926       if (alu->def.bit_size == 64)
2927          ctx->mod.feats.dx11_1_double_extensions = 1;
2928       return emit_binop(ctx, alu, DXIL_BINOP_SDIV, src[0], src[1]);
2929 
2930    case nir_op_idiv:
2931    case nir_op_udiv:
2932       if (nir_src_is_const(alu->src[1].src)) {
2933          /* It's illegal to emit a literal divide by 0 in DXIL */
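         /* Integer division by zero is undefined in NIR anyway, so folding
          * the whole instruction to 0 is as valid as any other result. */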
2934          nir_scalar divisor = nir_scalar_chase_alu_src(nir_get_scalar(&alu->def, 0), 1);
2935          if (nir_scalar_as_int(divisor) == 0) {
2936             store_alu_dest(ctx, alu, 0,
2937                            dxil_module_get_int_const(&ctx->mod, 0, alu->def.bit_size));
2938             return true;
2939          }
2940       }
2941       return emit_binop(ctx, alu, alu->op == nir_op_idiv ? DXIL_BINOP_SDIV : DXIL_BINOP_UDIV, src[0], src[1]);
2942 
2943    case nir_op_irem: return emit_binop(ctx, alu, DXIL_BINOP_SREM, src[0], src[1]);
2944    case nir_op_imod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
2945    case nir_op_umod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
2946    case nir_op_ishl: return emit_shift(ctx, alu, DXIL_BINOP_SHL, src[0], src[1]);
2947    case nir_op_ishr: return emit_shift(ctx, alu, DXIL_BINOP_ASHR, src[0], src[1]);
2948    case nir_op_ushr: return emit_shift(ctx, alu, DXIL_BINOP_LSHR, src[0], src[1]);
2949    case nir_op_iand: return emit_binop(ctx, alu, DXIL_BINOP_AND, src[0], src[1]);
2950    case nir_op_ior:  return emit_binop(ctx, alu, DXIL_BINOP_OR, src[0], src[1]);
2951    case nir_op_ixor: return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], src[1]);
2952    case nir_op_inot: {
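      /* ~x lowers to x ^ ~0; for 1-bit booleans the all-ones mask is just 1. */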
2953       unsigned bit_size = alu->def.bit_size;
2954       intmax_t val = bit_size == 1 ? 1 : -1;
2955       const struct dxil_value *negative_one = dxil_module_get_int_const(&ctx->mod, val, bit_size);
2956       return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], negative_one);
2957    }
2958    case nir_op_ieq:  return emit_cmp(ctx, alu, DXIL_ICMP_EQ, src[0], src[1]);
2959    case nir_op_ine:  return emit_cmp(ctx, alu, DXIL_ICMP_NE, src[0], src[1]);
2960    case nir_op_ige:  return emit_cmp(ctx, alu, DXIL_ICMP_SGE, src[0], src[1]);
2961    case nir_op_uge:  return emit_cmp(ctx, alu, DXIL_ICMP_UGE, src[0], src[1]);
2962    case nir_op_ilt:  return emit_cmp(ctx, alu, DXIL_ICMP_SLT, src[0], src[1]);
2963    case nir_op_ult:  return emit_cmp(ctx, alu, DXIL_ICMP_ULT, src[0], src[1]);
2964    case nir_op_feq:  return emit_cmp(ctx, alu, DXIL_FCMP_OEQ, src[0], src[1]);
2965    case nir_op_fneu: return emit_cmp(ctx, alu, DXIL_FCMP_UNE, src[0], src[1]);
2966    case nir_op_flt:  return emit_cmp(ctx, alu, DXIL_FCMP_OLT, src[0], src[1]);
2967    case nir_op_fge:  return emit_cmp(ctx, alu, DXIL_FCMP_OGE, src[0], src[1]);
2968    case nir_op_ftrunc: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_Z, src[0]);
2969    case nir_op_fabs: return emit_unary_intin(ctx, alu, DXIL_INTR_FABS, src[0]);
2970    case nir_op_fcos: return emit_unary_intin(ctx, alu, DXIL_INTR_FCOS, src[0]);
2971    case nir_op_fsin: return emit_unary_intin(ctx, alu, DXIL_INTR_FSIN, src[0]);
2972    case nir_op_fceil: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_PI, src[0]);
2973    case nir_op_fexp2: return emit_unary_intin(ctx, alu, DXIL_INTR_FEXP2, src[0]);
2974    case nir_op_flog2: return emit_unary_intin(ctx, alu, DXIL_INTR_FLOG2, src[0]);
2975    case nir_op_ffloor: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NI, src[0]);
2976    case nir_op_ffract: return emit_unary_intin(ctx, alu, DXIL_INTR_FRC, src[0]);
2977    case nir_op_fisnormal: return emit_unary_intin(ctx, alu, DXIL_INTR_ISNORMAL, src[0]);
2978    case nir_op_fisfinite: return emit_unary_intin(ctx, alu, DXIL_INTR_ISFINITE, src[0]);
2979 
2980    case nir_op_fddx:
2981    case nir_op_fddx_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_COARSE, src[0]);
2982    case nir_op_fddx_fine: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_FINE, src[0]);
2983    case nir_op_fddy:
2984    case nir_op_fddy_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDY_COARSE, src[0]);
2985    case nir_op_fddy_fine: return emit_unary_intin(ctx, alu, DXIL_INTR_DDY_FINE, src[0]);
2986 
2987    case nir_op_fround_even: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NE, src[0]);
2988    case nir_op_frcp: {
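      /* rcp(x) is lowered to 1.0 / x, with the 1.0 constant chosen to match
       * the destination bit size. */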
2989       const struct dxil_value *one;
2990       switch (alu->def.bit_size) {
2991       case 16:
2992          one = dxil_module_get_float16_const(&ctx->mod, 0x3C00);
2993          break;
2994       case 32:
2995          one = dxil_module_get_float_const(&ctx->mod, 1.0f);
2996          break;
2997       case 64:
2998          one = dxil_module_get_double_const(&ctx->mod, 1.0);
2999          break;
3000       default: unreachable("Invalid float size");
3001       }
3002       return emit_binop(ctx, alu, DXIL_BINOP_SDIV, one, src[0]);
3003    }
3004    case nir_op_fsat: return emit_unary_intin(ctx, alu, DXIL_INTR_SATURATE, src[0]);
3005    case nir_op_bit_count: return emit_unary_intin(ctx, alu, DXIL_INTR_COUNTBITS, src[0]);
3006    case nir_op_bitfield_reverse: return emit_unary_intin(ctx, alu, DXIL_INTR_BFREV, src[0]);
3007    case nir_op_ufind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_HI, src[0]);
3008    case nir_op_ifind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_SHI, src[0]);
3009    case nir_op_find_lsb: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_LO, src[0]);
3010    case nir_op_imax: return emit_binary_intin(ctx, alu, DXIL_INTR_IMAX, src[0], src[1]);
3011    case nir_op_imin: return emit_binary_intin(ctx, alu, DXIL_INTR_IMIN, src[0], src[1]);
3012    case nir_op_umax: return emit_binary_intin(ctx, alu, DXIL_INTR_UMAX, src[0], src[1]);
3013    case nir_op_umin: return emit_binary_intin(ctx, alu, DXIL_INTR_UMIN, src[0], src[1]);
3014    case nir_op_frsq: return emit_unary_intin(ctx, alu, DXIL_INTR_RSQRT, src[0]);
3015    case nir_op_fsqrt: return emit_unary_intin(ctx, alu, DXIL_INTR_SQRT, src[0]);
3016    case nir_op_fmax: return emit_binary_intin(ctx, alu, DXIL_INTR_FMAX, src[0], src[1]);
3017    case nir_op_fmin: return emit_binary_intin(ctx, alu, DXIL_INTR_FMIN, src[0], src[1]);
3018    case nir_op_ffma:
3019       if (alu->def.bit_size == 64)
3020          ctx->mod.feats.dx11_1_double_extensions = 1;
3021       return emit_tertiary_intin(ctx, alu, DXIL_INTR_FMA, src[0], src[1], src[2]);
3022 
3023    case nir_op_ibfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_IBFE, src[2], src[1], src[0]);
3024    case nir_op_ubfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_UBFE, src[2], src[1], src[0]);
3025    case nir_op_bitfield_insert: return emit_bitfield_insert(ctx, alu, src[0], src[1], src[2], src[3]);
3026 
3027    case nir_op_unpack_half_2x16_split_x: return emit_f16tof32(ctx, alu, src[0], false);
3028    case nir_op_unpack_half_2x16_split_y: return emit_f16tof32(ctx, alu, src[0], true);
3029    case nir_op_pack_half_2x16_split: return emit_f32tof16(ctx, alu, src[0], src[1]);
3030 
3031    case nir_op_sdot_4x8_iadd: return emit_dot4add_packed(ctx, alu, DXIL_INTR_DOT4_ADD_I8_PACKED, src[0], src[1], src[2]);
3032    case nir_op_udot_4x8_uadd: return emit_dot4add_packed(ctx, alu, DXIL_INTR_DOT4_ADD_U8_PACKED, src[0], src[1], src[2]);
3033 
3034    case nir_op_i2i1:
3035    case nir_op_u2u1:
3036    case nir_op_b2i16:
3037    case nir_op_i2i16:
3038    case nir_op_i2imp:
3039    case nir_op_f2i16:
3040    case nir_op_f2imp:
3041    case nir_op_f2u16:
3042    case nir_op_f2ump:
3043    case nir_op_u2u16:
3044    case nir_op_u2f16:
3045    case nir_op_u2fmp:
3046    case nir_op_i2f16:
3047    case nir_op_i2fmp:
3048    case nir_op_f2f16_rtz:
3049    case nir_op_f2f16:
3050    case nir_op_f2fmp:
3051    case nir_op_b2i32:
3052    case nir_op_f2f32:
3053    case nir_op_f2i32:
3054    case nir_op_f2u32:
3055    case nir_op_i2f32:
3056    case nir_op_i2i32:
3057    case nir_op_u2f32:
3058    case nir_op_u2u32:
3059    case nir_op_b2i64:
3060    case nir_op_f2f64:
3061    case nir_op_f2i64:
3062    case nir_op_f2u64:
3063    case nir_op_i2f64:
3064    case nir_op_i2i64:
3065    case nir_op_u2f64:
3066    case nir_op_u2u64:
3067       return emit_cast(ctx, alu, src[0]);
3068 
3069    case nir_op_b2f16: return emit_b2f16(ctx, alu, src[0]);
3070    case nir_op_b2f32: return emit_b2f32(ctx, alu, src[0]);
3071    case nir_op_b2f64: return emit_b2f64(ctx, alu, src[0]);
3072    default:
3073       log_nir_instr_unsupported(ctx->logger, "Unimplemented ALU instruction",
3074                                 &alu->instr);
3075       return false;
3076    }
3077 }
3078 
3079 static const struct dxil_value *
3080 load_ubo(struct ntd_context *ctx, const struct dxil_value *handle,
3081          const struct dxil_value *offset, enum overload_type overload)
3082 {
3083    assert(handle && offset);
3084 
3085    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CBUFFER_LOAD_LEGACY);
3086    if (!opcode)
3087       return NULL;
3088 
3089    const struct dxil_value *args[] = {
3090       opcode, handle, offset
3091    };
3092 
3093    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cbufferLoadLegacy", overload);
3094    if (!func)
3095       return NULL;
3096    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3097 }
3098 
3099 static bool
3100 emit_barrier_impl(struct ntd_context *ctx, nir_variable_mode modes, mesa_scope execution_scope, mesa_scope mem_scope)
3101 {
3102    const struct dxil_value *opcode, *mode;
3103    const struct dxil_func *func;
3104    uint32_t flags = 0;
3105 
3106    if (execution_scope == SCOPE_WORKGROUP)
3107       flags |= DXIL_BARRIER_MODE_SYNC_THREAD_GROUP;
3108 
3109    bool is_compute = ctx->mod.shader_kind == DXIL_COMPUTE_SHADER;
3110 
3111    if ((modes & (nir_var_mem_ssbo | nir_var_mem_global | nir_var_image)) &&
3112        (mem_scope > SCOPE_WORKGROUP || !is_compute)) {
3113       flags |= DXIL_BARRIER_MODE_UAV_FENCE_GLOBAL;
3114    } else {
3115       flags |= DXIL_BARRIER_MODE_UAV_FENCE_THREAD_GROUP;
3116    }
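   /* Outside of compute there is no thread group to fence against, so any
    * UAV fence there has to be device-scope. */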
3117 
3118    if ((modes & nir_var_mem_shared) && is_compute)
3119       flags |= DXIL_BARRIER_MODE_GROUPSHARED_MEM_FENCE;
3120 
3121    func = dxil_get_function(&ctx->mod, "dx.op.barrier", DXIL_NONE);
3122    if (!func)
3123       return false;
3124 
3125    opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_BARRIER);
3126    if (!opcode)
3127       return false;
3128 
3129    mode = dxil_module_get_int32_const(&ctx->mod, flags);
3130    if (!mode)
3131       return false;
3132 
3133    const struct dxil_value *args[] = { opcode, mode };
3134 
3135    return dxil_emit_call_void(&ctx->mod, func,
3136                               args, ARRAY_SIZE(args));
3137 }
3138 
3139 static bool
3140 emit_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3141 {
3142    return emit_barrier_impl(ctx,
3143       nir_intrinsic_memory_modes(intr),
3144       nir_intrinsic_execution_scope(intr),
3145       nir_intrinsic_memory_scope(intr));
3146 }
3147 
3148 static bool
3149 emit_load_global_invocation_id(struct ntd_context *ctx,
3150                                     nir_intrinsic_instr *intr)
3151 {
3152    nir_component_mask_t comps = nir_def_components_read(&intr->def);
3153 
3154    for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
3155       if (comps & (1 << i)) {
3156          const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
3157          if (!idx)
3158             return false;
3159          const struct dxil_value *globalid = emit_threadid_call(ctx, idx);
3160 
3161          if (!globalid)
3162             return false;
3163 
3164          store_def(ctx, &intr->def, i, globalid);
3165       }
3166    }
3167    return true;
3168 }
3169 
3170 static bool
3171 emit_load_local_invocation_id(struct ntd_context *ctx,
3172                               nir_intrinsic_instr *intr)
3173 {
3174    nir_component_mask_t comps = nir_def_components_read(&intr->def);
3175 
3176    for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
3177       if (comps & (1 << i)) {
3178          const struct dxil_value
3179             *idx = dxil_module_get_int32_const(&ctx->mod, i);
3180          if (!idx)
3181             return false;
3182          const struct dxil_value
3183             *threadidingroup = emit_threadidingroup_call(ctx, idx);
3184          if (!threadidingroup)
3185             return false;
3186          store_def(ctx, &intr->def, i, threadidingroup);
3187       }
3188    }
3189    return true;
3190 }
3191 
3192 static bool
3193 emit_load_local_invocation_index(struct ntd_context *ctx,
3194                                  nir_intrinsic_instr *intr)
3195 {
3196    const struct dxil_value
3197       *flattenedthreadidingroup = emit_flattenedthreadidingroup_call(ctx);
3198    if (!flattenedthreadidingroup)
3199       return false;
3200    store_def(ctx, &intr->def, 0, flattenedthreadidingroup);
3201 
3202    return true;
3203 }
3204 
3205 static bool
3206 emit_load_local_workgroup_id(struct ntd_context *ctx,
3207                               nir_intrinsic_instr *intr)
3208 {
3209    nir_component_mask_t comps = nir_def_components_read(&intr->def);
3210 
3211    for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
3212       if (comps & (1 << i)) {
3213          const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
3214          if (!idx)
3215             return false;
3216          const struct dxil_value *groupid = emit_groupid_call(ctx, idx);
3217          if (!groupid)
3218             return false;
3219          store_def(ctx, &intr->def, i, groupid);
3220       }
3221    }
3222    return true;
3223 }
3224 
3225 static const struct dxil_value *
3226 call_unary_external_function(struct ntd_context *ctx,
3227                              const char *name,
3228                              int32_t dxil_intr,
3229                              enum overload_type overload)
3230 {
3231    const struct dxil_func *func =
3232       dxil_get_function(&ctx->mod, name, overload);
3233    if (!func)
3234       return NULL;
3235 
3236    const struct dxil_value *opcode =
3237       dxil_module_get_int32_const(&ctx->mod, dxil_intr);
3238    if (!opcode)
3239       return NULL;
3240 
3241    const struct dxil_value *args[] = {opcode};
3242 
3243    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3244 }
3245 
3246 static bool
3247 emit_load_unary_external_function(struct ntd_context *ctx,
3248                                   nir_intrinsic_instr *intr, const char *name,
3249                                   int32_t dxil_intr,
3250                                   nir_alu_type type)
3251 {
3252    const struct dxil_value *value = call_unary_external_function(ctx, name, dxil_intr,
3253                                                                  get_overload(type, intr->def.bit_size));
    if (!value)
       return false;
3254    store_def(ctx, &intr->def, 0, value);
3255 
3256    return true;
3257 }
3258 
3259 static bool
3260 emit_load_sample_mask_in(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3261 {
3262    const struct dxil_value *value = call_unary_external_function(ctx,
3263       "dx.op.coverage", DXIL_INTR_COVERAGE, DXIL_I32);
3264 
3265    /* Mask coverage with (1 << sample index). Note: this is done as an AND so extrapolation cases are handled too. */
3266    if (ctx->mod.info.has_per_sample_input) {
3267       value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_AND, value,
3268          dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL,
3269             dxil_module_get_int32_const(&ctx->mod, 1),
3270             call_unary_external_function(ctx, "dx.op.sampleIndex", DXIL_INTR_SAMPLE_INDEX, DXIL_I32), 0), 0);
3271    }
3272 
3273    store_def(ctx, &intr->def, 0, value);
3274    return true;
3275 }
3276 
3277 static bool
3278 emit_load_tess_coord(struct ntd_context *ctx,
3279                      nir_intrinsic_instr *intr)
3280 {
3281    const struct dxil_func *func =
3282       dxil_get_function(&ctx->mod, "dx.op.domainLocation", DXIL_F32);
3283    if (!func)
3284       return false;
3285 
3286    const struct dxil_value *opcode =
3287       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DOMAIN_LOCATION);
3288    if (!opcode)
3289       return false;
3290 
3291    unsigned num_coords = ctx->shader->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES ? 3 : 2;
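   /* Triangle domains use all three barycentric coordinates; quad and isoline
    * domains only use u/v, and the remaining components are zero-filled below. */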
3292    for (unsigned i = 0; i < num_coords; ++i) {
3293       unsigned component_idx = i;
3294 
3295       const struct dxil_value *component = dxil_module_get_int32_const(&ctx->mod, component_idx);
3296       if (!component)
3297          return false;
3298 
3299       const struct dxil_value *args[] = { opcode, component };
3300 
3301       const struct dxil_value *value =
3302          dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3303       store_def(ctx, &intr->def, i, value);
3304    }
3305 
3306    for (unsigned i = num_coords; i < intr->def.num_components; ++i) {
3307       const struct dxil_value *value = dxil_module_get_float_const(&ctx->mod, 0.0f);
3308       store_def(ctx, &intr->def, i, value);
3309    }
3310 
3311    return true;
3312 }
3313 
3314 static const struct dxil_value *
3315 get_int32_undef(struct dxil_module *m)
3316 {
3317    const struct dxil_type *int32_type =
3318       dxil_module_get_int_type(m, 32);
3319    if (!int32_type)
3320       return NULL;
3321 
3322    return dxil_module_get_undef(m, int32_type);
3323 }
3324 
3325 static const struct dxil_value *
3326 get_resource_handle(struct ntd_context *ctx, nir_src *src, enum dxil_resource_class class,
3327                     enum dxil_resource_kind kind)
3328 {
3329    /* This source might be one of:
3330     * 1. Constant resource index - just look it up in precomputed handle arrays
3331     *    If it's null in that array, create a handle, and store the result
3332     * 2. A handle from load_vulkan_descriptor - just get the stored SSA value
3333     * 3. Dynamic resource index - create a handle for it here
3334     */
3335    assert(src->ssa->num_components == 1 && src->ssa->bit_size == 32);
3336    nir_const_value *const_block_index = nir_src_as_const_value(*src);
3337    const struct dxil_value **handle_entry = NULL;
3338    if (const_block_index) {
3339       assert(ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN);
3340       switch (kind) {
3341       case DXIL_RESOURCE_KIND_CBUFFER:
3342          handle_entry = &ctx->cbv_handles[const_block_index->u32];
3343          break;
3344       case DXIL_RESOURCE_KIND_RAW_BUFFER:
3345          if (class == DXIL_RESOURCE_CLASS_UAV)
3346             handle_entry = &ctx->ssbo_handles[const_block_index->u32];
3347          else
3348             handle_entry = &ctx->srv_handles[const_block_index->u32];
3349          break;
3350       case DXIL_RESOURCE_KIND_SAMPLER:
3351          handle_entry = &ctx->sampler_handles[const_block_index->u32];
3352          break;
3353       default:
3354          if (class == DXIL_RESOURCE_CLASS_UAV)
3355             handle_entry = &ctx->image_handles[const_block_index->u32];
3356          else
3357             handle_entry = &ctx->srv_handles[const_block_index->u32];
3358          break;
3359       }
3360    }
3361 
3362    if (handle_entry && *handle_entry)
3363       return *handle_entry;
3364 
3365    if (nir_src_as_deref(*src) ||
3366        ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
3367       return get_src_ssa(ctx, src->ssa, 0);
3368    }
3369 
3370    unsigned space = 0;
3371    if (ctx->opts->environment == DXIL_ENVIRONMENT_GL &&
3372        class == DXIL_RESOURCE_CLASS_UAV) {
3373       if (kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
3374          space = 2;
3375       else
3376          space = 1;
3377    }
3378 
3379    /* The base binding here will almost always be zero. The only cases where we end
3380     * up in this type of dynamic indexing are:
3381     * 1. GL UBOs
3382     * 2. GL SSBOs
3383     * 3. CL SSBOs
3384     * In all cases except GL UBOs, the resources are a single zero-based array.
3385     * For GL UBOs, the base is 1, because uniforms use binding 0 and cannot be
3386     * dynamically indexed. All other cases should either fall into static indexing
3387     * (first early return), deref-based dynamic handle creation (images, or Vulkan
3388     * textures/samplers), or load_vulkan_descriptor handle creation.
3389     */
3390    unsigned base_binding = 0;
3391    if (ctx->opts->environment == DXIL_ENVIRONMENT_GL &&
3392        class == DXIL_RESOURCE_CLASS_CBV)
3393       base_binding = 1;
3394 
3395    const struct dxil_value *value = get_src(ctx, src, 0, nir_type_uint);
3396    const struct dxil_value *handle = emit_createhandle_call_dynamic(ctx, class,
3397       space, base_binding, value, !const_block_index);
3398    if (handle_entry)
3399       *handle_entry = handle;
3400 
3401    return handle;
3402 }
3403 
3404 static const struct dxil_value *
3405 create_image_handle(struct ntd_context *ctx, nir_intrinsic_instr *image_intr)
3406 {
3407    const struct dxil_value *unannotated_handle =
3408       emit_createhandle_heap(ctx, get_src(ctx, &image_intr->src[0], 0, nir_type_uint32), false, true /*TODO: divergence*/);
3409    const struct dxil_value *res_props =
3410       dxil_module_get_uav_res_props_const(&ctx->mod, image_intr);
3411 
3412    if (!unannotated_handle || !res_props)
3413       return NULL;
3414 
3415    return emit_annotate_handle(ctx, unannotated_handle, res_props);
3416 }
3417 
3418 static const struct dxil_value *
3419 create_srv_handle(struct ntd_context *ctx, nir_tex_instr *tex, nir_src *src)
3420 {
3421    const struct dxil_value *unannotated_handle =
3422       emit_createhandle_heap(ctx, get_src(ctx, src, 0, nir_type_uint32), false, true /*TODO: divergence*/);
3423    const struct dxil_value *res_props =
3424       dxil_module_get_srv_res_props_const(&ctx->mod, tex);
3425 
3426    if (!unannotated_handle || !res_props)
3427       return NULL;
3428 
3429    return emit_annotate_handle(ctx, unannotated_handle, res_props);
3430 }
3431 
3432 static const struct dxil_value *
3433 create_sampler_handle(struct ntd_context *ctx, bool is_shadow, nir_src *src)
3434 {
3435    const struct dxil_value *unannotated_handle =
3436       emit_createhandle_heap(ctx, get_src(ctx, src, 0, nir_type_uint32), true, true /*TODO: divergence*/);
3437    const struct dxil_value *res_props =
3438       dxil_module_get_sampler_res_props_const(&ctx->mod, is_shadow);
3439 
3440    if (!unannotated_handle || !res_props)
3441       return NULL;
3442 
3443    return emit_annotate_handle(ctx, unannotated_handle, res_props);
3444 }
3445 
3446 static bool
3447 emit_load_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3448 {
3449    const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
3450 
3451    enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV;
3452    if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
3453       nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
3454       if (var && var->data.access & ACCESS_NON_WRITEABLE)
3455          class = DXIL_RESOURCE_CLASS_SRV;
3456    }
3457 
3458    const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER);
3459    const struct dxil_value *offset =
3460       get_src(ctx, &intr->src[1], 0, nir_type_uint);
3461    if (!int32_undef || !handle || !offset)
3462       return false;
3463 
3464    assert(nir_src_bit_size(intr->src[0]) == 32);
3465    assert(nir_intrinsic_dest_components(intr) <= 4);
3466 
3467    const struct dxil_value *coord[2] = {
3468       offset,
3469       int32_undef
3470    };
3471 
3472    enum overload_type overload = get_ambiguous_overload_alu_type(ctx, intr, nir_type_uint);
3473    const struct dxil_value *load = ctx->mod.minor_version >= 2 ?
3474       emit_raw_bufferload_call(ctx, handle, coord,
3475                                overload,
3476                                nir_intrinsic_dest_components(intr),
3477                                intr->def.bit_size / 8) :
3478       emit_bufferload_call(ctx, handle, coord, overload);
3479    if (!load)
3480       return false;
3481 
3482    for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
3483       const struct dxil_value *val =
3484          dxil_emit_extractval(&ctx->mod, load, i);
3485       if (!val)
3486          return false;
3487       store_def(ctx, &intr->def, i, val);
3488    }
3489    if (intr->def.bit_size == 16)
3490       ctx->mod.feats.native_low_precision = true;
3491    return true;
3492 }
3493 
3494 static bool
3495 emit_store_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3496 {
3497    const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[1], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
3498    const struct dxil_value *offset =
3499       get_src(ctx, &intr->src[2], 0, nir_type_uint);
3500    if (!handle || !offset)
3501       return false;
3502 
3503    unsigned num_components = nir_src_num_components(intr->src[0]);
3504    assert(num_components <= 4);
3505    if (nir_src_bit_size(intr->src[0]) == 16)
3506       ctx->mod.feats.native_low_precision = true;
3507 
3508    nir_alu_type type =
3509       dxil_type_to_nir_type(dxil_value_get_type(get_src_ssa(ctx, intr->src[0].ssa, 0)));
3510    const struct dxil_value *value[4] = { 0 };
3511    for (unsigned i = 0; i < num_components; ++i) {
3512       value[i] = get_src(ctx, &intr->src[0], i, type);
3513       if (!value[i])
3514          return false;
3515    }
3516 
3517    const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
3518    if (!int32_undef)
3519       return false;
3520 
3521    const struct dxil_value *coord[2] = {
3522       offset,
3523       int32_undef
3524    };
3525 
3526    enum overload_type overload = get_overload(type, intr->src[0].ssa->bit_size);
3527    if (num_components < 4) {
3528       const struct dxil_value *value_undef = dxil_module_get_undef(&ctx->mod, dxil_value_get_type(value[0]));
3529       if (!value_undef)
3530          return false;
3531 
3532       for (int i = num_components; i < 4; ++i)
3533          value[i] = value_undef;
3534    }
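   /* The buffer store always takes four values; the write mask below keeps
    * the undef padding lanes from being written. */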
3535 
3536    const struct dxil_value *write_mask =
3537       dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
3538    if (!write_mask)
3539       return false;
3540 
3541    return ctx->mod.minor_version >= 2 ?
3542       emit_raw_bufferstore_call(ctx, handle, coord, value, write_mask, overload, intr->src[0].ssa->bit_size / 8) :
3543       emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload);
3544 }
3545 
3546 static bool
3547 emit_load_ubo_vec4(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3548 {
3549    const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, DXIL_RESOURCE_KIND_CBUFFER);
3550    const struct dxil_value *offset =
3551       get_src(ctx, &intr->src[1], 0, nir_type_uint);
3552 
3553    if (!handle || !offset)
3554       return false;
3555 
3556    enum overload_type overload = get_ambiguous_overload_alu_type(ctx, intr, nir_type_uint);
3557    const struct dxil_value *agg = load_ubo(ctx, handle, offset, overload);
3558    if (!agg)
3559       return false;
3560 
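   /* cbufferLoadLegacy returns an entire 16-byte constant buffer row; extract
    * just the components this load asked for. */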
3561    unsigned first_component = nir_intrinsic_has_component(intr) ?
3562       nir_intrinsic_component(intr) : 0;
3563    for (unsigned i = 0; i < intr->def.num_components; i++)
3564       store_def(ctx, &intr->def, i,
3565                  dxil_emit_extractval(&ctx->mod, agg, i + first_component));
3566 
3567    if (intr->def.bit_size == 16)
3568       ctx->mod.feats.native_low_precision = true;
3569    return true;
3570 }
3571 
3572 /* Need to add patch-ness as a matching parameter, since driver_location is *not* unique
3573  * between control points and patch variables in HS/DS
3574  */
3575 static nir_variable *
3576 find_patch_matching_variable_by_driver_location(nir_shader *s, nir_variable_mode mode, unsigned driver_location, bool patch)
3577 {
3578    nir_foreach_variable_with_modes(var, s, mode) {
3579       if (var->data.driver_location == driver_location &&
3580           var->data.patch == patch)
3581          return var;
3582    }
3583    return NULL;
3584 }
3585 
3586 static bool
3587 emit_store_output_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3588 {
3589    assert(intr->intrinsic == nir_intrinsic_store_output ||
3590           ctx->mod.shader_kind == DXIL_HULL_SHADER);
3591    bool is_patch_constant = intr->intrinsic == nir_intrinsic_store_output &&
3592       ctx->mod.shader_kind == DXIL_HULL_SHADER;
3593    nir_alu_type out_type = nir_intrinsic_src_type(intr);
3594    enum overload_type overload = get_overload(out_type, intr->src[0].ssa->bit_size);
3595    const struct dxil_func *func = dxil_get_function(&ctx->mod, is_patch_constant ?
3596       "dx.op.storePatchConstant" : "dx.op.storeOutput",
3597       overload);
3598 
3599    if (!func)
3600       return false;
3601 
3602    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, is_patch_constant ?
3603       DXIL_INTR_STORE_PATCH_CONSTANT : DXIL_INTR_STORE_OUTPUT);
3604    const struct dxil_value *output_id = dxil_module_get_int32_const(&ctx->mod, nir_intrinsic_base(intr));
3605    unsigned row_index = intr->intrinsic == nir_intrinsic_store_output ? 1 : 2;
3606 
3607    /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature
3608     * generation, so muck with them here too.
3609     */
3610    nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
3611    bool is_tess_level = is_patch_constant &&
3612                         (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
3613                          semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER);
3614 
3615    const struct dxil_value *row = NULL;
3616    const struct dxil_value *col = NULL;
3617    if (is_tess_level)
3618       col = dxil_module_get_int8_const(&ctx->mod, 0);
3619    else
3620       row = get_src(ctx, &intr->src[row_index], 0, nir_type_int);
3621 
3622    bool success = true;
3623    uint32_t writemask = nir_intrinsic_write_mask(intr);
3624 
3625    nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_out, nir_intrinsic_base(intr), is_patch_constant);
3626    unsigned var_base_component = var->data.location_frac;
3627    unsigned base_component = nir_intrinsic_component(intr) - var_base_component;
3628 
3629    if (ctx->mod.minor_validator >= 5) {
3630       struct dxil_signature_record *sig_rec = is_patch_constant ?
3631          &ctx->mod.patch_consts[nir_intrinsic_base(intr)] :
3632          &ctx->mod.outputs[nir_intrinsic_base(intr)];
3633       unsigned comp_size = intr->src[0].ssa->bit_size == 64 ? 2 : 1;
3634       unsigned comp_mask = 0;
3635       if (is_tess_level)
3636          comp_mask = 1;
3637       else if (comp_size == 1)
3638          comp_mask = writemask << var_base_component;
3639       else {
3640          for (unsigned i = 0; i < intr->num_components; ++i)
3641             if ((writemask & (1 << i)))
3642                comp_mask |= 3 << ((i + var_base_component) * comp_size);
3643       }
3644       for (unsigned r = 0; r < sig_rec->num_elements; ++r)
3645          sig_rec->elements[r].never_writes_mask &= ~comp_mask;
3646 
3647       if (!nir_src_is_const(intr->src[row_index])) {
3648          struct dxil_psv_signature_element *psv_rec = is_patch_constant ?
3649             &ctx->mod.psv_patch_consts[nir_intrinsic_base(intr)] :
3650             &ctx->mod.psv_outputs[nir_intrinsic_base(intr)];
3651          psv_rec->dynamic_mask_and_stream |= comp_mask;
3652       }
3653    }
3654 
3655    for (unsigned i = 0; i < intr->num_components && success; ++i) {
3656       if (writemask & (1 << i)) {
3657          if (is_tess_level)
3658             row = dxil_module_get_int32_const(&ctx->mod, i + base_component);
3659          else
3660             col = dxil_module_get_int8_const(&ctx->mod, i + base_component);
3661          const struct dxil_value *value = get_src(ctx, &intr->src[0], i, out_type);
3662          if (!col || !row || !value)
3663             return false;
3664 
3665          const struct dxil_value *args[] = {
3666             opcode, output_id, row, col, value
3667          };
3668          success &= dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
3669       }
3670    }
3671 
3672    return success;
3673 }
3674 
3675 static bool
3676 emit_load_input_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3677 {
3678    bool attr_at_vertex = false;
3679    if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER &&
3680       ctx->opts->interpolate_at_vertex &&
3681       ctx->opts->provoking_vertex != 0 &&
3682       (nir_intrinsic_dest_type(intr) & nir_type_float)) {
3683       nir_variable *var = nir_find_variable_with_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr));
3684 
3685       attr_at_vertex = var && var->data.interpolation == INTERP_MODE_FLAT;
3686    }
3687 
3688    bool is_patch_constant = (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER &&
3689                              intr->intrinsic == nir_intrinsic_load_input) ||
3690                             (ctx->mod.shader_kind == DXIL_HULL_SHADER &&
3691                              intr->intrinsic == nir_intrinsic_load_output);
3692    bool is_output_control_point = intr->intrinsic == nir_intrinsic_load_per_vertex_output;
3693 
3694    unsigned opcode_val;
3695    const char *func_name;
3696    if (attr_at_vertex) {
3697       opcode_val = DXIL_INTR_ATTRIBUTE_AT_VERTEX;
3698       func_name = "dx.op.attributeAtVertex";
3699       if (ctx->mod.minor_validator >= 6)
3700          ctx->mod.feats.barycentrics = 1;
3701    } else if (is_patch_constant) {
3702       opcode_val = DXIL_INTR_LOAD_PATCH_CONSTANT;
3703       func_name = "dx.op.loadPatchConstant";
3704    } else if (is_output_control_point) {
3705       opcode_val = DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT;
3706       func_name = "dx.op.loadOutputControlPoint";
3707    } else {
3708       opcode_val = DXIL_INTR_LOAD_INPUT;
3709       func_name = "dx.op.loadInput";
3710    }
3711 
3712    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, opcode_val);
3713    if (!opcode)
3714       return false;
3715 
3716    const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod,
3717       is_patch_constant || is_output_control_point ?
3718          nir_intrinsic_base(intr) :
3719          ctx->mod.input_mappings[nir_intrinsic_base(intr)]);
3720    if (!input_id)
3721       return false;
3722 
3723    bool is_per_vertex =
3724       intr->intrinsic == nir_intrinsic_load_per_vertex_input ||
3725       intr->intrinsic == nir_intrinsic_load_per_vertex_output;
3726    int row_index = is_per_vertex ? 1 : 0;
3727    const struct dxil_value *vertex_id = NULL;
3728    if (!is_patch_constant) {
3729       if (is_per_vertex) {
3730          vertex_id = get_src(ctx, &intr->src[0], 0, nir_type_int);
3731       } else if (attr_at_vertex) {
3732          vertex_id = dxil_module_get_int8_const(&ctx->mod, ctx->opts->provoking_vertex);
3733       } else {
3734          const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
3735          if (!int32_type)
3736             return false;
3737 
3738          vertex_id = dxil_module_get_undef(&ctx->mod, int32_type);
3739       }
3740       if (!vertex_id)
3741          return false;
3742    }
3743 
3744    /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature
3745     * generation, so muck with them here too.
3746     */
3747    nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
3748    bool is_tess_level = is_patch_constant &&
3749                         (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
3750                          semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER);
3751 
3752    const struct dxil_value *row = NULL;
3753    const struct dxil_value *comp = NULL;
3754    if (is_tess_level)
3755       comp = dxil_module_get_int8_const(&ctx->mod, 0);
3756    else
3757       row = get_src(ctx, &intr->src[row_index], 0, nir_type_int);
3758 
3759    nir_alu_type out_type = nir_intrinsic_dest_type(intr);
3760    enum overload_type overload = get_overload(out_type, intr->def.bit_size);
3761 
3762    const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, overload);
3763 
3764    if (!func)
3765       return false;
3766 
3767    nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), is_patch_constant);
3768    unsigned var_base_component = var ? var->data.location_frac : 0;
3769    unsigned base_component = nir_intrinsic_component(intr) - var_base_component;
3770 
3771    if (ctx->mod.minor_validator >= 5 &&
3772        !is_output_control_point &&
3773        intr->intrinsic != nir_intrinsic_load_output) {
3774       struct dxil_signature_record *sig_rec = is_patch_constant ?
3775          &ctx->mod.patch_consts[nir_intrinsic_base(intr)] :
3776          &ctx->mod.inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
3777       unsigned comp_size = intr->def.bit_size == 64 ? 2 : 1;
3778       unsigned comp_mask = (1 << (intr->num_components * comp_size)) - 1;
3779       comp_mask <<= (var_base_component * comp_size);
3780       if (is_tess_level)
3781          comp_mask = 1;
3782       for (unsigned r = 0; r < sig_rec->num_elements; ++r)
3783          sig_rec->elements[r].always_reads_mask |= (comp_mask & sig_rec->elements[r].mask);
3784 
3785       if (!nir_src_is_const(intr->src[row_index])) {
3786          struct dxil_psv_signature_element *psv_rec = is_patch_constant ?
3787             &ctx->mod.psv_patch_consts[nir_intrinsic_base(intr)] :
3788             &ctx->mod.psv_inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
3789          psv_rec->dynamic_mask_and_stream |= comp_mask;
3790       }
3791    }
3792 
3793    for (unsigned i = 0; i < intr->num_components; ++i) {
3794       if (is_tess_level)
3795          row = dxil_module_get_int32_const(&ctx->mod, i + base_component);
3796       else
3797          comp = dxil_module_get_int8_const(&ctx->mod, i + base_component);
3798 
3799       if (!row || !comp)
3800          return false;
3801 
3802       const struct dxil_value *args[] = {
3803          opcode, input_id, row, comp, vertex_id
3804       };
3805 
3806       unsigned num_args = ARRAY_SIZE(args) - (is_patch_constant ? 1 : 0);
3807       const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args);
3808       if (!retval)
3809          return false;
3810       store_def(ctx, &intr->def, i, retval);
3811    }
3812    return true;
3813 }
3814 
3815 static bool
3816 emit_load_interpolated_input(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3817 {
3818    nir_intrinsic_instr *barycentric = nir_src_as_intrinsic(intr->src[0]);
3819 
3820    const struct dxil_value *args[6] = { 0 };
3821 
3822    unsigned opcode_val;
3823    const char *func_name;
3824    unsigned num_args;
3825    switch (barycentric->intrinsic) {
3826    case nir_intrinsic_load_barycentric_at_offset:
3827       opcode_val = DXIL_INTR_EVAL_SNAPPED;
3828       func_name = "dx.op.evalSnapped";
3829       num_args = 6;
3830       for (unsigned i = 0; i < 2; ++i) {
3831          const struct dxil_value *float_offset = get_src(ctx, &barycentric->src[0], i, nir_type_float);
2832          /* GLSL uses [-0.5f, 0.5f); DXIL's evalSnapped wants snapped 4-bit integers in [-8, 7], in 1/16th-pixel units */
3833          const struct dxil_value *offset_16 = dxil_emit_binop(&ctx->mod,
3834             DXIL_BINOP_MUL, float_offset, dxil_module_get_float_const(&ctx->mod, 16.0f), 0);
3835          args[i + 4] = dxil_emit_cast(&ctx->mod, DXIL_CAST_FPTOSI,
3836             dxil_module_get_int_type(&ctx->mod, 32), offset_16);
3837       }
3838       break;
3839    case nir_intrinsic_load_barycentric_pixel:
3840       opcode_val = DXIL_INTR_EVAL_SNAPPED;
3841       func_name = "dx.op.evalSnapped";
3842       num_args = 6;
3843       args[4] = args[5] = dxil_module_get_int32_const(&ctx->mod, 0);
3844       break;
3845    case nir_intrinsic_load_barycentric_at_sample:
3846       opcode_val = DXIL_INTR_EVAL_SAMPLE_INDEX;
3847       func_name = "dx.op.evalSampleIndex";
3848       num_args = 5;
3849       args[4] = get_src(ctx, &barycentric->src[0], 0, nir_type_int);
3850       break;
3851    case nir_intrinsic_load_barycentric_centroid:
3852       opcode_val = DXIL_INTR_EVAL_CENTROID;
3853       func_name = "dx.op.evalCentroid";
3854       num_args = 4;
3855       break;
3856    default:
3857       unreachable("Unsupported interpolation barycentric intrinsic");
3858    }
3859    args[0] = dxil_module_get_int32_const(&ctx->mod, opcode_val);
3860    args[1] = dxil_module_get_int32_const(&ctx->mod, nir_intrinsic_base(intr));
3861    args[2] = get_src(ctx, &intr->src[1], 0, nir_type_int);
3862 
3863    const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, DXIL_F32);
3864 
3865    if (!func)
3866       return false;
3867 
3868    nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), false);
3869    unsigned var_base_component = var ? var->data.location_frac : 0;
3870    unsigned base_component = nir_intrinsic_component(intr) - var_base_component;
3871 
3872    if (ctx->mod.minor_validator >= 5) {
3873       struct dxil_signature_record *sig_rec =
3874          &ctx->mod.inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
3875       unsigned comp_size = intr->def.bit_size == 64 ? 2 : 1;
3876       unsigned comp_mask = (1 << (intr->num_components * comp_size)) - 1;
3877       comp_mask <<= (var_base_component * comp_size);
3878       for (unsigned r = 0; r < sig_rec->num_elements; ++r)
3879          sig_rec->elements[r].always_reads_mask |= (comp_mask & sig_rec->elements[r].mask);
3880 
3881       if (!nir_src_is_const(intr->src[1])) {
3882          struct dxil_psv_signature_element *psv_rec =
3883             &ctx->mod.psv_inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
3884          psv_rec->dynamic_mask_and_stream |= comp_mask;
3885       }
3886    }
3887 
3888    for (unsigned i = 0; i < intr->num_components; ++i) {
3889       args[3] = dxil_module_get_int8_const(&ctx->mod, i + base_component);
3890 
3891       const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args);
3892       if (!retval)
3893          return false;
3894       store_def(ctx, &intr->def, i, retval);
3895    }
3896    return true;
3897 }
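
/* Worked example of the offset conversion above (assuming a GLSL frontend):
 * interpolateAtOffset(v, vec2(0.25, -0.25)) arrives as
 * load_barycentric_at_offset with float offsets (0.25, -0.25); multiplying
 * by 16 and truncating via fptosi yields the integer arguments (4, -4)
 * passed to dx.op.evalSnapped.
 */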

static const struct dxil_value *
deref_to_gep(struct ntd_context *ctx, nir_deref_instr *deref)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, ctx->ralloc_ctx);
   assert(path.path[0]->deref_type == nir_deref_type_var);
   uint32_t count = 0;
   while (path.path[count])
      ++count;

   const struct dxil_value **gep_indices = ralloc_array(ctx->ralloc_ctx,
                                                        const struct dxil_value *,
                                                        count + 1);
   nir_variable *var = path.path[0]->var;
   const struct dxil_value **var_array;
   switch (deref->modes) {
   case nir_var_mem_constant: var_array = ctx->consts; break;
   case nir_var_mem_shared: var_array = ctx->sharedvars; break;
   case nir_var_function_temp: var_array = ctx->scratchvars; break;
   default: unreachable("Invalid deref mode");
   }
   gep_indices[0] = var_array[var->data.driver_location];

   for (uint32_t i = 0; i < count; ++i)
      gep_indices[i + 1] = get_src_ssa(ctx, &path.path[i]->def, 0);

   return dxil_emit_gep_inbounds(&ctx->mod, gep_indices, count + 1);
}
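
/* Sketch of the index layout built above, for a hypothetical declaration
 * "shared float a[4][2]" accessed as a[i][j]: the deref path is
 * var -> array(i) -> array(j), and emit_deref() stored a constant 0 for the
 * var link, so the emitted GEP is effectively
 *    getelementptr inbounds(a, 0, i, j)
 * where gep_indices[0] is the module-level variable and each later index is
 * the value previously stored for that deref link.
 */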

static bool
emit_load_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0]));
   if (!ptr)
      return false;

   const struct dxil_value *retval =
      dxil_emit_load(&ctx->mod, ptr, intr->def.bit_size / 8, false);
   if (!retval)
      return false;

   store_def(ctx, &intr->def, 0, retval);
   return true;
}

static bool
emit_store_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
   const struct dxil_value *ptr = deref_to_gep(ctx, deref);
   if (!ptr)
      return false;

   const struct dxil_value *value = get_src(ctx, &intr->src[1], 0, nir_get_nir_type_for_glsl_type(deref->type));
   return dxil_emit_store(&ctx->mod, value, ptr, nir_src_bit_size(intr->src[1]) / 8, false);
}

static bool
emit_atomic_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0]));
   if (!ptr)
      return false;

   const struct dxil_value *value = get_src(ctx, &intr->src[1], 0, nir_type_uint);
   if (!value)
      return false;

   enum dxil_rmw_op dxil_op = nir_atomic_to_dxil_rmw(nir_intrinsic_atomic_op(intr));
   const struct dxil_value *retval = dxil_emit_atomicrmw(&ctx->mod, value, ptr, dxil_op, false,
                                                         DXIL_ATOMIC_ORDERING_ACQREL,
                                                         DXIL_SYNC_SCOPE_CROSSTHREAD);
   if (!retval)
      return false;

   store_def(ctx, &intr->def, 0, retval);
   return true;
}

static bool
emit_atomic_deref_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0]));
   if (!ptr)
      return false;

   const struct dxil_value *cmp = get_src(ctx, &intr->src[1], 0, nir_type_uint);
   const struct dxil_value *value = get_src(ctx, &intr->src[2], 0, nir_type_uint);
   if (!cmp || !value)
      return false;

   const struct dxil_value *retval = dxil_emit_cmpxchg(&ctx->mod, cmp, value, ptr, false,
                                                       DXIL_ATOMIC_ORDERING_ACQREL,
                                                       DXIL_SYNC_SCOPE_CROSSTHREAD);
   if (!retval)
      return false;

   store_def(ctx, &intr->def, 0, retval);
   return true;
}
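
/* Note on the swap lowering above (assumption based on LLVM-style cmpxchg
 * semantics): dxil_emit_cmpxchg is expected to hand back the value that was
 * in memory before the exchange, which is exactly what
 * nir_intrinsic_deref_atomic_swap defines as its result; no separate
 * success flag is consumed here.
 */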

static bool
emit_discard_if_with_value(struct ntd_context *ctx, const struct dxil_value *value)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DISCARD);
   if (!opcode)
      return false;

   const struct dxil_value *args[] = {
     opcode,
     value
   };

   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.discard", DXIL_NONE);
   if (!func)
      return false;

   return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_discard_if(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *value = get_src(ctx, &intr->src[0], 0, nir_type_bool);
   if (!value)
      return false;

   return emit_discard_if_with_value(ctx, value);
}

static bool
emit_discard(struct ntd_context *ctx)
{
   const struct dxil_value *value = dxil_module_get_int1_const(&ctx->mod, true);
   return emit_discard_if_with_value(ctx, value);
}
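
/* Both nir_intrinsic_discard and nir_intrinsic_demote funnel through
 * dx.op.discard above; the unconditional forms just pass a constant true.
 * Any semantic distinction between demote (keep running as a helper lane)
 * and discard is assumed to have been resolved by earlier NIR passes if the
 * shader depends on it.
 */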

static bool
emit_emit_vertex(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_EMIT_STREAM);
   const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
   if (!opcode || !stream_id)
      return false;

   const struct dxil_value *args[] = {
     opcode,
     stream_id
   };

   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.emitStream", DXIL_NONE);
   if (!func)
      return false;

   return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_end_primitive(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CUT_STREAM);
   const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
   if (!opcode || !stream_id)
      return false;

   const struct dxil_value *args[] = {
     opcode,
     stream_id
   };

   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cutStream", DXIL_NONE);
   if (!func)
      return false;

   return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_image_store(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_store ?
      create_image_handle(ctx, intr) :
      get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
   if (!handle)
      return false;

   bool is_array = false;
   if (intr->intrinsic == nir_intrinsic_image_deref_store)
      is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
   else
      is_array = nir_intrinsic_image_array(intr);

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
   enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_store ?
      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
      nir_intrinsic_image_dim(intr);
   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
   if (is_array)
      ++num_coords;

   assert(num_coords <= nir_src_num_components(intr->src[1]));
   for (unsigned i = 0; i < num_coords; ++i) {
      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
      if (!coord[i])
         return false;
   }

   nir_alu_type in_type = nir_intrinsic_src_type(intr);
   enum overload_type overload = get_overload(in_type, 32);

   assert(nir_src_bit_size(intr->src[3]) == 32);
   unsigned num_components = nir_src_num_components(intr->src[3]);
   assert(num_components <= 4);
   const struct dxil_value *value[4];
   for (unsigned i = 0; i < num_components; ++i) {
      value[i] = get_src(ctx, &intr->src[3], i, in_type);
      if (!value[i])
         return false;
   }

   for (int i = num_components; i < 4; ++i)
      value[i] = dxil_module_get_undef(&ctx->mod, dxil_value_get_type(value[0]));

   const struct dxil_value *write_mask =
      dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
   if (!write_mask)
      return false;

   if (image_dim == GLSL_SAMPLER_DIM_BUF) {
      coord[1] = int32_undef;
      return emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload);
   } else
      return emit_texturestore_call(ctx, handle, coord, value, write_mask, overload);
}
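
/* Layout note for the store above: texture stores take up to three
 * coordinates (unused ones stay i32 undef), while buffer stores index with
 * coord[0] only, which is why coord[1] is forced back to undef on the
 * GLSL_SAMPLER_DIM_BUF path. The value operands are padded to four
 * components with undef, and the i8 write mask keeps only the components
 * NIR actually supplied.
 */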

static bool
emit_image_load(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_load ?
      create_image_handle(ctx, intr) :
      get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
   if (!handle)
      return false;

   bool is_array = false;
   if (intr->intrinsic == nir_intrinsic_image_deref_load)
      is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
   else
      is_array = nir_intrinsic_image_array(intr);

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
   enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_load ?
      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
      nir_intrinsic_image_dim(intr);
   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
   if (is_array)
      ++num_coords;

   assert(num_coords <= nir_src_num_components(intr->src[1]));
   for (unsigned i = 0; i < num_coords; ++i) {
      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
      if (!coord[i])
         return false;
   }

   nir_alu_type out_type = nir_intrinsic_dest_type(intr);
   enum overload_type overload = get_overload(out_type, 32);

   const struct dxil_value *load_result;
   if (image_dim == GLSL_SAMPLER_DIM_BUF) {
      coord[1] = int32_undef;
      load_result = emit_bufferload_call(ctx, handle, coord, overload);
   } else
      load_result = emit_textureload_call(ctx, handle, coord, overload);

   if (!load_result)
      return false;

   assert(intr->def.bit_size == 32);
   unsigned num_components = intr->def.num_components;
   assert(num_components <= 4);
   for (unsigned i = 0; i < num_components; ++i) {
      const struct dxil_value *component = dxil_emit_extractval(&ctx->mod, load_result, i);
      if (!component)
         return false;
      store_def(ctx, &intr->def, i, component);
   }

   if (util_format_get_nr_components(nir_intrinsic_format(intr)) > 1)
      ctx->mod.feats.typed_uav_load_additional_formats = true;

   return true;
}
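
/* The feature flag set above is a capability hint: typed UAV loads of
 * multi-component formats are an optional D3D12 feature, so the module only
 * advertises the requirement when such a load was actually emitted.
 */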

static bool
emit_image_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_atomic ?
      create_image_handle(ctx, intr) :
      get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
   if (!handle)
      return false;

   bool is_array = false;
   if (intr->intrinsic == nir_intrinsic_image_deref_atomic)
      is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
   else
      is_array = nir_intrinsic_image_array(intr);

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
   enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_atomic ?
      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
      nir_intrinsic_image_dim(intr);
   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
   if (is_array)
      ++num_coords;

   assert(num_coords <= nir_src_num_components(intr->src[1]));
   for (unsigned i = 0; i < num_coords; ++i) {
      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
      if (!coord[i])
         return false;
   }

   nir_atomic_op nir_op = nir_intrinsic_atomic_op(intr);
   enum dxil_atomic_op dxil_op = nir_atomic_to_dxil_atomic(nir_op);
   nir_alu_type type = nir_atomic_op_type(nir_op);
   const struct dxil_value *value = get_src(ctx, &intr->src[3], 0, type);
   if (!value)
      return false;

   const struct dxil_value *retval =
      emit_atomic_binop(ctx, handle, dxil_op, coord, value);

   if (!retval)
      return false;

   store_def(ctx, &intr->def, 0, retval);
   return true;
}

static bool
emit_image_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_atomic_swap ?
      create_image_handle(ctx, intr) :
      get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
   if (!handle)
      return false;

   bool is_array = false;
   if (intr->intrinsic == nir_intrinsic_image_deref_atomic_swap)
      is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
   else
      is_array = nir_intrinsic_image_array(intr);

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
   enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ?
      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
      nir_intrinsic_image_dim(intr);
   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
   if (is_array)
      ++num_coords;

   assert(num_coords <= nir_src_num_components(intr->src[1]));
   for (unsigned i = 0; i < num_coords; ++i) {
      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
      if (!coord[i])
         return false;
   }

   const struct dxil_value *cmpval = get_src(ctx, &intr->src[3], 0, nir_type_uint);
   const struct dxil_value *newval = get_src(ctx, &intr->src[4], 0, nir_type_uint);
   if (!cmpval || !newval)
      return false;

   const struct dxil_value *retval =
      emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval);

   if (!retval)
      return false;

   store_def(ctx, &intr->def, 0, retval);
   return true;
}

struct texop_parameters {
   const struct dxil_value *tex;
   const struct dxil_value *sampler;
   const struct dxil_value *bias, *lod_or_sample, *min_lod;
   const struct dxil_value *coord[4], *offset[3], *dx[3], *dy[3];
   const struct dxil_value *cmp;
   enum overload_type overload;
};
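
/* texop_parameters is a scratch descriptor shared by the texture-shaped DXIL
 * ops below; callers fill in only the fields their op consumes and leave the
 * rest NULL or undef. lod_or_sample does double duty: it is the mip level
 * for dx.op.getDimensions and the level-based sample ops, and the sample
 * index for multisampled reads.
 */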

static const struct dxil_value *
emit_texture_size(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.getDimensions", DXIL_NONE);
   if (!func)
      return NULL;

   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_SIZE),
      params->tex,
      params->lod_or_sample
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_image_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_size ?
      create_image_handle(ctx, intr) :
      get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
   if (!handle)
      return false;

   enum glsl_sampler_dim sampler_dim = intr->intrinsic == nir_intrinsic_image_deref_size ?
      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
      nir_intrinsic_image_dim(intr);
   const struct dxil_value *lod = sampler_dim == GLSL_SAMPLER_DIM_BUF ?
      dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32)) :
      get_src(ctx, &intr->src[1], 0, nir_type_uint);
   if (!lod)
      return false;

   struct texop_parameters params = {
      .tex = handle,
      .lod_or_sample = lod
   };
   const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
   if (!dimensions)
      return false;

   for (unsigned i = 0; i < intr->def.num_components; ++i) {
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, i);
      store_def(ctx, &intr->def, i, retval);
   }

   return true;
}

static bool
emit_get_ssbo_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV;
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
      if (var && var->data.access & ACCESS_NON_WRITEABLE)
         class = DXIL_RESOURCE_CLASS_SRV;
   }

   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER);
   if (!handle)
      return false;

   struct texop_parameters params = {
      .tex = handle,
      .lod_or_sample = dxil_module_get_undef(
                        &ctx->mod, dxil_module_get_int_type(&ctx->mod, 32))
   };

   const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
   if (!dimensions)
      return false;

   const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, 0);
   store_def(ctx, &intr->def, 0, retval);

   return true;
}
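
/* Assumption documented for the extractval above: for a raw buffer,
 * dx.op.getDimensions reports the size in bytes in component 0 (matching
 * HLSL ByteAddressBuffer::GetDimensions), which is the value
 * nir_intrinsic_get_ssbo_size wants.
 */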

static bool
emit_ssbo_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   nir_atomic_op nir_op = nir_intrinsic_atomic_op(intr);
   enum dxil_atomic_op dxil_op = nir_atomic_to_dxil_atomic(nir_op);
   nir_alu_type type = nir_atomic_op_type(nir_op);
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);
   const struct dxil_value *value =
      get_src(ctx, &intr->src[2], 0, type);

   if (!value || !handle || !offset)
      return false;

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = {
      offset, int32_undef, int32_undef
   };

   const struct dxil_value *retval =
      emit_atomic_binop(ctx, handle, dxil_op, coord, value);

   if (!retval)
      return false;

   store_def(ctx, &intr->def, 0, retval);
   return true;
}

static bool
emit_ssbo_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);
   const struct dxil_value *cmpval =
      get_src(ctx, &intr->src[2], 0, nir_type_int);
   const struct dxil_value *newval =
      get_src(ctx, &intr->src[3], 0, nir_type_int);

   if (!cmpval || !newval || !handle || !offset)
      return false;

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = {
      offset, int32_undef, int32_undef
   };

   const struct dxil_value *retval =
      emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval);

   if (!retval)
      return false;

   store_def(ctx, &intr->def, 0, retval);
   return true;
}

static bool
emit_vulkan_resource_index(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   unsigned int binding = nir_intrinsic_binding(intr);

   bool const_index = nir_src_is_const(intr->src[0]);
   if (const_index) {
      binding += nir_src_as_const_value(intr->src[0])->u32;
   }

   const struct dxil_value *index_value = dxil_module_get_int32_const(&ctx->mod, binding);
   if (!index_value)
      return false;

   if (!const_index) {
      const struct dxil_value *offset = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
      if (!offset)
         return false;

      index_value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, index_value, offset, 0);
      if (!index_value)
         return false;
   }

   store_def(ctx, &intr->def, 0, index_value);
   store_def(ctx, &intr->def, 1, dxil_module_get_int32_const(&ctx->mod, 0));
   return true;
}
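
/* vulkan_resource_index thus lowers to the pair (binding + dynamic index, 0);
 * the constant second component is a placeholder so that
 * emit_load_vulkan_descriptor below can consume a two-component def without
 * caring how the index was formed.
 */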

static bool
emit_load_vulkan_descriptor(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   nir_intrinsic_instr *index = nir_src_as_intrinsic(intr->src[0]);
   const struct dxil_value *handle = NULL;

   enum dxil_resource_class resource_class;
   enum dxil_resource_kind resource_kind;
   switch (nir_intrinsic_desc_type(intr)) {
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
      resource_class = DXIL_RESOURCE_CLASS_CBV;
      resource_kind = DXIL_RESOURCE_KIND_CBUFFER;
      break;
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
      resource_class = DXIL_RESOURCE_CLASS_UAV;
      resource_kind = DXIL_RESOURCE_KIND_RAW_BUFFER;
      break;
   default:
      unreachable("unknown descriptor type");
      return false;
   }

   if (index && index->intrinsic == nir_intrinsic_vulkan_resource_index) {
      unsigned binding = nir_intrinsic_binding(index);
      unsigned space = nir_intrinsic_desc_set(index);

      /* The descriptor_set field for variables is only 5 bits. We shouldn't have intrinsics trying to go beyond that. */
      assert(space < 32);

      nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
      if (resource_class == DXIL_RESOURCE_CLASS_UAV &&
          (var->data.access & ACCESS_NON_WRITEABLE))
         resource_class = DXIL_RESOURCE_CLASS_SRV;

      const struct dxil_value *index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
      if (!index_value)
         return false;

      handle = emit_createhandle_call_dynamic(ctx, resource_class, space, binding, index_value, false);
   } else {
      const struct dxil_value *heap_index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
      if (!heap_index_value)
         return false;
      const struct dxil_value *unannotated_handle = emit_createhandle_heap(ctx, heap_index_value, false, true);
      const struct dxil_value *res_props = dxil_module_get_buffer_res_props_const(&ctx->mod, resource_class, resource_kind);
      if (!unannotated_handle || !res_props)
         return false;
      handle = emit_annotate_handle(ctx, unannotated_handle, res_props);
   }

   store_ssa_def(ctx, &intr->def, 0, handle);
   store_def(ctx, &intr->def, 1, get_src(ctx, &intr->src[0], 1, nir_type_uint32));

   return true;
}

static bool
emit_load_sample_pos_from_id(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.renderTargetGetSamplePosition", DXIL_NONE);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION);
   if (!opcode)
      return false;

   const struct dxil_value *args[] = {
      opcode,
      get_src(ctx, &intr->src[0], 0, nir_type_uint32),
   };
   if (!args[1])
      return false;

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;

   for (unsigned i = 0; i < 2; ++i) {
      /* GL coords go from 0 -> 1, D3D from -0.5 -> 0.5 */
      const struct dxil_value *coord = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
         dxil_emit_extractval(&ctx->mod, v, i),
         dxil_module_get_float_const(&ctx->mod, 0.5f), 0);
      store_def(ctx, &intr->def, i, coord);
   }
   return true;
}
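
/* Worked example of the coordinate shift above: D3D reports sample positions
 * relative to the pixel center, e.g. (-0.125, -0.375) for sample 0 of the
 * standard 4x pattern; adding 0.5 per axis yields (0.375, 0.125), the
 * [0, 1)-relative position GL-style gl_SamplePosition queries expect.
 */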

static bool
emit_load_sample_id(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   assert(ctx->mod.info.has_per_sample_input ||
          intr->intrinsic == nir_intrinsic_load_sample_id_no_per_sample);

   if (ctx->mod.info.has_per_sample_input)
      return emit_load_unary_external_function(ctx, intr, "dx.op.sampleIndex",
                                               DXIL_INTR_SAMPLE_INDEX, nir_type_int);

   store_def(ctx, &intr->def, 0, dxil_module_get_int32_const(&ctx->mod, 0));
   return true;
}

static bool
emit_read_first_invocation(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   ctx->mod.feats.wave_ops = 1;
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveReadLaneFirst",
                                                    get_overload(nir_type_uint, intr->def.bit_size));
   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_READ_LANE_FIRST),
      get_src(ctx, intr->src, 0, nir_type_uint),
   };
   if (!func || !args[0] || !args[1])
      return false;

   const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!ret)
      return false;
   store_def(ctx, &intr->def, 0, ret);
   return true;
}

static bool
emit_read_invocation(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   ctx->mod.feats.wave_ops = 1;
   bool quad = intr->intrinsic == nir_intrinsic_quad_broadcast;
   const struct dxil_func *func = dxil_get_function(&ctx->mod, quad ? "dx.op.quadReadLaneAt" : "dx.op.waveReadLaneAt",
                                                    get_overload(nir_type_uint, intr->def.bit_size));
   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, quad ? DXIL_INTR_QUAD_READ_LANE_AT : DXIL_INTR_WAVE_READ_LANE_AT),
      get_src(ctx, &intr->src[0], 0, nir_type_uint),
      get_src(ctx, &intr->src[1], 0, nir_type_uint),
   };
   if (!func || !args[0] || !args[1] || !args[2])
      return false;

   const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!ret)
      return false;
   store_def(ctx, &intr->def, 0, ret);
   return true;
}

static bool
emit_vote_eq(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   ctx->mod.feats.wave_ops = 1;
   nir_alu_type alu_type = intr->intrinsic == nir_intrinsic_vote_ieq ? nir_type_int : nir_type_float;
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveAllEqual",
                                                    get_overload(alu_type, intr->src[0].ssa->bit_size));
   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_ALL_EQUAL),
      get_src(ctx, intr->src, 0, alu_type),
   };
   if (!func || !args[0] || !args[1])
      return false;

   const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!ret)
      return false;
   store_def(ctx, &intr->def, 0, ret);
   return true;
}

static bool
emit_vote(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   ctx->mod.feats.wave_ops = 1;
   bool any = intr->intrinsic == nir_intrinsic_vote_any;
   const struct dxil_func *func = dxil_get_function(&ctx->mod,
                                                    any ? "dx.op.waveAnyTrue" : "dx.op.waveAllTrue",
                                                    DXIL_NONE);
   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, any ? DXIL_INTR_WAVE_ANY_TRUE : DXIL_INTR_WAVE_ALL_TRUE),
      get_src(ctx, intr->src, 0, nir_type_bool),
   };
   if (!func || !args[0] || !args[1])
      return false;

   const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!ret)
      return false;
   store_def(ctx, &intr->def, 0, ret);
   return true;
}

static bool
emit_ballot(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   ctx->mod.feats.wave_ops = 1;
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveBallot", DXIL_NONE);
   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_BALLOT),
      get_src(ctx, intr->src, 0, nir_type_bool),
   };
   if (!func || !args[0] || !args[1])
      return false;

   const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!ret)
      return false;
   for (uint32_t i = 0; i < 4; ++i)
      store_def(ctx, &intr->def, i, dxil_emit_extractval(&ctx->mod, ret, i));
   return true;
}
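
/* dx.op.waveActiveBallot returns a struct of four i32 words covering up to
 * 128 lanes; the loop above copies each word into one component of NIR's
 * 4-component ballot def. Lanes beyond the actual wave size simply
 * contribute zero bits.
 */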

static bool
emit_quad_op(struct ntd_context *ctx, nir_intrinsic_instr *intr, enum dxil_quad_op_kind op)
{
   ctx->mod.feats.wave_ops = 1;
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.quadOp",
                                                    get_overload(nir_type_uint, intr->def.bit_size));
   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_QUAD_OP),
      get_src(ctx, intr->src, 0, nir_type_uint),
      dxil_module_get_int8_const(&ctx->mod, op),
   };
   if (!func || !args[0] || !args[1] || !args[2])
      return false;

   const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!ret)
      return false;
   store_def(ctx, &intr->def, 0, ret);
   return true;
}

static enum dxil_wave_bit_op_kind
get_reduce_bit_op(nir_op op)
{
   switch (op) {
   case nir_op_ior: return DXIL_WAVE_BIT_OP_OR;
   case nir_op_ixor: return DXIL_WAVE_BIT_OP_XOR;
   case nir_op_iand: return DXIL_WAVE_BIT_OP_AND;
   default:
      unreachable("Invalid bit op");
   }
}

static bool
emit_reduce_bitwise(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   enum dxil_wave_bit_op_kind wave_bit_op = get_reduce_bit_op(nir_intrinsic_reduction_op(intr));
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveBit",
                                                    get_overload(nir_type_uint, intr->def.bit_size));
   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_BIT),
      get_src(ctx, intr->src, 0, nir_type_uint),
      dxil_module_get_int8_const(&ctx->mod, wave_bit_op),
   };
   if (!func || !args[0] || !args[1] || !args[2])
      return false;

   const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!ret)
      return false;
   store_def(ctx, &intr->def, 0, ret);
   return true;
}

static enum dxil_wave_op_kind
get_reduce_op(nir_op op)
{
   switch (op) {
   case nir_op_iadd:
   case nir_op_fadd:
      return DXIL_WAVE_OP_SUM;
   case nir_op_imul:
   case nir_op_fmul:
      return DXIL_WAVE_OP_PRODUCT;
   case nir_op_imax:
   case nir_op_umax:
   case nir_op_fmax:
      return DXIL_WAVE_OP_MAX;
   case nir_op_imin:
   case nir_op_umin:
   case nir_op_fmin:
      return DXIL_WAVE_OP_MIN;
   default:
      unreachable("Unexpected reduction op");
   }
}

static bool
emit_reduce(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   ctx->mod.feats.wave_ops = 1;
   bool is_prefix = intr->intrinsic == nir_intrinsic_exclusive_scan;
   nir_op reduction_op = (nir_op)nir_intrinsic_reduction_op(intr);
   switch (reduction_op) {
   case nir_op_ior:
   case nir_op_ixor:
   case nir_op_iand:
      assert(!is_prefix);
      return emit_reduce_bitwise(ctx, intr);
   default:
      break;
   }
   nir_alu_type alu_type = nir_op_infos[reduction_op].input_types[0];
   enum dxil_wave_op_kind wave_op = get_reduce_op(reduction_op);
   const struct dxil_func *func = dxil_get_function(&ctx->mod, is_prefix ? "dx.op.wavePrefixOp" : "dx.op.waveActiveOp",
                                                    get_overload(alu_type, intr->def.bit_size));
   bool is_unsigned = alu_type == nir_type_uint;
   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, is_prefix ? DXIL_INTR_WAVE_PREFIX_OP : DXIL_INTR_WAVE_ACTIVE_OP),
      get_src(ctx, intr->src, 0, alu_type),
      dxil_module_get_int8_const(&ctx->mod, wave_op),
      dxil_module_get_int8_const(&ctx->mod, is_unsigned),
   };
   if (!func || !args[0] || !args[1] || !args[2] || !args[3])
      return false;

   const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!ret)
      return false;
   store_def(ctx, &intr->def, 0, ret);
   return true;
}
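
/* Reduction dispatch summary for the code above: bitwise ops go to
 * dx.op.waveActiveBit with a wave_bit_op selector, everything else to
 * dx.op.waveActiveOp (or dx.op.wavePrefixOp for exclusive scans) with an i8
 * op kind plus an i8 signedness flag, since DXIL distinguishes signed and
 * unsigned min/max through that flag rather than through separate opcodes.
 */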

static bool
emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_load_global_invocation_id:
   case nir_intrinsic_load_global_invocation_id_zero_base:
      return emit_load_global_invocation_id(ctx, intr);
   case nir_intrinsic_load_local_invocation_id:
      return emit_load_local_invocation_id(ctx, intr);
   case nir_intrinsic_load_local_invocation_index:
      return emit_load_local_invocation_index(ctx, intr);
   case nir_intrinsic_load_workgroup_id:
   case nir_intrinsic_load_workgroup_id_zero_base:
      return emit_load_local_workgroup_id(ctx, intr);
   case nir_intrinsic_load_ssbo:
      return emit_load_ssbo(ctx, intr);
   case nir_intrinsic_store_ssbo:
      return emit_store_ssbo(ctx, intr);
   case nir_intrinsic_load_deref:
      return emit_load_deref(ctx, intr);
   case nir_intrinsic_store_deref:
      return emit_store_deref(ctx, intr);
   case nir_intrinsic_deref_atomic:
      return emit_atomic_deref(ctx, intr);
   case nir_intrinsic_deref_atomic_swap:
      return emit_atomic_deref_swap(ctx, intr);
   case nir_intrinsic_load_ubo_vec4:
      return emit_load_ubo_vec4(ctx, intr);
   case nir_intrinsic_load_primitive_id:
      return emit_load_unary_external_function(ctx, intr, "dx.op.primitiveID",
                                               DXIL_INTR_PRIMITIVE_ID, nir_type_int);
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_id_no_per_sample:
      return emit_load_sample_id(ctx, intr);
   case nir_intrinsic_load_invocation_id:
      switch (ctx->mod.shader_kind) {
      case DXIL_HULL_SHADER:
         return emit_load_unary_external_function(ctx, intr, "dx.op.outputControlPointID",
                                                  DXIL_INTR_OUTPUT_CONTROL_POINT_ID, nir_type_int);
      case DXIL_GEOMETRY_SHADER:
         return emit_load_unary_external_function(ctx, intr, "dx.op.gsInstanceID",
                                                  DXIL_INTR_GS_INSTANCE_ID, nir_type_int);
      default:
         unreachable("Unexpected shader kind for invocation ID");
      }
   case nir_intrinsic_load_view_index:
      ctx->mod.feats.view_id = true;
      return emit_load_unary_external_function(ctx, intr, "dx.op.viewID",
                                               DXIL_INTR_VIEW_ID, nir_type_int);
   case nir_intrinsic_load_sample_mask_in:
      return emit_load_sample_mask_in(ctx, intr);
   case nir_intrinsic_load_tess_coord:
      return emit_load_tess_coord(ctx, intr);
   case nir_intrinsic_discard_if:
   case nir_intrinsic_demote_if:
      return emit_discard_if(ctx, intr);
   case nir_intrinsic_discard:
   case nir_intrinsic_demote:
      return emit_discard(ctx);
   case nir_intrinsic_emit_vertex:
      return emit_emit_vertex(ctx, intr);
   case nir_intrinsic_end_primitive:
      return emit_end_primitive(ctx, intr);
   case nir_intrinsic_barrier:
      return emit_barrier(ctx, intr);
   case nir_intrinsic_ssbo_atomic:
      return emit_ssbo_atomic(ctx, intr);
   case nir_intrinsic_ssbo_atomic_swap:
      return emit_ssbo_atomic_comp_swap(ctx, intr);
   case nir_intrinsic_image_deref_atomic:
   case nir_intrinsic_image_atomic:
   case nir_intrinsic_bindless_image_atomic:
      return emit_image_atomic(ctx, intr);
   case nir_intrinsic_image_deref_atomic_swap:
   case nir_intrinsic_image_atomic_swap:
   case nir_intrinsic_bindless_image_atomic_swap:
      return emit_image_atomic_comp_swap(ctx, intr);
   case nir_intrinsic_image_store:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_bindless_image_store:
      return emit_image_store(ctx, intr);
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_bindless_image_load:
      return emit_image_load(ctx, intr);
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_bindless_image_size:
      return emit_image_size(ctx, intr);
   case nir_intrinsic_get_ssbo_size:
      return emit_get_ssbo_size(ctx, intr);
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_output:
      return emit_load_input_via_intrinsic(ctx, intr);
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output:
      return emit_store_output_via_intrinsic(ctx, intr);

   case nir_intrinsic_load_barycentric_at_offset:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_pixel:
      /* Emit nothing, we only support these as inputs to load_interpolated_input */
      return true;
   case nir_intrinsic_load_interpolated_input:
      return emit_load_interpolated_input(ctx, intr);

   case nir_intrinsic_vulkan_resource_index:
      return emit_vulkan_resource_index(ctx, intr);
   case nir_intrinsic_load_vulkan_descriptor:
      return emit_load_vulkan_descriptor(ctx, intr);

   case nir_intrinsic_load_sample_pos_from_id:
      return emit_load_sample_pos_from_id(ctx, intr);

   case nir_intrinsic_is_helper_invocation:
      return emit_load_unary_external_function(
         ctx, intr, "dx.op.isHelperLane", DXIL_INTR_IS_HELPER_LANE, nir_type_int);
   case nir_intrinsic_elect:
      ctx->mod.feats.wave_ops = 1;
      return emit_load_unary_external_function(
         ctx, intr, "dx.op.waveIsFirstLane", DXIL_INTR_WAVE_IS_FIRST_LANE, nir_type_invalid);
   case nir_intrinsic_load_subgroup_size:
      ctx->mod.feats.wave_ops = 1;
      return emit_load_unary_external_function(
         ctx, intr, "dx.op.waveGetLaneCount", DXIL_INTR_WAVE_GET_LANE_COUNT, nir_type_invalid);
   case nir_intrinsic_load_subgroup_invocation:
      ctx->mod.feats.wave_ops = 1;
      return emit_load_unary_external_function(
         ctx, intr, "dx.op.waveGetLaneIndex", DXIL_INTR_WAVE_GET_LANE_INDEX, nir_type_invalid);

   case nir_intrinsic_vote_feq:
   case nir_intrinsic_vote_ieq:
      return emit_vote_eq(ctx, intr);
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
      return emit_vote(ctx, intr);

   case nir_intrinsic_ballot:
      return emit_ballot(ctx, intr);

   case nir_intrinsic_read_first_invocation:
      return emit_read_first_invocation(ctx, intr);
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_shuffle:
   case nir_intrinsic_quad_broadcast:
      return emit_read_invocation(ctx, intr);

   case nir_intrinsic_quad_swap_horizontal:
      return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_X);
   case nir_intrinsic_quad_swap_vertical:
      return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_Y);
   case nir_intrinsic_quad_swap_diagonal:
      return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_DIAGONAL);

   case nir_intrinsic_reduce:
   case nir_intrinsic_exclusive_scan:
      return emit_reduce(ctx, intr);

   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_workgroup_size:
   default:
      log_nir_instr_unsupported(
         ctx->logger, "Unimplemented intrinsic instruction", &intr->instr);
      return false;
   }
}
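
/* load_num_workgroups and load_workgroup_size land on the unsupported path
 * deliberately: DXIL has no direct equivalent, and they are presumably
 * lowered earlier (e.g. to constants or a driver-managed constant-buffer
 * read), so reaching them here indicates a missing lowering rather than a
 * TODO in this switch.
 */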

static const struct dxil_type *
dxil_type_for_const(struct ntd_context *ctx, nir_def *def)
{
   if (BITSET_TEST(ctx->int_types, def->index) ||
       !BITSET_TEST(ctx->float_types, def->index))
      return dxil_module_get_int_type(&ctx->mod, def->bit_size);
   return dxil_module_get_float_type(&ctx->mod, def->bit_size);
}

static bool
emit_load_const(struct ntd_context *ctx, nir_load_const_instr *load_const)
{
   for (uint32_t i = 0; i < load_const->def.num_components; ++i) {
      const struct dxil_type *type = dxil_type_for_const(ctx, &load_const->def);
      store_ssa_def(ctx, &load_const->def, i, get_value_for_const(&ctx->mod, &load_const->value[i], type));
   }
   return true;
}

static bool
emit_deref(struct ntd_context *ctx, nir_deref_instr *instr)
{
   /* There are two possible reasons we might be walking through derefs:
    * 1. Computing an index to be used for a texture/sampler/image binding, which
    *    can only do array indexing and should compute the indices along the way with
    *    array-of-array sizes.
    * 2. Storing an index to be used in a GEP for access to a variable.
    */
   nir_variable *var = nir_deref_instr_get_variable(instr);
   assert(var);

   bool is_aoa_size =
      glsl_type_is_sampler(glsl_without_array(var->type)) ||
      glsl_type_is_image(glsl_without_array(var->type)) ||
      glsl_type_is_texture(glsl_without_array(var->type));

   if (!is_aoa_size) {
      /* Just store the values, we'll use these to build a GEP in the load or store */
      switch (instr->deref_type) {
      case nir_deref_type_var:
         store_def(ctx, &instr->def, 0, dxil_module_get_int_const(&ctx->mod, 0, instr->def.bit_size));
         return true;
      case nir_deref_type_array:
         store_def(ctx, &instr->def, 0, get_src(ctx, &instr->arr.index, 0, nir_type_int));
         return true;
      case nir_deref_type_struct:
         store_def(ctx, &instr->def, 0, dxil_module_get_int_const(&ctx->mod, instr->strct.index, 32));
         return true;
      default:
         unreachable("Other deref types not supported");
      }
   }

   /* In the CL environment, there's nothing to emit: any use of the deref
    * will emit the logic needed for scratch/shared GEP addressing.
    */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_CL)
      return true;

   const struct glsl_type *type = instr->type;
   const struct dxil_value *binding;
   unsigned binding_val = ctx->opts->environment == DXIL_ENVIRONMENT_GL ?
      var->data.driver_location : var->data.binding;

   if (instr->deref_type == nir_deref_type_var) {
      binding = dxil_module_get_int32_const(&ctx->mod, binding_val);
   } else {
      const struct dxil_value *base = get_src(ctx, &instr->parent, 0, nir_type_uint32);
      const struct dxil_value *offset = get_src(ctx, &instr->arr.index, 0, nir_type_uint32);
      if (!base || !offset)
         return false;

      if (glsl_type_is_array(instr->type)) {
         offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_MUL, offset,
            dxil_module_get_int32_const(&ctx->mod, glsl_get_aoa_size(instr->type)), 0);
         if (!offset)
            return false;
      }
      binding = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, base, offset, 0);
   }

   if (!binding)
      return false;

   /* Haven't finished chasing the deref chain yet, just store the value */
   if (glsl_type_is_array(type)) {
      store_def(ctx, &instr->def, 0, binding);
      return true;
   }

   assert(glsl_type_is_sampler(type) || glsl_type_is_image(type) || glsl_type_is_texture(type));
   enum dxil_resource_class res_class;
   if (glsl_type_is_image(type))
      res_class = DXIL_RESOURCE_CLASS_UAV;
   else if (glsl_type_is_sampler(type))
      res_class = DXIL_RESOURCE_CLASS_SAMPLER;
   else
      res_class = DXIL_RESOURCE_CLASS_SRV;

   unsigned descriptor_set = ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN ?
      var->data.descriptor_set : (glsl_type_is_image(type) ? 1 : 0);
   const struct dxil_value *handle = emit_createhandle_call_dynamic(ctx, res_class,
      descriptor_set, binding_val, binding, false);
   if (!handle)
      return false;

   store_ssa_def(ctx, &instr->def, 0, handle);
   return true;
}
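
/* Index math example for the resource path above (hypothetical declaration):
 * with "uniform sampler2D tex[4][2]", the deref tex[i][j] folds to
 * binding_val + i * 2 + j, because each intermediate array deref scales its
 * index by the array-of-arrays size of the element type that remains; only
 * when the deref reaches the sampler itself is the handle created.
 */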

static bool
emit_cond_branch(struct ntd_context *ctx, const struct dxil_value *cond,
                 int true_block, int false_block)
{
   assert(cond);
   assert(true_block >= 0);
   assert(false_block >= 0);
   return dxil_emit_branch(&ctx->mod, cond, true_block, false_block);
}

static bool
emit_branch(struct ntd_context *ctx, int block)
{
   assert(block >= 0);
   return dxil_emit_branch(&ctx->mod, NULL, block, -1);
}

static bool
emit_jump(struct ntd_context *ctx, nir_jump_instr *instr)
{
   switch (instr->type) {
   case nir_jump_break:
   case nir_jump_continue:
      assert(instr->instr.block->successors[0]);
      assert(!instr->instr.block->successors[1]);
      return emit_branch(ctx, instr->instr.block->successors[0]->index);

   default:
      unreachable("Unsupported jump type\n");
   }
}

struct phi_block {
   unsigned num_components;
   struct dxil_instr *comp[NIR_MAX_VEC_COMPONENTS];
};

static bool
emit_phi(struct ntd_context *ctx, nir_phi_instr *instr)
{
   const struct dxil_type *type = NULL;
   nir_foreach_phi_src(src, instr) {
      /* All sources have the same type, just use the first one */
      type = dxil_value_get_type(ctx->defs[src->src.ssa->index].chans[0]);
      break;
   }

   struct phi_block *vphi = ralloc(ctx->phis, struct phi_block);
   vphi->num_components = instr->def.num_components;

   for (unsigned i = 0; i < vphi->num_components; ++i) {
      struct dxil_instr *phi = vphi->comp[i] = dxil_emit_phi(&ctx->mod, type);
      if (!phi)
         return false;
      store_ssa_def(ctx, &instr->def, i, dxil_instr_get_return_value(phi));
   }
   _mesa_hash_table_insert(ctx->phis, instr, vphi);
   return true;
}

static bool
fixup_phi(struct ntd_context *ctx, nir_phi_instr *instr,
          struct phi_block *vphi)
{
   const struct dxil_value *values[16];
   unsigned blocks[16];
   for (unsigned i = 0; i < vphi->num_components; ++i) {
      size_t num_incoming = 0;
      nir_foreach_phi_src(src, instr) {
         const struct dxil_value *val = get_src_ssa(ctx, src->src.ssa, i);
         values[num_incoming] = val;
         blocks[num_incoming] = src->pred->index;
         ++num_incoming;
         if (num_incoming == ARRAY_SIZE(values)) {
            if (!dxil_phi_add_incoming(vphi->comp[i], values, blocks,
                                       num_incoming))
               return false;
            num_incoming = 0;
         }
      }
      if (num_incoming > 0 && !dxil_phi_add_incoming(vphi->comp[i], values,
                                                     blocks, num_incoming))
         return false;
   }
   return true;
}
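
/* Phi handling is two-phase: emit_phi creates empty DXIL phi nodes while
 * instructions are being emitted, so later uses can already reference the
 * def, and fixup_phi runs once all blocks exist to attach the
 * (value, predecessor-block) pairs, flushing them in batches sized to the
 * 16-entry scratch arrays above.
 */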

static unsigned
get_n_src(struct ntd_context *ctx, const struct dxil_value **values,
          unsigned max_components, nir_tex_src *src, nir_alu_type type)
{
   unsigned num_components = nir_src_num_components(src->src);
   unsigned i = 0;

   assert(num_components <= max_components);

   for (i = 0; i < num_components; ++i) {
      values[i] = get_src(ctx, &src->src, i, type);
      if (!values[i])
         return 0;
   }

   return num_components;
}

#define PAD_SRC(ctx, array, components, undef) \
   for (unsigned i = components; i < ARRAY_SIZE(array); ++i) { \
      array[i] = undef; \
   }
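
/* PAD_SRC fills the tail of a fixed-size operand array with an undef value;
 * e.g. a two-component texture coordinate headed for the four-wide
 * params->coord[] below leaves coord[2] and coord[3] as undef, which the
 * dx.op.sample* operations accept for dimensions the resource lacks.
 */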
5194 
5195 static const struct dxil_value *
emit_sample(struct ntd_context * ctx,struct texop_parameters * params)5196 emit_sample(struct ntd_context *ctx, struct texop_parameters *params)
5197 {
5198    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sample", params->overload);
5199    if (!func)
5200       return NULL;
5201 
5202    const struct dxil_value *args[11] = {
5203       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE),
5204       params->tex, params->sampler,
5205       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5206       params->offset[0], params->offset[1], params->offset[2],
5207       params->min_lod
5208    };
5209 
5210    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5211 }

static const struct dxil_value *
emit_sample_bias(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleBias", params->overload);
   if (!func)
      return NULL;

   assert(params->bias != NULL);

   const struct dxil_value *args[12] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_BIAS),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->bias, params->min_lod
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_sample_level(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleLevel", params->overload);
   if (!func)
      return NULL;

   assert(params->lod_or_sample != NULL);

   const struct dxil_value *args[11] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_LEVEL),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->lod_or_sample
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_sample_cmp(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func;
   enum dxil_intr opcode;

   func = dxil_get_function(&ctx->mod, "dx.op.sampleCmp", DXIL_F32);
   opcode = DXIL_INTR_SAMPLE_CMP;

   if (!func)
      return NULL;

   const struct dxil_value *args[12] = {
      dxil_module_get_int32_const(&ctx->mod, opcode),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->cmp, params->min_lod
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_sample_cmp_level_zero(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func;
   enum dxil_intr opcode;

   func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevelZero", DXIL_F32);
   opcode = DXIL_INTR_SAMPLE_CMP_LVL_ZERO;

   if (!func)
      return NULL;

   const struct dxil_value *args[11] = {
      dxil_module_get_int32_const(&ctx->mod, opcode),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->cmp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_sample_cmp_level(struct ntd_context *ctx, struct texop_parameters *params)
{
   ctx->mod.feats.advanced_texture_ops = true;
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevel", params->overload);
   if (!func)
      return NULL;

   assert(params->lod_or_sample != NULL);

   const struct dxil_value *args[12] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_LEVEL),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->cmp, params->lod_or_sample
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

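/* sampleCmpBias and sampleCmpGrad are Shader Model 6.8 additions; emit_tex
 * only selects them when ctx->mod.minor_version >= 8, and emitting either
 * one flags sample_cmp_bias_gradient so the module advertises the feature.
 */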
static const struct dxil_value *
emit_sample_cmp_bias(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpBias", params->overload);
   if (!func)
      return NULL;

   assert(params->bias != NULL);
   ctx->mod.feats.sample_cmp_bias_gradient = 1;

   const struct dxil_value *args[13] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_BIAS),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->cmp, params->bias, params->min_lod
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_sample_grad(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleGrad", params->overload);
   if (!func)
      return NULL;

   const struct dxil_value *args[17] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_GRAD),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->dx[0], params->dx[1], params->dx[2],
      params->dy[0], params->dy[1], params->dy[2],
      params->min_lod
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_sample_cmp_grad(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpGrad", params->overload);
   if (!func)
      return NULL;

   ctx->mod.feats.sample_cmp_bias_gradient = 1;

   const struct dxil_value *args[18] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_GRAD),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->cmp,
      params->dx[0], params->dx[1], params->dx[2],
      params->dy[0], params->dy[1], params->dy[2],
      params->min_lod
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_texel_fetch(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", params->overload);
   if (!func)
      return NULL;

   if (!params->lod_or_sample)
      params->lod_or_sample = dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32));

   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOAD),
      params->tex,
      params->lod_or_sample, params->coord[0], params->coord[1], params->coord[2],
      params->offset[0], params->offset[1], params->offset[2]
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_texture_lod(struct ntd_context *ctx, struct texop_parameters *params, bool clamped)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.calculateLOD", DXIL_F32);
   if (!func)
      return NULL;

   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOD),
      params->tex,
      params->sampler,
      params->coord[0],
      params->coord[1],
      params->coord[2],
      dxil_module_get_int1_const(&ctx->mod, clamped ? 1 : 0)
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_texture_gather(struct ntd_context *ctx, struct texop_parameters *params, unsigned component)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod,
      params->cmp ? "dx.op.textureGatherCmp" : "dx.op.textureGather", params->overload);
   if (!func)
      return NULL;

   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, params->cmp ?
         DXIL_INTR_TEXTURE_GATHER_CMP : DXIL_INTR_TEXTURE_GATHER),
      params->tex,
      params->sampler,
      params->coord[0],
      params->coord[1],
      params->coord[2],
      params->coord[3],
      params->offset[0],
      params->offset[1],
      dxil_module_get_int32_const(&ctx->mod, component),
      params->cmp
   };

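   /* dx.op.textureGatherCmp takes the comparison value as a trailing
    * argument; for a plain gather, one argument fewer is passed and the
    * unused cmp slot is dropped.
    */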
   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args) - (params->cmp ? 0 : 1));
}

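/* Translate one NIR texture instruction: walk its sources and convert each
 * one into texop_parameters, pad the coordinate/offset vectors with undef
 * values, then dispatch on the texture opcode to the matching dx.op.*
 * emitter above.
 */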
static bool
emit_tex(struct ntd_context *ctx, nir_tex_instr *instr)
{
   struct texop_parameters params;
   memset(&params, 0, sizeof(struct texop_parameters));
   if (ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN) {
      params.tex = ctx->srv_handles[instr->texture_index];
      params.sampler = ctx->sampler_handles[instr->sampler_index];
   }

   const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
   const struct dxil_type *float_type = dxil_module_get_float_type(&ctx->mod, 32);
   const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);
   const struct dxil_value *float_undef = dxil_module_get_undef(&ctx->mod, float_type);

   unsigned coord_components = 0, offset_components = 0, dx_components = 0, dy_components = 0;
   params.overload = get_overload(instr->dest_type, 32);

   bool lod_is_zero = false;
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      nir_alu_type type = nir_tex_instr_src_type(instr, i);

      switch (instr->src[i].src_type) {
      case nir_tex_src_coord:
         coord_components = get_n_src(ctx, params.coord, ARRAY_SIZE(params.coord),
                                      &instr->src[i], type);
         if (!coord_components)
            return false;
         break;

      case nir_tex_src_offset:
         offset_components = get_n_src(ctx, params.offset, ARRAY_SIZE(params.offset),
                                       &instr->src[i], nir_type_int);
         if (!offset_components)
            return false;

         /* Dynamic offsets were only allowed with gather, until "advanced texture ops" in SM 6.7 */
         if (!nir_src_is_const(instr->src[i].src) && instr->op != nir_texop_tg4)
            ctx->mod.feats.advanced_texture_ops = true;
         break;

      case nir_tex_src_bias:
         assert(instr->op == nir_texop_txb);
         assert(nir_src_num_components(instr->src[i].src) == 1);
         params.bias = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
         if (!params.bias)
            return false;
         break;

      case nir_tex_src_lod:
         assert(nir_src_num_components(instr->src[i].src) == 1);
         if (instr->op == nir_texop_txf_ms) {
            assert(nir_src_as_int(instr->src[i].src) == 0);
            break;
         }

         /* Buffers don't have a LOD */
         if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF)
            params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, type);
         else
            params.lod_or_sample = int_undef;
         if (!params.lod_or_sample)
            return false;

         if (nir_src_is_const(instr->src[i].src) && nir_src_as_float(instr->src[i].src) == 0.0f)
            lod_is_zero = true;
         break;

      case nir_tex_src_min_lod:
         assert(nir_src_num_components(instr->src[i].src) == 1);
         params.min_lod = get_src(ctx, &instr->src[i].src, 0, type);
         if (!params.min_lod)
            return false;
         break;

      case nir_tex_src_comparator:
         assert(nir_src_num_components(instr->src[i].src) == 1);
         params.cmp = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
         if (!params.cmp)
            return false;
         break;

      case nir_tex_src_ddx:
         dx_components = get_n_src(ctx, params.dx, ARRAY_SIZE(params.dx),
                                   &instr->src[i], nir_type_float);
         if (!dx_components)
            return false;
         break;

      case nir_tex_src_ddy:
         dy_components = get_n_src(ctx, params.dy, ARRAY_SIZE(params.dy),
                                   &instr->src[i], nir_type_float);
         if (!dy_components)
            return false;
         break;

      case nir_tex_src_ms_index:
         params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, nir_type_int);
         if (!params.lod_or_sample)
            return false;
         break;

      case nir_tex_src_texture_deref:
         assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN);
         params.tex = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
         break;

      case nir_tex_src_sampler_deref:
         assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN);
         params.sampler = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
         break;

      case nir_tex_src_texture_offset:
         params.tex = emit_createhandle_call_dynamic(ctx, DXIL_RESOURCE_CLASS_SRV,
            0, instr->texture_index,
            dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
               get_src(ctx, &instr->src[i].src, 0, nir_type_uint),
               dxil_module_get_int32_const(&ctx->mod, instr->texture_index), 0),
            instr->texture_non_uniform);
         break;

      case nir_tex_src_sampler_offset:
         if (nir_tex_instr_need_sampler(instr)) {
            params.sampler = emit_createhandle_call_dynamic(ctx, DXIL_RESOURCE_CLASS_SAMPLER,
               0, instr->sampler_index,
               dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
                  get_src(ctx, &instr->src[i].src, 0, nir_type_uint),
                  dxil_module_get_int32_const(&ctx->mod, instr->sampler_index), 0),
               instr->sampler_non_uniform);
         }
         break;

      case nir_tex_src_texture_handle:
         params.tex = create_srv_handle(ctx, instr, &instr->src[i].src);
         break;

      case nir_tex_src_sampler_handle:
         if (nir_tex_instr_need_sampler(instr))
            params.sampler = create_sampler_handle(ctx, instr->is_shadow, &instr->src[i].src);
         break;

      case nir_tex_src_projector:
         unreachable("Texture projector should have been lowered");

      default:
         fprintf(stderr, "texture source: %d\n", instr->src[i].src_type);
         unreachable("unknown texture source");
      }
   }

   assert(params.tex != NULL);
   assert(instr->op == nir_texop_txf ||
          instr->op == nir_texop_txf_ms ||
          nir_tex_instr_is_query(instr) ||
          params.sampler != NULL);

   PAD_SRC(ctx, params.coord, coord_components, float_undef);
   PAD_SRC(ctx, params.offset, offset_components, int_undef);
   if (!params.min_lod) params.min_lod = float_undef;

   const struct dxil_value *sample = NULL;
   switch (instr->op) {
   case nir_texop_txb:
      if (params.cmp != NULL && ctx->mod.minor_version >= 8)
         sample = emit_sample_cmp_bias(ctx, &params);
      else
         sample = emit_sample_bias(ctx, &params);
      break;

   case nir_texop_tex:
      if (params.cmp != NULL) {
         sample = emit_sample_cmp(ctx, &params);
         break;
      } else if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) {
         sample = emit_sample(ctx, &params);
         break;
      }
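      /* dx.op.sample needs implicit derivatives, which DXIL only provides in
       * pixel shaders; in all other stages, fall through to an explicit-LOD
       * sample at level 0.0.
       */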
      params.lod_or_sample = dxil_module_get_float_const(&ctx->mod, 0);
      lod_is_zero = true;
      FALLTHROUGH;
   case nir_texop_txl:
      if (lod_is_zero && params.cmp != NULL && ctx->mod.minor_version < 7) {
         /* Prior to SM 6.7, if the level is a constant 0.0, ignore the LOD
          * argument and use the level-less DXIL instruction instead. This
          * avoids emitting dx.op.sampleCmpLevel, which is not available there.
          */
         sample = emit_sample_cmp_level_zero(ctx, &params);
      } else {
         if (params.cmp != NULL)
            sample = emit_sample_cmp_level(ctx, &params);
         else
            sample = emit_sample_level(ctx, &params);
      }
      break;

   case nir_texop_txd:
      PAD_SRC(ctx, params.dx, dx_components, float_undef);
      PAD_SRC(ctx, params.dy, dy_components, float_undef);
      if (params.cmp != NULL && ctx->mod.minor_version >= 8)
         sample = emit_sample_cmp_grad(ctx, &params);
      else
         sample = emit_sample_grad(ctx, &params);
      break;

   case nir_texop_txf:
   case nir_texop_txf_ms:
      if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
         params.coord[1] = int_undef;
         sample = emit_bufferload_call(ctx, params.tex, params.coord, params.overload);
      } else {
         PAD_SRC(ctx, params.coord, coord_components, int_undef);
         sample = emit_texel_fetch(ctx, &params);
      }
      break;

   case nir_texop_txs:
      sample = emit_texture_size(ctx, &params);
      break;

   case nir_texop_tg4:
      sample = emit_texture_gather(ctx, &params, instr->component);
      break;

   case nir_texop_lod:
      sample = emit_texture_lod(ctx, &params, true);
      store_def(ctx, &instr->def, 0, sample);
      sample = emit_texture_lod(ctx, &params, false);
      store_def(ctx, &instr->def, 1, sample);
      return true;

   case nir_texop_query_levels: {
      params.lod_or_sample = dxil_module_get_int_const(&ctx->mod, 0, 32);
      sample = emit_texture_size(ctx, &params);
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3);
      store_def(ctx, &instr->def, 0, retval);
      return true;
   }

   case nir_texop_texture_samples: {
      params.lod_or_sample = int_undef;
      sample = emit_texture_size(ctx, &params);
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3);
      store_def(ctx, &instr->def, 0, retval);
      return true;
   }

   default:
      fprintf(stderr, "texture op: %d\n", instr->op);
      unreachable("unknown texture op");
   }

   if (!sample)
      return false;

   for (unsigned i = 0; i < instr->def.num_components; ++i) {
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, i);
      store_def(ctx, &instr->def, i, retval);
   }

   return true;
}

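/* NIR undef values carry no defined contents; materialize them as zero
 * constants so each component has a concrete DXIL value to reference.
 */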
static bool
emit_undefined(struct ntd_context *ctx, nir_undef_instr *undef)
{
   for (unsigned i = 0; i < undef->def.num_components; ++i)
      store_ssa_def(ctx, &undef->def, i, dxil_module_get_int32_const(&ctx->mod, 0));
   return true;
}

static bool emit_instr(struct ntd_context *ctx, struct nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return emit_alu(ctx, nir_instr_as_alu(instr));
   case nir_instr_type_intrinsic:
      return emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
   case nir_instr_type_load_const:
      return emit_load_const(ctx, nir_instr_as_load_const(instr));
   case nir_instr_type_deref:
      return emit_deref(ctx, nir_instr_as_deref(instr));
   case nir_instr_type_jump:
      return emit_jump(ctx, nir_instr_as_jump(instr));
   case nir_instr_type_phi:
      return emit_phi(ctx, nir_instr_as_phi(instr));
   case nir_instr_type_tex:
      return emit_tex(ctx, nir_instr_as_tex(instr));
   case nir_instr_type_undef:
      return emit_undefined(ctx, nir_instr_as_undef(instr));
   default:
      log_nir_instr_unsupported(ctx->logger, "Unimplemented instruction type",
                                instr);
      return false;
   }
}


static bool
emit_block(struct ntd_context *ctx, struct nir_block *block)
{
   assert(block->index < ctx->mod.cur_emitting_func->num_basic_block_ids);
   ctx->mod.cur_emitting_func->basic_block_ids[block->index] = ctx->mod.cur_emitting_func->curr_block;

   nir_foreach_instr(instr, block) {
      TRACE_CONVERSION(instr);

      if (!emit_instr(ctx, instr)) {
         return false;
      }
   }
   return true;
}

static bool
emit_cf_list(struct ntd_context *ctx, struct exec_list *list);

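/* Control flow is emitted in terms of NIR block indices: emit_block records
 * the current DXIL block id for each nir_block::index, and the branch
 * targets passed to emit_branch/emit_cond_branch are those same indices.
 */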
static bool
emit_if(struct ntd_context *ctx, struct nir_if *if_stmt)
{
   assert(nir_src_num_components(if_stmt->condition) == 1);
   const struct dxil_value *cond = get_src(ctx, &if_stmt->condition, 0,
                                           nir_type_bool);
   if (!cond)
      return false;

   /* prepare blocks */
   nir_block *then_block = nir_if_first_then_block(if_stmt);
   assert(nir_if_last_then_block(if_stmt)->successors[0]);
   assert(!nir_if_last_then_block(if_stmt)->successors[1]);
   int then_succ = nir_if_last_then_block(if_stmt)->successors[0]->index;

   nir_block *else_block = NULL;
   int else_succ = -1;
   if (!exec_list_is_empty(&if_stmt->else_list)) {
      else_block = nir_if_first_else_block(if_stmt);
      assert(nir_if_last_else_block(if_stmt)->successors[0]);
      assert(!nir_if_last_else_block(if_stmt)->successors[1]);
      else_succ = nir_if_last_else_block(if_stmt)->successors[0]->index;
   }

   if (!emit_cond_branch(ctx, cond, then_block->index,
                         else_block ? else_block->index : then_succ))
      return false;

   /* handle then-block */
   if (!emit_cf_list(ctx, &if_stmt->then_list) ||
       (!nir_block_ends_in_jump(nir_if_last_then_block(if_stmt)) &&
        !emit_branch(ctx, then_succ)))
      return false;

   if (else_block) {
      /* handle else-block */
      if (!emit_cf_list(ctx, &if_stmt->else_list) ||
          (!nir_block_ends_in_jump(nir_if_last_else_block(if_stmt)) &&
           !emit_branch(ctx, else_succ)))
         return false;
   }

   return true;
}

static bool
emit_loop(struct ntd_context *ctx, nir_loop *loop)
{
   assert(!nir_loop_has_continue_construct(loop));
   nir_block *first_block = nir_loop_first_block(loop);
   nir_block *last_block = nir_loop_last_block(loop);

   assert(last_block->successors[0]);
   assert(!last_block->successors[1]);

   if (!emit_branch(ctx, first_block->index))
      return false;

   if (!emit_cf_list(ctx, &loop->body))
      return false;

   /* If the loop's last block doesn't explicitly jump somewhere, then there's
    * an implicit continue that should take it back to the first loop block.
    */
   nir_instr *last_instr = nir_block_last_instr(last_block);
   if ((!last_instr || last_instr->type != nir_instr_type_jump) &&
       !emit_branch(ctx, first_block->index))
      return false;

   return true;
}

static bool
emit_cf_list(struct ntd_context *ctx, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         if (!emit_block(ctx, nir_cf_node_as_block(node)))
            return false;
         break;

      case nir_cf_node_if:
         if (!emit_if(ctx, nir_cf_node_as_if(node)))
            return false;
         break;

      case nir_cf_node_loop:
         if (!emit_loop(ctx, nir_cf_node_as_loop(node)))
            return false;
         break;

      default:
         unreachable("unsupported cf-list node");
         break;
      }
   }
   return true;
}

static void
insert_sorted_by_binding(struct exec_list *var_list, nir_variable *new_var)
{
   nir_foreach_variable_in_list(var, var_list) {
      if (var->data.binding > new_var->data.binding) {
         exec_node_insert_node_before(&var->node, &new_var->node);
         return;
      }
   }
   exec_list_push_tail(var_list, &new_var->node);
}


static void
sort_uniforms_by_binding_and_remove_structs(nir_shader *s)
{
   struct exec_list new_list;
   exec_list_make_empty(&new_list);

   nir_foreach_variable_with_modes_safe(var, s, nir_var_uniform) {
      exec_node_remove(&var->node);
      const struct glsl_type *type = glsl_without_array(var->type);
      if (!glsl_type_is_struct(type))
         insert_sorted_by_binding(&new_list, var);
   }
   exec_list_append(&s->variables, &new_list);
}

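/* CBV layout differs per environment: the non-GL paths emit one CBV per NIR
 * UBO variable, while the GL path carves the binding space into an optional
 * ubo0 for lowered uniforms, an array of application UBOs, and an optional
 * trailing CBV for state variables.
 */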
static bool
emit_cbvs(struct ntd_context *ctx)
{
   if (ctx->opts->environment != DXIL_ENVIRONMENT_GL) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ubo) {
         if (!emit_ubo_var(ctx, var))
            return false;
      }
   } else {
      if (ctx->shader->info.num_ubos) {
         const unsigned ubo_size = 16384 /* 4096 vec4's */;
         bool has_ubo0 = !ctx->opts->no_ubo0;
         bool has_state_vars = ctx->opts->last_ubo_is_not_arrayed;
         unsigned ubo1_array_size = ctx->shader->info.num_ubos -
            (has_state_vars ? 2 : 1);

         if (has_ubo0 &&
             !emit_cbv(ctx, 0, 0, ubo_size, 1, "__ubo_uniforms"))
            return false;
         if (ubo1_array_size &&
             !emit_cbv(ctx, 1, 0, ubo_size, ubo1_array_size, "__ubos"))
            return false;
         if (has_state_vars &&
             !emit_cbv(ctx, ctx->shader->info.num_ubos - 1, 0, ubo_size, 1, "__ubo_state_vars"))
            return false;
      }
   }

   return true;
}

static bool
emit_scratch(struct ntd_context *ctx, nir_function_impl *impl)
{
   uint32_t index = 0;
   nir_foreach_function_temp_variable(var, impl)
      var->data.driver_location = index++;

   if (ctx->scratchvars)
      ralloc_free((void *)ctx->scratchvars);

   ctx->scratchvars = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index);

   nir_foreach_function_temp_variable(var, impl) {
      const struct dxil_type *type = get_type_for_glsl_type(&ctx->mod, var->type);
      const struct dxil_value *length = dxil_module_get_int32_const(&ctx->mod, 1);
      const struct dxil_value *ptr = dxil_emit_alloca(&ctx->mod, type, length, 16);
      if (!ptr)
         return false;

      ctx->scratchvars[var->data.driver_location] = ptr;
   }

   return true;
}

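/* Emit one DXIL function definition for a nir_function_impl: set up the
 * per-SSA-def bookkeeping (defs, int/float type bitsets, phi table), emit
 * the body through emit_cf_list, then patch the recorded phis once every
 * block id is known.
 */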
static bool
emit_function(struct ntd_context *ctx, nir_function *func, nir_function_impl *impl)
{
   assert(func->num_params == 0);
   nir_metadata_require(impl, nir_metadata_block_index);

   const char *attr_keys[2] = { NULL };
   const char *attr_values[2] = { NULL };
   if (ctx->shader->info.float_controls_execution_mode &
       (FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32 | FLOAT_CONTROLS_DENORM_PRESERVE_FP32))
      attr_keys[0] = "fp32-denorm-mode";
   if (ctx->shader->info.float_controls_execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32)
      attr_values[0] = "ftz";
   else if (ctx->shader->info.float_controls_execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP32)
      attr_values[0] = "preserve";

   const struct dxil_type *void_type = dxil_module_get_void_type(&ctx->mod);
   const struct dxil_type *func_type = dxil_module_add_function_type(&ctx->mod, void_type, NULL, 0);
   struct dxil_func_def *func_def = dxil_add_function_def(&ctx->mod, func->name, func_type, impl->num_blocks, attr_keys, attr_values);
   if (!func_def)
      return false;

   if (func->is_entrypoint)
      ctx->main_func_def = func_def;
   else if (func == ctx->tess_ctrl_patch_constant_func)
      ctx->tess_ctrl_patch_constant_func_def = func_def;

   ctx->defs = rzalloc_array(ctx->ralloc_ctx, struct dxil_def, impl->ssa_alloc);
   ctx->float_types = rzalloc_array(ctx->ralloc_ctx, BITSET_WORD, BITSET_WORDS(impl->ssa_alloc));
   ctx->int_types = rzalloc_array(ctx->ralloc_ctx, BITSET_WORD, BITSET_WORDS(impl->ssa_alloc));
   if (!ctx->defs || !ctx->float_types || !ctx->int_types)
      return false;
   ctx->num_defs = impl->ssa_alloc;

   ctx->phis = _mesa_pointer_hash_table_create(ctx->ralloc_ctx);
   if (!ctx->phis)
      return false;

   nir_gather_types(impl, ctx->float_types, ctx->int_types);

   if (!emit_scratch(ctx, impl))
      return false;

   if (!emit_static_indexing_handles(ctx))
      return false;

   if (!emit_cf_list(ctx, &impl->body))
      return false;

   hash_table_foreach(ctx->phis, entry) {
      if (!fixup_phi(ctx, (nir_phi_instr *)entry->key,
                     (struct phi_block *)entry->data))
         return false;
   }

   if (!dxil_emit_ret_void(&ctx->mod))
      return false;

   ralloc_free(ctx->defs);
   ctx->defs = NULL;
   _mesa_hash_table_destroy(ctx->phis, NULL);
   return true;
}

static bool
emit_module(struct ntd_context *ctx, const struct nir_to_dxil_options *opts)
{
   /* The validator forces us to emit resources in a specific order:
    * CBVs, Samplers, SRVs, UAVs. While we are at it, also remove stale
    * struct uniforms; they have been lowered but might not have been
    * removed yet. */
   sort_uniforms_by_binding_and_remove_structs(ctx->shader);

   /* CBVs */
   if (!emit_cbvs(ctx))
      return false;

   /* Samplers */
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
      unsigned count = glsl_type_get_sampler_count(var->type);
      assert(count == 0 || glsl_type_is_bare_sampler(glsl_without_array(var->type)));
      if (count > 0 && !emit_sampler(ctx, var, count))
         return false;
   }

   /* SRVs */
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
      unsigned count = glsl_type_get_texture_count(var->type);
      assert(count == 0 || glsl_type_is_texture(glsl_without_array(var->type)));
      if (count > 0 && !emit_srv(ctx, var, count))
         return false;
   }

   /* Handle read-only SSBOs as SRVs */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
         if ((var->data.access & ACCESS_NON_WRITEABLE) != 0) {
            unsigned count = 1;
            if (glsl_type_is_array(var->type))
               count = glsl_get_length(var->type);
            if (!emit_srv(ctx, var, count))
               return false;
         }
      }
   }

   if (!emit_shared_vars(ctx))
      return false;
   if (!emit_global_consts(ctx))
      return false;

   /* UAVs */
   if (ctx->shader->info.stage == MESA_SHADER_KERNEL) {
      if (!emit_globals(ctx, opts->num_kernel_globals))
         return false;

   } else if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      /* Handle read/write SSBOs as UAVs */
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
         if ((var->data.access & ACCESS_NON_WRITEABLE) == 0) {
            unsigned count = 1;
            if (glsl_type_is_array(var->type))
               count = glsl_get_length(var->type);
            if (!emit_uav(ctx, var->data.binding, var->data.descriptor_set,
                        count, DXIL_COMP_TYPE_INVALID, 1,
                        DXIL_RESOURCE_KIND_RAW_BUFFER, var->data.access, var->name))
               return false;
         }
      }
   } else {
      for (unsigned i = 0; i < ctx->shader->info.num_ssbos; ++i) {
         char name[64];
         snprintf(name, sizeof(name), "__ssbo%d", i);
         if (!emit_uav(ctx, i, 0, 1, DXIL_COMP_TYPE_INVALID, 1,
                       DXIL_RESOURCE_KIND_RAW_BUFFER, 0, name))
            return false;
      }
      /* To work around a WARP bug, bind these descriptors a second time in descriptor
       * space 2. Space 0 will be used for static indexing, while space 2 will be used
       * for dynamic indexing. Space 0 will be individual SSBOs in the DXIL shader, while
       * space 2 will be a single array.
       */
      if (ctx->shader->info.num_ssbos &&
          !emit_uav(ctx, 0, 2, ctx->shader->info.num_ssbos, DXIL_COMP_TYPE_INVALID, 1,
                    DXIL_RESOURCE_KIND_RAW_BUFFER, 0, "__ssbo_dynamic"))
         return false;
   }

   nir_foreach_image_variable(var, ctx->shader) {
      if (!emit_uav_var(ctx, var, glsl_type_get_image_count(var->type)))
         return false;
   }

   ctx->mod.info.has_per_sample_input =
      BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
      ctx->shader->info.fs.uses_sample_shading ||
      ctx->shader->info.fs.uses_sample_qualifier;
   if (!ctx->mod.info.has_per_sample_input && ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in | nir_var_system_value) {
         if (var->data.sample) {
            ctx->mod.info.has_per_sample_input = true;
            break;
         }
      }
   }

   /* From the Vulkan spec 1.3.238, section 15.8:
    * When Sample Shading is enabled, the x and y components of FragCoord reflect the location
    * of one of the samples corresponding to the shader invocation.
    *
    * In other words, if the fragment shader is executing per-sample, then the position variable
    * should always be per-sample.
    *
    * Also:
    * The Centroid interpolation decoration is ignored, but allowed, on FragCoord.
    */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      nir_variable *pos_var = nir_find_variable_with_location(ctx->shader, nir_var_shader_in, VARYING_SLOT_POS);
      if (pos_var) {
         if (ctx->mod.info.has_per_sample_input)
            pos_var->data.sample = true;
         pos_var->data.centroid = false;
      }
   }

   unsigned input_clip_size = ctx->mod.shader_kind == DXIL_PIXEL_SHADER ?
      ctx->shader->info.clip_distance_array_size : ctx->opts->input_clip_size;
   preprocess_signatures(&ctx->mod, ctx->shader, input_clip_size);

   nir_foreach_function_with_impl(func, impl, ctx->shader) {
      if (!emit_function(ctx, func, impl))
         return false;
   }

   if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_out) {
         if (var->data.location == FRAG_RESULT_STENCIL) {
            ctx->mod.feats.stencil_ref = true;
         }
      }
   } else if (ctx->shader->info.stage == MESA_SHADER_VERTEX ||
              ctx->shader->info.stage == MESA_SHADER_TESS_EVAL) {
      if (ctx->shader->info.outputs_written &
          (VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER))
         ctx->mod.feats.array_layer_from_vs_or_ds = true;
   } else if (ctx->shader->info.stage == MESA_SHADER_GEOMETRY ||
              ctx->shader->info.stage == MESA_SHADER_TESS_CTRL) {
      if (ctx->shader->info.inputs_read &
          (VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER))
         ctx->mod.feats.array_layer_from_vs_or_ds = true;
   }

   if (ctx->mod.feats.native_low_precision && ctx->mod.minor_version < 2) {
      ctx->logger->log(ctx->logger->priv,
                       "Shader uses 16-bit types, which require shader model 6.2, but 6.2 is unsupported\n");
      return false;
   }

   return emit_metadata(ctx) &&
          dxil_emit_module(&ctx->mod);
}

static unsigned int
get_dxil_shader_kind(struct nir_shader *s)
{
   switch (s->info.stage) {
   case MESA_SHADER_VERTEX:
      return DXIL_VERTEX_SHADER;
   case MESA_SHADER_TESS_CTRL:
      return DXIL_HULL_SHADER;
   case MESA_SHADER_TESS_EVAL:
      return DXIL_DOMAIN_SHADER;
   case MESA_SHADER_GEOMETRY:
      return DXIL_GEOMETRY_SHADER;
   case MESA_SHADER_FRAGMENT:
      return DXIL_PIXEL_SHADER;
   case MESA_SHADER_KERNEL:
   case MESA_SHADER_COMPUTE:
      return DXIL_COMPUTE_SHADER;
   default:
      unreachable("unknown shader stage in nir_to_dxil");
      return DXIL_COMPUTE_SHADER;
   }
}

static unsigned
lower_bit_size_callback(const nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_alu)
      return 0;
   nir_alu_instr *alu = nir_instr_as_alu(instr);

   if (nir_op_infos[alu->op].is_conversion)
      return 0;

   if (nir_op_is_vec_or_mov(alu->op))
      return 0;

   unsigned num_inputs = nir_op_infos[alu->op].num_inputs;
   const struct nir_to_dxil_options *opts = (const struct nir_to_dxil_options *)data;
   unsigned min_bit_size = opts->lower_int16 ? 32 : 16;

   unsigned ret = 0;
   for (unsigned i = 0; i < num_inputs; i++) {
      unsigned bit_size = nir_src_bit_size(alu->src[i].src);
      if (bit_size != 1 && bit_size < min_bit_size)
         ret = min_bit_size;
   }

   return ret;
}

static bool
vectorize_filter(
   unsigned align_mul,
   unsigned align_offset,
   unsigned bit_size,
   unsigned num_components,
   nir_intrinsic_instr *low, nir_intrinsic_instr *high,
   void *data)
{
   return util_is_power_of_two_nonzero(num_components);
}

struct lower_mem_bit_sizes_data {
   const nir_shader_compiler_options *nir_options;
   const struct nir_to_dxil_options *dxil_options;
};

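/* Worked example, assuming lower_int16 is false (min_bit_size = 16): an SSBO
 * load with bytes = 12, align_mul = 4, align_offset = 0 and bit_size_in = 32
 * keeps bit_size = 32, since target = MIN2(12, 4) = 4 equals 32 / 8 and
 * neither adjustment loop fires, and so returns a 3 x 32-bit access aligned
 * to 4 bytes.
 */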
static nir_mem_access_size_align
lower_mem_access_bit_sizes_cb(nir_intrinsic_op intrin,
                              uint8_t bytes,
                              uint8_t bit_size_in,
                              uint32_t align_mul,
                              uint32_t align_offset,
                              bool offset_is_const,
                              const void *cb_data)
{
   const struct lower_mem_bit_sizes_data *data = cb_data;
   unsigned max_bit_size = 32;
   unsigned min_bit_size = data->dxil_options->lower_int16 ? 32 : 16;
   unsigned closest_bit_size = MAX2(min_bit_size, MIN2(max_bit_size, bit_size_in));
   if (intrin == nir_intrinsic_load_ubo) {
      /* UBO loads can be done at whatever (supported) bit size, but require 16 byte
       * alignment and can load up to 16 bytes per instruction. However this pass requires
       * loading 16 bytes of data to get 16-byte alignment. We're going to run lower_ubo_vec4
       * which can deal with unaligned vec4s, so for this pass let's just deal with bit size
       * and total size restrictions. */
      return (nir_mem_access_size_align) {
         .align = closest_bit_size / 8,
         .bit_size = closest_bit_size,
         .num_components = DIV_ROUND_UP(MIN2(bytes, 16) * 8, closest_bit_size),
      };
   }

   assert(intrin == nir_intrinsic_load_ssbo || intrin == nir_intrinsic_store_ssbo);
   uint32_t align = nir_combined_align(align_mul, align_offset);
   if (align < min_bit_size / 8) {
      /* Unaligned load/store, use the minimum bit size, up to 4 components */
      unsigned ideal_num_components = intrin == nir_intrinsic_load_ssbo ?
         DIV_ROUND_UP(bytes * 8, min_bit_size) :
         (32 / min_bit_size);
      return (nir_mem_access_size_align) {
         .align = min_bit_size / 8,
         .bit_size = min_bit_size,
         .num_components = MIN2(4, ideal_num_components),
      };
   }

   /* Increase/decrease bit size to try to get closer to the requested byte size/align */
   unsigned bit_size = closest_bit_size;
   unsigned target = MIN2(bytes, align);
   while (target < bit_size / 8 && bit_size > min_bit_size)
      bit_size /= 2;
   while (target > bit_size / 8 * 4 && bit_size < max_bit_size)
      bit_size *= 2;

   /* This is the best we can do */
   unsigned num_components = intrin == nir_intrinsic_load_ssbo ?
      DIV_ROUND_UP(bytes * 8, bit_size) :
      MAX2(1, (bytes * 8 / bit_size));
   return (nir_mem_access_size_align) {
      .align = bit_size / 8,
      .bit_size = bit_size,
      .num_components = MIN2(4, num_components),
   };
}

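/* Standard NIR optimization loop, run to a fixed point. DXIL-specific
 * lowerings (8/16-bit conversion lowering, upcast phis, dxil_nir_algebraic)
 * are interleaved with the generic passes so later iterations can clean up
 * whatever they produce.
 */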
static void
optimize_nir(struct nir_shader *s, const struct nir_to_dxil_options *opts)
{
   bool progress;
   do {
      progress = false;
      NIR_PASS_V(s, nir_lower_vars_to_ssa);
      NIR_PASS(progress, s, nir_lower_indirect_derefs, nir_var_function_temp, 4);
      NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, nir_opt_copy_prop_vars);
      NIR_PASS(progress, s, nir_lower_bit_size, lower_bit_size_callback, (void *)opts);
      NIR_PASS(progress, s, dxil_nir_lower_8bit_conv);
      if (opts->lower_int16)
         NIR_PASS(progress, s, dxil_nir_lower_16bit_conv);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_if,
               nir_opt_if_optimize_phi_true_false | nir_opt_if_avoid_64bit_phis);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, dxil_nir_algebraic);
      if (s->options->lower_int64_options)
         NIR_PASS(progress, s, nir_lower_int64);
      NIR_PASS(progress, s, nir_lower_alu);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_undef);
      NIR_PASS(progress, s, nir_lower_undef_to_zero);
      NIR_PASS(progress, s, nir_opt_deref);
      NIR_PASS(progress, s, dxil_nir_lower_upcast_phis, opts->lower_int16 ? 32 : 16);
      NIR_PASS(progress, s, nir_lower_64bit_phis);
      NIR_PASS(progress, s, nir_lower_phis_to_scalar, true);
      NIR_PASS(progress, s, nir_opt_loop_unroll);
      NIR_PASS(progress, s, nir_lower_pack);
      NIR_PASS_V(s, nir_lower_system_values);
   } while (progress);

   do {
      progress = false;
      NIR_PASS(progress, s, nir_opt_algebraic_late);
   } while (progress);
}

static void
dxil_fill_validation_state(struct ntd_context *ctx,
                           struct dxil_validation_state *state)
{
   unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
      sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
   state->num_resources = ctx->resources.size / resource_element_size;
   state->resources.v0 = (struct dxil_resource_v0 *)ctx->resources.data;
   if (ctx->shader->info.subgroup_size >= SUBGROUP_SIZE_REQUIRE_8) {
      state->state.psv1.psv0.max_expected_wave_lane_count = ctx->shader->info.subgroup_size;
      state->state.psv1.psv0.min_expected_wave_lane_count = ctx->shader->info.subgroup_size;
   } else {
      state->state.psv1.psv0.max_expected_wave_lane_count = UINT_MAX;
   }
   state->state.psv1.shader_stage = (uint8_t)ctx->mod.shader_kind;
   state->state.psv1.uses_view_id = (uint8_t)ctx->mod.feats.view_id;
   state->state.psv1.sig_input_elements = (uint8_t)ctx->mod.num_sig_inputs;
   state->state.psv1.sig_output_elements = (uint8_t)ctx->mod.num_sig_outputs;
   state->state.psv1.sig_patch_const_or_prim_elements = (uint8_t)ctx->mod.num_sig_patch_consts;

   switch (ctx->mod.shader_kind) {
   case DXIL_VERTEX_SHADER:
      state->state.psv1.psv0.vs.output_position_present = ctx->mod.info.has_out_position;
      break;
   case DXIL_PIXEL_SHADER:
      /* TODO: handle depth outputs */
      state->state.psv1.psv0.ps.depth_output = ctx->mod.info.has_out_depth;
      state->state.psv1.psv0.ps.sample_frequency =
         ctx->mod.info.has_per_sample_input;
      break;
   case DXIL_COMPUTE_SHADER:
      state->state.num_threads_x = MAX2(ctx->shader->info.workgroup_size[0], 1);
      state->state.num_threads_y = MAX2(ctx->shader->info.workgroup_size[1], 1);
      state->state.num_threads_z = MAX2(ctx->shader->info.workgroup_size[2], 1);
      break;
   case DXIL_GEOMETRY_SHADER:
      state->state.psv1.max_vertex_count = ctx->shader->info.gs.vertices_out;
      state->state.psv1.psv0.gs.input_primitive = dxil_get_input_primitive(ctx->shader->info.gs.input_primitive);
      state->state.psv1.psv0.gs.output_toplology = dxil_get_primitive_topology(ctx->shader->info.gs.output_primitive);
      state->state.psv1.psv0.gs.output_stream_mask = MAX2(ctx->shader->info.gs.active_stream_mask, 1);
      state->state.psv1.psv0.gs.output_position_present = ctx->mod.info.has_out_position;
      break;
   case DXIL_HULL_SHADER:
      state->state.psv1.psv0.hs.input_control_point_count = ctx->tess_input_control_point_count;
      state->state.psv1.psv0.hs.output_control_point_count = ctx->shader->info.tess.tcs_vertices_out;
      state->state.psv1.psv0.hs.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode);
      state->state.psv1.psv0.hs.tessellator_output_primitive = get_tessellator_output_primitive(&ctx->shader->info);
      state->state.psv1.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts;
      break;
   case DXIL_DOMAIN_SHADER:
      state->state.psv1.psv0.ds.input_control_point_count = ctx->shader->info.tess.tcs_vertices_out;
      state->state.psv1.psv0.ds.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode);
      state->state.psv1.psv0.ds.output_position_present = ctx->mod.info.has_out_position;
      state->state.psv1.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts;
      break;
   default:
      assert(0 && "Shader type not (yet) supported");
   }
}

static nir_variable *
add_sysvalue(struct ntd_context *ctx,
             uint8_t value, char *name,
             int driver_location)
{
   nir_variable *var = rzalloc(ctx->shader, nir_variable);
   if (!var)
      return NULL;
   var->data.driver_location = driver_location;
   var->data.location = value;
   var->type = glsl_uint_type();
   var->name = name;
   var->data.mode = nir_var_system_value;
   var->data.interpolation = INTERP_MODE_FLAT;
   return var;
}

static bool
append_input_or_sysvalue(struct ntd_context *ctx,
                         int input_loc, int sv_slot,
                         char *name, int driver_location)
{
   if (input_loc >= 0) {
      /* Check whether a shader input variable already exists that
       * corresponds to the sysvalue */
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
         if (var->data.location == input_loc) {
            ctx->system_value[sv_slot] = var;
            return true;
         }
      }
   }

   ctx->system_value[sv_slot] = add_sysvalue(ctx, sv_slot, name, driver_location);
   if (!ctx->system_value[sv_slot])
      return false;

   nir_shader_add_variable(ctx->shader, ctx->system_value[sv_slot]);
   return true;
}

struct sysvalue_name {
   gl_system_value value;
   int slot;
   char *name;
   gl_shader_stage only_in_shader;
} possible_sysvalues[] = {
   {SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, -1, "SV_VertexID", MESA_SHADER_NONE},
   {SYSTEM_VALUE_INSTANCE_ID, -1, "SV_InstanceID", MESA_SHADER_NONE},
   {SYSTEM_VALUE_FRONT_FACE, VARYING_SLOT_FACE, "SV_IsFrontFace", MESA_SHADER_NONE},
   {SYSTEM_VALUE_PRIMITIVE_ID, VARYING_SLOT_PRIMITIVE_ID, "SV_PrimitiveID", MESA_SHADER_GEOMETRY},
   {SYSTEM_VALUE_SAMPLE_ID, -1, "SV_SampleIndex", MESA_SHADER_NONE},
};
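/* A slot of -1 in the table above means there is no input varying to reuse;
 * append_input_or_sysvalue only scans existing inputs when input_loc >= 0
 * and otherwise synthesizes a fresh system-value variable.
 */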

static bool
allocate_sysvalues(struct ntd_context *ctx)
{
   unsigned driver_location = 0;
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in)
      driver_location = MAX2(driver_location, var->data.driver_location + 1);
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_system_value)
      driver_location = MAX2(driver_location, var->data.driver_location + 1);

   if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT &&
       !BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID)) {
      bool need_sample_id = ctx->shader->info.fs.uses_sample_shading;

      /* "var->data.sample = true" sometimes just means "I want per-sample
       * shading", which explains why we can end up with vars having flat
       * interpolation with the per-sample bit set. If there are only such
       * variables, we need to tell DXIL that we read SV_SampleIndex to
       * make DXIL validation happy.
       */
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
         bool var_can_be_sample_rate = !var->data.centroid && var->data.interpolation != INTERP_MODE_FLAT;
         /* If there's an input that will actually force sample-rate shading, then we don't
          * need SV_SampleIndex. */
         if (var->data.sample && var_can_be_sample_rate) {
            need_sample_id = false;
            break;
         }
         /* If there's an input that wants to be sample-rate, but can't be, then we might
          * need SV_SampleIndex. */
         if (var->data.sample && !var_can_be_sample_rate)
            need_sample_id = true;
      }

      if (need_sample_id)
         BITSET_SET(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
   }

   for (unsigned i = 0; i < ARRAY_SIZE(possible_sysvalues); ++i) {
      struct sysvalue_name *info = &possible_sysvalues[i];
      if (info->only_in_shader != MESA_SHADER_NONE &&
          info->only_in_shader != ctx->shader->info.stage)
         continue;
      if (BITSET_TEST(ctx->shader->info.system_values_read, info->value)) {
         if (!append_input_or_sysvalue(ctx, info->slot,
                                       info->value, info->name,
                                       driver_location++))
            return false;
      }
   }
   return true;
}

static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
   return glsl_count_attribute_slots(type, false);
}

static const unsigned dxil_validator_min_capable_version = DXIL_VALIDATOR_1_4;
static const unsigned dxil_validator_max_capable_version = DXIL_VALIDATOR_1_8;
static const unsigned dxil_min_shader_model = SHADER_MODEL_6_0;
static const unsigned dxil_max_shader_model = SHADER_MODEL_6_8;
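
/* Shader-model and validator versions are packed as (major << 16) | minor,
 * which is why the checks below unpack them with ">> 16" and "& 0xffff".
 */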

bool
nir_to_dxil(struct nir_shader *s, const struct nir_to_dxil_options *opts,
            const struct dxil_logger *logger, struct blob *blob)
{
   assert(opts);
   bool retval = true;
   debug_dxil = (int)debug_get_option_debug_dxil();
   blob_init(blob);

   if (opts->shader_model_max < dxil_min_shader_model) {
      debug_printf("D3D12: cannot support emitting shader models lower than %d.%d\n",
                   dxil_min_shader_model >> 16,
                   dxil_min_shader_model & 0xffff);
      return false;
   }

   if (opts->shader_model_max > dxil_max_shader_model) {
      debug_printf("D3D12: cannot support emitting shader models higher than %d.%d\n",
                   dxil_max_shader_model >> 16,
                   dxil_max_shader_model & 0xffff);
      return false;
   }

   if (opts->validator_version_max != NO_DXIL_VALIDATION &&
       opts->validator_version_max < dxil_validator_min_capable_version) {
      debug_printf("D3D12: Invalid validator version %d.%d, must be 1.4 or greater\n",
         opts->validator_version_max >> 16,
         opts->validator_version_max & 0xffff);
      return false;
   }

   /* If no validation, write the blob as if it were going to be validated by
    * the newest validator we understand. Same if the validator is newer than
    * we know how to write for.
    */
   uint32_t validator_version =
      opts->validator_version_max == NO_DXIL_VALIDATION ||
      opts->validator_version_max > dxil_validator_max_capable_version ?
      dxil_validator_max_capable_version : opts->validator_version_max;
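   /* E.g. with a max capable validator of 1.8, a requested 1.9 clamps to
    * 1.8, and NO_DXIL_VALIDATION also resolves to 1.8; a requested 1.6 is
    * kept as-is. */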

   struct ntd_context *ctx = calloc(1, sizeof(*ctx));
   if (!ctx)
      return false;

   ctx->opts = opts;
   ctx->shader = s;
   ctx->logger = logger ? logger : &default_logger;

   ctx->ralloc_ctx = ralloc_context(NULL);
   if (!ctx->ralloc_ctx) {
      retval = false;
      goto out;
   }

   util_dynarray_init(&ctx->srv_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->uav_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->cbv_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->sampler_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->resources, ctx->ralloc_ctx);
   dxil_module_init(&ctx->mod, ctx->ralloc_ctx);
   ctx->mod.shader_kind = get_dxil_shader_kind(s);
   ctx->mod.major_version = 6;
   /* Use the highest shader model that's supported and can be validated */
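   /* (E.g. shader_model_max 6.7 with validator 1.6 yields a 6.6 module,
    * since validator 1.x validates shader models up to 6.x.) */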
   ctx->mod.minor_version =
      MIN2(opts->shader_model_max & 0xffff, validator_version & 0xffff);
   ctx->mod.major_validator = validator_version >> 16;
   ctx->mod.minor_validator = validator_version & 0xffff;

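   /* These varying slots map to DXIL system values that are always declared
    * as uint (SV_PrimitiveID, SV_ViewportArrayIndex and
    * SV_RenderTargetArrayIndex on the varying side; SV_StencilRef and
    * SV_Coverage on the FS output side), while NIR may have typed them
    * differently. */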
   if (s->info.stage <= MESA_SHADER_FRAGMENT) {
      uint64_t in_mask =
         s->info.stage == MESA_SHADER_VERTEX ?
         0 : (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER);
      uint64_t out_mask =
         s->info.stage == MESA_SHADER_FRAGMENT ?
         ((1ull << FRAG_RESULT_STENCIL) | (1ull << FRAG_RESULT_SAMPLE_MASK)) :
         (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER);

      NIR_PASS_V(s, dxil_nir_fix_io_uint_type, in_mask, out_mask);
   }

   NIR_PASS_V(s, dxil_nir_lower_fquantize2f16);
   NIR_PASS_V(s, nir_lower_frexp);
   NIR_PASS_V(s, nir_lower_flrp, 16 | 32 | 64, true);
   NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4, nir_lower_io_lower_64bit_to_32);
   NIR_PASS_V(s, dxil_nir_ensure_position_writes);
   NIR_PASS_V(s, dxil_nir_lower_system_values);
   NIR_PASS_V(s, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_system_value | nir_var_shader_out, NULL, NULL);

   /* Do a round of optimization to try to vectorize loads/stores. Otherwise the addresses used for loads
    * might be too opaque for the pass to see that they're next to each other. */
   optimize_nir(s, opts);

   /* Vectorize UBO/SSBO accesses aggressively. This can help increase alignment to enable us to do better
    * chunking of loads and stores after lowering bit sizes. Ignore load/store size limitations here; we'll
    * address them with lower_mem_access_bit_sizes. */
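   /* (E.g. four adjacent 32-bit SSBO loads can merge into a single vec4
    * load, which the bit-size lowering below can then re-chunk legally.) */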
   nir_load_store_vectorize_options vectorize_opts = {
      .callback = vectorize_filter,
      .modes = nir_var_mem_ubo | nir_var_mem_ssbo,
   };
   NIR_PASS_V(s, nir_opt_load_store_vectorize, &vectorize_opts);

   /* Now that they're bloated to the max, address bit size restrictions and overall size limitations for
    * a single load/store op. */
   struct lower_mem_bit_sizes_data mem_size_data = { s->options, opts };
   nir_lower_mem_access_bit_sizes_options mem_size_options = {
      .modes = nir_var_mem_ubo | nir_var_mem_ssbo,
      .callback = lower_mem_access_bit_sizes_cb,
      .may_lower_unaligned_stores_to_atomics = true,
      .cb_data = &mem_size_data
   };
   NIR_PASS_V(s, nir_lower_mem_access_bit_sizes, &mem_size_options);

   /* Lastly, convert byte-addressed UBO loads to vec4-addressed ones. This pass can also select
    * sub-components from the load and deal with vec4-straddling loads. */
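   /* (E.g. a 32-bit load at byte offset 20 becomes a load of vec4 slot 1,
    * i.e. 20 / 16, selecting component 1, i.e. (20 % 16) / 4.) */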
   NIR_PASS_V(s, nir_lower_ubo_vec4);

   if (opts->shader_model_max < SHADER_MODEL_6_6) {
      /* In a later pass, load_helper_invocation will be lowered to a
       * sample-mask-based fallback, so both load_helper_invocation and
       * is_helper_invocation end up being emulated.
       */
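      /* (Shader model 6.6 added a native IsHelperLane() op, which is why
       * this lowering is only needed below 6.6.) */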
      NIR_PASS_V(s, nir_lower_is_helper_invocation);
   }

   if (ctx->mod.shader_kind == DXIL_HULL_SHADER)
      NIR_PASS_V(s, dxil_nir_split_tess_ctrl, &ctx->tess_ctrl_patch_constant_func);

   if (ctx->mod.shader_kind == DXIL_HULL_SHADER ||
       ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) {
      /* Make sure any derefs are gone after lower_io before updating tess level vars */
      NIR_PASS_V(s, nir_opt_dce);
      NIR_PASS_V(s, dxil_nir_fixup_tess_level_for_domain);
   }

   optimize_nir(s, opts);

   NIR_PASS_V(s, nir_remove_dead_variables,
              nir_var_function_temp | nir_var_mem_constant | nir_var_mem_shared, NULL);

   if (!allocate_sysvalues(ctx)) {
      retval = false;
      goto out;
   }

   NIR_PASS_V(s, dxil_nir_lower_sysval_to_load_input, ctx->system_value);
   NIR_PASS_V(s, nir_opt_dce);

   /* This needs to run after any copy propagation, to prevent these movs from being erased */
   NIR_PASS_V(s, dxil_nir_move_consts);
   NIR_PASS_V(s, nir_opt_dce);

   NIR_PASS_V(s, dxil_nir_guess_image_formats);

   if (debug_dxil & DXIL_DEBUG_VERBOSE)
      nir_print_shader(s, stderr);

   if (!emit_module(ctx, opts)) {
      debug_printf("D3D12: emit_module failed\n");
      retval = false;
      goto out;
   }

   if (debug_dxil & DXIL_DEBUG_DUMP_MODULE) {
      struct dxil_dumper *dumper = dxil_dump_create();
      dxil_dump_module(dumper, &ctx->mod);
      fprintf(stderr, "\n");
      dxil_dump_buf_to_file(dumper, stderr);
      fprintf(stderr, "\n\n");
      dxil_dump_free(dumper);
   }

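   /* A DXIL container is a sequence of FourCC-tagged parts: feature flags
    * (SFI0), the I/O signatures (ISG1/OSG1, plus PSG1 for tessellation
    * stages), pipeline state validation data (PSV0), and finally the DXIL
    * module itself; they're appended in that order below. */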
   struct dxil_container container;
   dxil_container_init(&container);
   /* Native low precision disables min-precision */
   if (ctx->mod.feats.native_low_precision)
      ctx->mod.feats.min_precision = false;
   if (!dxil_container_add_features(&container, &ctx->mod.feats)) {
      debug_printf("D3D12: dxil_container_add_features failed\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_add_io_signature(&container,
                                        DXIL_ISG1,
                                        ctx->mod.num_sig_inputs,
                                        ctx->mod.inputs,
                                        ctx->mod.minor_validator >= 7)) {
      debug_printf("D3D12: failed to write input signature\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_add_io_signature(&container,
                                        DXIL_OSG1,
                                        ctx->mod.num_sig_outputs,
                                        ctx->mod.outputs,
                                        ctx->mod.minor_validator >= 7)) {
      debug_printf("D3D12: failed to write output signature\n");
      retval = false;
      goto out;
   }

   if ((ctx->mod.shader_kind == DXIL_HULL_SHADER ||
        ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) &&
       !dxil_container_add_io_signature(&container,
                                        DXIL_PSG1,
                                        ctx->mod.num_sig_patch_consts,
                                        ctx->mod.patch_consts,
                                        ctx->mod.minor_validator >= 7)) {
      debug_printf("D3D12: failed to write patch constant signature\n");
      retval = false;
      goto out;
   }

   struct dxil_validation_state validation_state;
   memset(&validation_state, 0, sizeof(validation_state));
   dxil_fill_validation_state(ctx, &validation_state);

   if (!dxil_container_add_state_validation(&container, &ctx->mod,
                                            &validation_state)) {
      debug_printf("D3D12: failed to write state validation\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_add_module(&container, &ctx->mod)) {
      debug_printf("D3D12: failed to write module\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_write(&container, blob)) {
      debug_printf("D3D12: dxil_container_write failed\n");
      retval = false;
      goto out;
   }
   dxil_container_finish(&container);

   if (debug_dxil & DXIL_DEBUG_DUMP_BLOB) {
      static int shader_id = 0;
      char buffer[64];
      snprintf(buffer, sizeof(buffer), "shader_%s_%d.blob",
               get_shader_kind_str(ctx->mod.shader_kind), shader_id++);
      debug_printf("Writing blob to %s\n", buffer);
      FILE *f = fopen(buffer, "wb");
      if (f) {
         fwrite(blob->data, 1, blob->size, f);
         fclose(f);
      }
   }

out:
   dxil_module_release(&ctx->mod);
   ralloc_free(ctx->ralloc_ctx);
   free(ctx);
   return retval;
}