• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir_to_dxil.h"
25 
26 #include "dxil_container.h"
27 #include "dxil_dump.h"
28 #include "dxil_enums.h"
29 #include "dxil_function.h"
30 #include "dxil_module.h"
31 #include "dxil_nir.h"
32 #include "dxil_signature.h"
33 
34 #include "nir/nir_builder.h"
35 #include "nir_deref.h"
36 #include "util/ralloc.h"
37 #include "util/u_debug.h"
38 #include "util/u_dynarray.h"
39 #include "util/u_math.h"
40 
41 #include "git_sha1.h"
42 
43 #include "vulkan/vulkan_core.h"
44 
45 #include <stdint.h>
46 
/* Bitmask of DXIL_DEBUG_* flags; populated from the DXIL_DEBUG environment
 * variable via the debug_get_option helper generated below. */
int debug_dxil = 0;

/* Recognized values for the DXIL_DEBUG environment variable. */
static const struct debug_named_value
dxil_debug_options[] = {
   { "verbose", DXIL_DEBUG_VERBOSE, NULL },
   { "dump_blob",  DXIL_DEBUG_DUMP_BLOB , "Write shader blobs" },
   { "trace",  DXIL_DEBUG_TRACE , "Trace instruction conversion" },
   { "dump_module", DXIL_DEBUG_DUMP_MODULE, "dump module tree to stderr"},
   DEBUG_NAMED_VALUE_END
};

DEBUG_GET_ONCE_FLAGS_OPTION(debug_dxil, "DXIL_DEBUG", dxil_debug_options, 0)
59 
60 static void
log_nir_instr_unsupported(const struct dxil_logger * logger,const char * message_prefix,const nir_instr * instr)61 log_nir_instr_unsupported(const struct dxil_logger *logger,
62                           const char *message_prefix, const nir_instr *instr)
63 {
64    char *msg = NULL;
65    char *instr_str = nir_instr_as_str(instr, NULL);
66    asprintf(&msg, "%s: %s\n", message_prefix, instr_str);
67    ralloc_free(instr_str);
68    assert(msg);
69    logger->log(logger->priv, msg);
70    free(msg);
71 }
72 
73 static void
default_logger_func(void * priv,const char * msg)74 default_logger_func(void *priv, const char *msg)
75 {
76    fprintf(stderr, "%s", msg);
77    unreachable("Unhandled error");
78 }
79 
80 static const struct dxil_logger default_logger = { .priv = NULL, .log = default_logger_func };
81 
/* Dump each NIR instruction as it is converted when DXIL_DEBUG=trace.
 *
 * Fix: the debug check now lives INSIDE the do/while wrapper.  The previous
 * "if (...) do { ... } while (0)" form was not a single statement: used as
 * "if (x) TRACE_CONVERSION(i); else ..." the 'else' would have bound to the
 * macro's internal 'if' (dangling-else), silently changing control flow. */
#define TRACE_CONVERSION(instr) \
   do { \
      if (debug_dxil & DXIL_DEBUG_TRACE) { \
         fprintf(stderr, "Convert '"); \
         nir_print_instr(instr, stderr); \
         fprintf(stderr, "'\n"); \
      } \
   } while (0)
89 
/* Baseline NIR compiler options for DXIL: request NIR lowering for every
 * operation DXIL has no direct opcode for, and advertise the ops it does
 * provide natively (fsub/isub, bfe, reverse find-msb, global invocation ID,
 * ...).  Drivers start from this table via
 * dxil_get_nir_compiler_options()/dxil_get_base_nir_compiler_options(). */
static const nir_shader_compiler_options
nir_options = {
   .compact_arrays = true,
   .lower_ineg = true,
   .lower_fneg = true,
   .lower_ffma16 = true,
   .lower_ffma32 = true,
   .lower_isign = true,
   .lower_fsign = true,
   .lower_iabs = true,
   .lower_fmod = true,
   .lower_fpow = true,
   .lower_scmp = true,
   .lower_ldexp = true,
   .lower_flrp16 = true,
   .lower_flrp32 = true,
   .lower_flrp64 = true,
   .lower_bitfield_extract = true,
   .lower_ifind_msb = true,
   .lower_ufind_msb = true,
   .lower_extract_word = true,
   .lower_extract_byte = true,
   .lower_insert_word = true,
   .lower_insert_byte = true,
   .lower_hadd = true,
   .lower_uadd_sat = true,
   .lower_usub_sat = true,
   .lower_iadd_sat = true,
   .lower_uadd_carry = true,
   .lower_usub_borrow = true,
   .lower_mul_high = true,
   /* Pack/unpack conversions are all lowered to scalar arithmetic. */
   .lower_pack_half_2x16 = true,
   .lower_pack_unorm_4x8 = true,
   .lower_pack_snorm_4x8 = true,
   .lower_pack_snorm_2x16 = true,
   .lower_pack_unorm_2x16 = true,
   .lower_pack_64_2x32_split = true,
   .lower_pack_32_2x16_split = true,
   .lower_pack_64_4x16 = true,
   .lower_unpack_64_2x32_split = true,
   .lower_unpack_32_2x16_split = true,
   .lower_unpack_half_2x16 = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_interpolate_at = true,
   /* Ops DXIL supports natively. */
   .has_fsub = true,
   .has_isub = true,
   .has_bfe = true,
   .has_find_msb_rev = true,
   .vertex_id_zero_based = true,
   .lower_base_vertex = true,
   .lower_helper_invocation = true,
   .has_cs_global_id = true,
   .lower_mul_2x32_64 = true,
   /* Double-precision ops with no DXIL equivalent. */
   .lower_doubles_options =
      nir_lower_drcp |
      nir_lower_dsqrt |
      nir_lower_drsq |
      nir_lower_dfract |
      nir_lower_dtrunc |
      nir_lower_dfloor |
      nir_lower_dceil |
      nir_lower_dround_even,
   .lower_uniforms_to_ubo = true,
   .max_unroll_iterations = 32, /* arbitrary */
   .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out),
   .lower_device_index_to_zero = true,
   .support_16bit_alu = true,
   .preserve_mediump = true,
   .discard_is_demote = true,
   .scalarize_ddx = true,
   .io_options = nir_io_dont_use_pos_for_non_fs_varyings | nir_io_mediump_is_32bit,
};
165 
166 const nir_shader_compiler_options*
dxil_get_base_nir_compiler_options(void)167 dxil_get_base_nir_compiler_options(void)
168 {
169    return &nir_options;
170 }
171 
172 void
dxil_get_nir_compiler_options(nir_shader_compiler_options * options,enum dxil_shader_model shader_model_max,unsigned supported_int_sizes,unsigned supported_float_sizes)173 dxil_get_nir_compiler_options(nir_shader_compiler_options *options,
174                               enum dxil_shader_model shader_model_max,
175                               unsigned supported_int_sizes,
176                               unsigned supported_float_sizes)
177 {
178    *options = nir_options;
179    if (!(supported_int_sizes & 64)) {
180       options->lower_pack_64_2x32_split = false;
181       options->lower_unpack_64_2x32_split = false;
182       options->lower_int64_options = ~0;
183    }
184    if (!(supported_float_sizes & 64))
185       options->lower_doubles_options = ~0;
186    if (shader_model_max >= SHADER_MODEL_6_4) {
187       options->has_sdot_4x8 = true;
188       options->has_udot_4x8 = true;
189    }
190 }
191 
192 static bool
emit_llvm_ident(struct dxil_module * m)193 emit_llvm_ident(struct dxil_module *m)
194 {
195    const struct dxil_mdnode *compiler = dxil_get_metadata_string(m, "Mesa version " PACKAGE_VERSION MESA_GIT_SHA1);
196    if (!compiler)
197       return false;
198 
199    const struct dxil_mdnode *llvm_ident = dxil_get_metadata_node(m, &compiler, 1);
200    return llvm_ident &&
201           dxil_add_metadata_named_node(m, "llvm.ident", &llvm_ident, 1);
202 }
203 
204 static bool
emit_named_version(struct dxil_module * m,const char * name,int major,int minor)205 emit_named_version(struct dxil_module *m, const char *name,
206                    int major, int minor)
207 {
208    const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, major);
209    const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, minor);
210    const struct dxil_mdnode *version_nodes[] = { major_node, minor_node };
211    const struct dxil_mdnode *version = dxil_get_metadata_node(m, version_nodes,
212                                                      ARRAY_SIZE(version_nodes));
213    return dxil_add_metadata_named_node(m, name, &version, 1);
214 }
215 
216 static const char *
get_shader_kind_str(enum dxil_shader_kind kind)217 get_shader_kind_str(enum dxil_shader_kind kind)
218 {
219    switch (kind) {
220    case DXIL_PIXEL_SHADER:
221       return "ps";
222    case DXIL_VERTEX_SHADER:
223       return "vs";
224    case DXIL_GEOMETRY_SHADER:
225       return "gs";
226    case DXIL_HULL_SHADER:
227       return "hs";
228    case DXIL_DOMAIN_SHADER:
229       return "ds";
230    case DXIL_COMPUTE_SHADER:
231       return "cs";
232    default:
233       unreachable("invalid shader kind");
234    }
235 }
236 
237 static bool
emit_dx_shader_model(struct dxil_module * m)238 emit_dx_shader_model(struct dxil_module *m)
239 {
240    const struct dxil_mdnode *type_node = dxil_get_metadata_string(m, get_shader_kind_str(m->shader_kind));
241    const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, m->major_version);
242    const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, m->minor_version);
243    const struct dxil_mdnode *shader_model[] = { type_node, major_node,
244                                                 minor_node };
245    const struct dxil_mdnode *dx_shader_model = dxil_get_metadata_node(m, shader_model, ARRAY_SIZE(shader_model));
246 
247    return dxil_add_metadata_named_node(m, "dx.shaderModel",
248                                        &dx_shader_model, 1);
249 }
250 
/* Tags for the extended-property metadata node attached to buffer
 * resources (the trailing field of SRV/UAV resource metadata). */
enum {
   DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG = 0,
   DXIL_STRUCTURED_BUFFER_ELEMENT_STRIDE_TAG = 1
};
255 
/* DXIL intrinsic opcode numbers, passed as the first i32 argument of every
 * dx.op.* call.  The values are fixed by the DXIL specification (see the
 * opcode table in DXC's DXIL.rst) and must not be renumbered; gaps are
 * opcodes this backend does not emit. */
enum dxil_intr {
   DXIL_INTR_LOAD_INPUT = 4,
   DXIL_INTR_STORE_OUTPUT = 5,
   DXIL_INTR_FABS = 6,
   DXIL_INTR_SATURATE = 7,

   DXIL_INTR_ISFINITE = 10,
   DXIL_INTR_ISNORMAL = 11,

   DXIL_INTR_FCOS = 12,
   DXIL_INTR_FSIN = 13,

   DXIL_INTR_FEXP2 = 21,
   DXIL_INTR_FRC = 22,
   DXIL_INTR_FLOG2 = 23,

   DXIL_INTR_SQRT = 24,
   DXIL_INTR_RSQRT = 25,
   DXIL_INTR_ROUND_NE = 26,
   DXIL_INTR_ROUND_NI = 27,
   DXIL_INTR_ROUND_PI = 28,
   DXIL_INTR_ROUND_Z = 29,

   DXIL_INTR_BFREV = 30,
   DXIL_INTR_COUNTBITS = 31,
   DXIL_INTR_FIRSTBIT_LO = 32,
   DXIL_INTR_FIRSTBIT_HI = 33,
   DXIL_INTR_FIRSTBIT_SHI = 34,

   DXIL_INTR_FMAX = 35,
   DXIL_INTR_FMIN = 36,
   DXIL_INTR_IMAX = 37,
   DXIL_INTR_IMIN = 38,
   DXIL_INTR_UMAX = 39,
   DXIL_INTR_UMIN = 40,

   DXIL_INTR_FMA = 47,

   DXIL_INTR_IBFE = 51,
   DXIL_INTR_UBFE = 52,
   DXIL_INTR_BFI = 53,

   DXIL_INTR_CREATE_HANDLE = 57,
   DXIL_INTR_CBUFFER_LOAD_LEGACY = 59,

   DXIL_INTR_SAMPLE = 60,
   DXIL_INTR_SAMPLE_BIAS = 61,
   DXIL_INTR_SAMPLE_LEVEL = 62,
   DXIL_INTR_SAMPLE_GRAD = 63,
   DXIL_INTR_SAMPLE_CMP = 64,
   DXIL_INTR_SAMPLE_CMP_LVL_ZERO = 65,

   DXIL_INTR_TEXTURE_LOAD = 66,
   DXIL_INTR_TEXTURE_STORE = 67,

   DXIL_INTR_BUFFER_LOAD = 68,
   DXIL_INTR_BUFFER_STORE = 69,

   DXIL_INTR_TEXTURE_SIZE = 72,
   DXIL_INTR_TEXTURE_GATHER = 73,
   DXIL_INTR_TEXTURE_GATHER_CMP = 74,

   DXIL_INTR_TEXTURE2DMS_GET_SAMPLE_POSITION = 75,
   DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION = 76,
   DXIL_INTR_RENDER_TARGET_GET_SAMPLE_COUNT = 77,

   DXIL_INTR_ATOMIC_BINOP = 78,
   DXIL_INTR_ATOMIC_CMPXCHG = 79,
   DXIL_INTR_BARRIER = 80,
   DXIL_INTR_TEXTURE_LOD = 81,

   DXIL_INTR_DISCARD = 82,
   DXIL_INTR_DDX_COARSE = 83,
   DXIL_INTR_DDY_COARSE = 84,
   DXIL_INTR_DDX_FINE = 85,
   DXIL_INTR_DDY_FINE = 86,

   DXIL_INTR_EVAL_SNAPPED = 87,
   DXIL_INTR_EVAL_SAMPLE_INDEX = 88,
   DXIL_INTR_EVAL_CENTROID = 89,

   DXIL_INTR_SAMPLE_INDEX = 90,
   DXIL_INTR_COVERAGE = 91,

   DXIL_INTR_THREAD_ID = 93,
   DXIL_INTR_GROUP_ID = 94,
   DXIL_INTR_THREAD_ID_IN_GROUP = 95,
   DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP = 96,

   DXIL_INTR_EMIT_STREAM = 97,
   DXIL_INTR_CUT_STREAM = 98,

   DXIL_INTR_GS_INSTANCE_ID = 100,

   DXIL_INTR_MAKE_DOUBLE = 101,
   DXIL_INTR_SPLIT_DOUBLE = 102,

   DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT = 103,
   DXIL_INTR_LOAD_PATCH_CONSTANT = 104,
   DXIL_INTR_DOMAIN_LOCATION = 105,
   DXIL_INTR_STORE_PATCH_CONSTANT = 106,
   DXIL_INTR_OUTPUT_CONTROL_POINT_ID = 107,
   DXIL_INTR_PRIMITIVE_ID = 108,

   DXIL_INTR_WAVE_IS_FIRST_LANE = 110,
   DXIL_INTR_WAVE_GET_LANE_INDEX = 111,
   DXIL_INTR_WAVE_GET_LANE_COUNT = 112,
   DXIL_INTR_WAVE_ANY_TRUE = 113,
   DXIL_INTR_WAVE_ALL_TRUE = 114,
   DXIL_INTR_WAVE_ACTIVE_ALL_EQUAL = 115,
   DXIL_INTR_WAVE_ACTIVE_BALLOT = 116,
   DXIL_INTR_WAVE_READ_LANE_AT = 117,
   DXIL_INTR_WAVE_READ_LANE_FIRST = 118,
   DXIL_INTR_WAVE_ACTIVE_OP = 119,
   DXIL_INTR_WAVE_ACTIVE_BIT = 120,
   DXIL_INTR_WAVE_PREFIX_OP = 121,
   DXIL_INTR_QUAD_READ_LANE_AT = 122,
   DXIL_INTR_QUAD_OP = 123,

   DXIL_INTR_LEGACY_F32TOF16 = 130,
   DXIL_INTR_LEGACY_F16TOF32 = 131,

   DXIL_INTR_ATTRIBUTE_AT_VERTEX = 137,
   DXIL_INTR_VIEW_ID = 138,

   DXIL_INTR_RAW_BUFFER_LOAD = 139,
   DXIL_INTR_RAW_BUFFER_STORE = 140,

   DXIL_INTR_DOT4_ADD_I8_PACKED = 163,
   DXIL_INTR_DOT4_ADD_U8_PACKED = 164,

   DXIL_INTR_ANNOTATE_HANDLE = 216,
   DXIL_INTR_CREATE_HANDLE_FROM_BINDING = 217,
   DXIL_INTR_CREATE_HANDLE_FROM_HEAP = 218,

   DXIL_INTR_IS_HELPER_LANE = 221,
   DXIL_INTR_SAMPLE_CMP_LEVEL = 224,
   DXIL_INTR_SAMPLE_CMP_GRAD = 254,
   DXIL_INTR_SAMPLE_CMP_BIAS = 255,

   DXIL_INTR_START_VERTEX_LOCATION = 256,
   DXIL_INTR_START_INSTANCE_LOCATION = 257,
};
399 
/* Operation selector for the dx.op.atomicBinOp intrinsic; values are fixed
 * by the DXIL specification. */
enum dxil_atomic_op {
   DXIL_ATOMIC_ADD = 0,
   DXIL_ATOMIC_AND = 1,
   DXIL_ATOMIC_OR = 2,
   DXIL_ATOMIC_XOR = 3,
   DXIL_ATOMIC_IMIN = 4,
   DXIL_ATOMIC_IMAX = 5,
   DXIL_ATOMIC_UMIN = 6,
   DXIL_ATOMIC_UMAX = 7,
   DXIL_ATOMIC_EXCHANGE = 8,
};
411 
412 static enum dxil_atomic_op
nir_atomic_to_dxil_atomic(nir_atomic_op op)413 nir_atomic_to_dxil_atomic(nir_atomic_op op)
414 {
415    switch (op) {
416    case nir_atomic_op_iadd: return DXIL_ATOMIC_ADD;
417    case nir_atomic_op_iand: return DXIL_ATOMIC_AND;
418    case nir_atomic_op_ior: return DXIL_ATOMIC_OR;
419    case nir_atomic_op_ixor: return DXIL_ATOMIC_XOR;
420    case nir_atomic_op_imin: return DXIL_ATOMIC_IMIN;
421    case nir_atomic_op_imax: return DXIL_ATOMIC_IMAX;
422    case nir_atomic_op_umin: return DXIL_ATOMIC_UMIN;
423    case nir_atomic_op_umax: return DXIL_ATOMIC_UMAX;
424    case nir_atomic_op_xchg: return DXIL_ATOMIC_EXCHANGE;
425    default: unreachable("Unsupported atomic op");
426    }
427 }
428 
429 static enum dxil_rmw_op
nir_atomic_to_dxil_rmw(nir_atomic_op op)430 nir_atomic_to_dxil_rmw(nir_atomic_op op)
431 {
432    switch (op) {
433    case nir_atomic_op_iadd: return DXIL_RMWOP_ADD;
434    case nir_atomic_op_iand: return DXIL_RMWOP_AND;
435    case nir_atomic_op_ior: return DXIL_RMWOP_OR;
436    case nir_atomic_op_ixor: return DXIL_RMWOP_XOR;
437    case nir_atomic_op_imin: return DXIL_RMWOP_MIN;
438    case nir_atomic_op_imax: return DXIL_RMWOP_MAX;
439    case nir_atomic_op_umin: return DXIL_RMWOP_UMIN;
440    case nir_atomic_op_umax: return DXIL_RMWOP_UMAX;
441    case nir_atomic_op_xchg: return DXIL_RMWOP_XCHG;
442    default: unreachable("Unsupported atomic op");
443    }
444 }
445 
/* Placement of a (possibly arrayed) resource in the binding model. */
typedef struct {
   unsigned id;      /* resource record ID within its class */
   unsigned binding; /* lower-bound register */
   unsigned size;    /* number of array elements (range size) */
   unsigned space;   /* register space */
} resource_array_layout;
452 
453 static void
fill_resource_metadata(struct dxil_module * m,const struct dxil_mdnode ** fields,const struct dxil_type * struct_type,const char * name,const resource_array_layout * layout)454 fill_resource_metadata(struct dxil_module *m, const struct dxil_mdnode **fields,
455                        const struct dxil_type *struct_type,
456                        const char *name, const resource_array_layout *layout)
457 {
458    const struct dxil_type *pointer_type = dxil_module_get_pointer_type(m, struct_type);
459    const struct dxil_value *pointer_undef = dxil_module_get_undef(m, pointer_type);
460 
461    fields[0] = dxil_get_metadata_int32(m, layout->id); // resource ID
462    fields[1] = dxil_get_metadata_value(m, pointer_type, pointer_undef); // global constant symbol
463    fields[2] = dxil_get_metadata_string(m, name ? name : ""); // name
464    fields[3] = dxil_get_metadata_int32(m, layout->space); // space ID
465    fields[4] = dxil_get_metadata_int32(m, layout->binding); // lower bound
466    fields[5] = dxil_get_metadata_int32(m, layout->size); // range size
467 }
468 
469 static const struct dxil_mdnode *
emit_srv_metadata(struct dxil_module * m,const struct dxil_type * elem_type,const char * name,const resource_array_layout * layout,enum dxil_component_type comp_type,enum dxil_resource_kind res_kind)470 emit_srv_metadata(struct dxil_module *m, const struct dxil_type *elem_type,
471                   const char *name, const resource_array_layout *layout,
472                   enum dxil_component_type comp_type,
473                   enum dxil_resource_kind res_kind)
474 {
475    const struct dxil_mdnode *fields[9];
476 
477    const struct dxil_mdnode *metadata_tag_nodes[2];
478 
479    fill_resource_metadata(m, fields, elem_type, name, layout);
480    fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
481    fields[7] = dxil_get_metadata_int1(m, 0); // sample count
482    if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
483        res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
484       metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
485       metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
486       fields[8] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
487    } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
488       fields[8] = NULL;
489    else
490       unreachable("Structured buffers not supported yet");
491 
492    return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
493 }
494 
495 static const struct dxil_mdnode *
emit_uav_metadata(struct dxil_module * m,const struct dxil_type * struct_type,const char * name,const resource_array_layout * layout,enum dxil_component_type comp_type,enum dxil_resource_kind res_kind,enum gl_access_qualifier access)496 emit_uav_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
497                   const char *name, const resource_array_layout *layout,
498                   enum dxil_component_type comp_type,
499                   enum dxil_resource_kind res_kind,
500                   enum gl_access_qualifier access)
501 {
502    const struct dxil_mdnode *fields[11];
503 
504    const struct dxil_mdnode *metadata_tag_nodes[2];
505 
506    fill_resource_metadata(m, fields, struct_type, name, layout);
507    fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
508    fields[7] = dxil_get_metadata_int1(m, (access & ACCESS_COHERENT) != 0); // globally-coherent
509    fields[8] = dxil_get_metadata_int1(m, false); // has counter
510    fields[9] = dxil_get_metadata_int1(m, false); // is ROV
511    if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
512        res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
513       metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
514       metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
515       fields[10] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
516    } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
517       fields[10] = NULL;
518    else
519       unreachable("Structured buffers not supported yet");
520 
521    return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
522 }
523 
524 static const struct dxil_mdnode *
emit_cbv_metadata(struct dxil_module * m,const struct dxil_type * struct_type,const char * name,const resource_array_layout * layout,unsigned size)525 emit_cbv_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
526                   const char *name, const resource_array_layout *layout,
527                   unsigned size)
528 {
529    const struct dxil_mdnode *fields[8];
530 
531    fill_resource_metadata(m, fields, struct_type, name, layout);
532    fields[6] = dxil_get_metadata_int32(m, size); // constant buffer size
533    fields[7] = NULL; // metadata
534 
535    return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
536 }
537 
538 static const struct dxil_mdnode *
emit_sampler_metadata(struct dxil_module * m,const struct dxil_type * struct_type,nir_variable * var,const resource_array_layout * layout)539 emit_sampler_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
540                       nir_variable *var, const resource_array_layout *layout)
541 {
542    const struct dxil_mdnode *fields[8];
543    const struct glsl_type *type = glsl_without_array(var->type);
544 
545    fill_resource_metadata(m, fields, struct_type, var->name, layout);
546    enum dxil_sampler_kind sampler_kind = glsl_sampler_type_is_shadow(type) ?
547           DXIL_SAMPLER_KIND_COMPARISON : DXIL_SAMPLER_KIND_DEFAULT;
548    fields[6] = dxil_get_metadata_int32(m, sampler_kind); // sampler kind
549    fields[7] = NULL; // metadata
550 
551    return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
552 }
553 
554 
/* Fixed capacities of the per-class resource handle caches in ntd_context. */
#define MAX_SRVS 128
#define MAX_UAVS 64
#define MAX_CBVS 64 // ??
#define MAX_SAMPLERS 64 // ??

/* One SSA definition: a DXIL value per vector component. */
struct dxil_def {
   const struct dxil_value *chans[NIR_MAX_VEC_COMPONENTS];
};
563 
/* All state carried while translating a single NIR shader into a DXIL
 * module ("nir to dxil" context). */
struct ntd_context {
   void *ralloc_ctx;                        /* allocation context for this translation */
   const struct nir_to_dxil_options *opts;  /* caller-provided options */
   struct nir_shader *shader;               /* shader being translated */

   struct dxil_module mod;                  /* DXIL module under construction */

   /* Per-class resource metadata nodes and cached binding handles. */
   struct util_dynarray srv_metadata_nodes;
   const struct dxil_value *srv_handles[MAX_SRVS];

   struct util_dynarray uav_metadata_nodes;
   const struct dxil_value *ssbo_handles[MAX_UAVS];
   const struct dxil_value *image_handles[MAX_UAVS];
   uint32_t num_uavs;

   struct util_dynarray cbv_metadata_nodes;
   const struct dxil_value *cbv_handles[MAX_CBVS];

   struct util_dynarray sampler_metadata_nodes;
   const struct dxil_value *sampler_handles[MAX_SAMPLERS];

   struct util_dynarray resources;

   const struct dxil_mdnode *shader_property_nodes[6];
   size_t num_shader_property_nodes;

   /* SSA definitions; defs has num_defs entries. */
   struct dxil_def *defs;
   unsigned num_defs;
   struct hash_table *phis;                 /* phi bookkeeping (populated elsewhere) */

   const struct dxil_value **sharedvars;    /* workgroup-shared variables */
   const struct dxil_value **scratchvars;   /* scratch variables */
   const struct dxil_value **consts;        /* constant variables */

   nir_variable *system_value[SYSTEM_VALUE_MAX];

   /* Tessellation state. */
   nir_function *tess_ctrl_patch_constant_func;
   unsigned tess_input_control_point_count;

   struct dxil_func_def *main_func_def;
   struct dxil_func_def *tess_ctrl_patch_constant_func_def;
   unsigned unnamed_ubo_count;

   /* Bitsets tracking float/int typing of values — see users elsewhere in
    * this file for the exact indexing scheme. */
   BITSET_WORD *float_types;
   BITSET_WORD *int_types;

   const struct dxil_logger *logger;        /* error/diagnostic sink */
};
612 
613 static const char*
unary_func_name(enum dxil_intr intr)614 unary_func_name(enum dxil_intr intr)
615 {
616    switch (intr) {
617    case DXIL_INTR_COUNTBITS:
618    case DXIL_INTR_FIRSTBIT_HI:
619    case DXIL_INTR_FIRSTBIT_SHI:
620    case DXIL_INTR_FIRSTBIT_LO:
621       return "dx.op.unaryBits";
622    case DXIL_INTR_ISFINITE:
623    case DXIL_INTR_ISNORMAL:
624       return "dx.op.isSpecialFloat";
625    default:
626       return "dx.op.unary";
627    }
628 }
629 
630 static const struct dxil_value *
emit_unary_call(struct ntd_context * ctx,enum overload_type overload,enum dxil_intr intr,const struct dxil_value * op0)631 emit_unary_call(struct ntd_context *ctx, enum overload_type overload,
632                 enum dxil_intr intr,
633                 const struct dxil_value *op0)
634 {
635    const struct dxil_func *func = dxil_get_function(&ctx->mod,
636                                                     unary_func_name(intr),
637                                                     overload);
638    if (!func)
639       return NULL;
640 
641    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
642    if (!opcode)
643       return NULL;
644 
645    const struct dxil_value *args[] = {
646      opcode,
647      op0
648    };
649 
650    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
651 }
652 
653 static const struct dxil_value *
emit_binary_call(struct ntd_context * ctx,enum overload_type overload,enum dxil_intr intr,const struct dxil_value * op0,const struct dxil_value * op1)654 emit_binary_call(struct ntd_context *ctx, enum overload_type overload,
655                  enum dxil_intr intr,
656                  const struct dxil_value *op0, const struct dxil_value *op1)
657 {
658    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.binary", overload);
659    if (!func)
660       return NULL;
661 
662    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
663    if (!opcode)
664       return NULL;
665 
666    const struct dxil_value *args[] = {
667      opcode,
668      op0,
669      op1
670    };
671 
672    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
673 }
674 
675 static const struct dxil_value *
emit_tertiary_call(struct ntd_context * ctx,enum overload_type overload,enum dxil_intr intr,const struct dxil_value * op0,const struct dxil_value * op1,const struct dxil_value * op2)676 emit_tertiary_call(struct ntd_context *ctx, enum overload_type overload,
677                    enum dxil_intr intr,
678                    const struct dxil_value *op0,
679                    const struct dxil_value *op1,
680                    const struct dxil_value *op2)
681 {
682    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.tertiary", overload);
683    if (!func)
684       return NULL;
685 
686    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
687    if (!opcode)
688       return NULL;
689 
690    const struct dxil_value *args[] = {
691      opcode,
692      op0,
693      op1,
694      op2
695    };
696 
697    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
698 }
699 
700 static const struct dxil_value *
emit_quaternary_call(struct ntd_context * ctx,enum overload_type overload,enum dxil_intr intr,const struct dxil_value * op0,const struct dxil_value * op1,const struct dxil_value * op2,const struct dxil_value * op3)701 emit_quaternary_call(struct ntd_context *ctx, enum overload_type overload,
702                      enum dxil_intr intr,
703                      const struct dxil_value *op0,
704                      const struct dxil_value *op1,
705                      const struct dxil_value *op2,
706                      const struct dxil_value *op3)
707 {
708    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.quaternary", overload);
709    if (!func)
710       return NULL;
711 
712    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
713    if (!opcode)
714       return NULL;
715 
716    const struct dxil_value *args[] = {
717      opcode,
718      op0,
719      op1,
720      op2,
721      op3
722    };
723 
724    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
725 }
726 
727 static const struct dxil_value *
emit_threadid_call(struct ntd_context * ctx,const struct dxil_value * comp)728 emit_threadid_call(struct ntd_context *ctx, const struct dxil_value *comp)
729 {
730    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadId", DXIL_I32);
731    if (!func)
732       return NULL;
733 
734    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
735        DXIL_INTR_THREAD_ID);
736    if (!opcode)
737       return NULL;
738 
739    const struct dxil_value *args[] = {
740      opcode,
741      comp
742    };
743 
744    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
745 }
746 
747 static const struct dxil_value *
emit_threadidingroup_call(struct ntd_context * ctx,const struct dxil_value * comp)748 emit_threadidingroup_call(struct ntd_context *ctx,
749                           const struct dxil_value *comp)
750 {
751    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadIdInGroup", DXIL_I32);
752 
753    if (!func)
754       return NULL;
755 
756    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
757        DXIL_INTR_THREAD_ID_IN_GROUP);
758    if (!opcode)
759       return NULL;
760 
761    const struct dxil_value *args[] = {
762      opcode,
763      comp
764    };
765 
766    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
767 }
768 
769 static const struct dxil_value *
emit_flattenedthreadidingroup_call(struct ntd_context * ctx)770 emit_flattenedthreadidingroup_call(struct ntd_context *ctx)
771 {
772    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.flattenedThreadIdInGroup", DXIL_I32);
773 
774    if (!func)
775       return NULL;
776 
777    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
778       DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP);
779    if (!opcode)
780       return NULL;
781 
782    const struct dxil_value *args[] = {
783      opcode
784    };
785 
786    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
787 }
788 
789 static const struct dxil_value *
emit_groupid_call(struct ntd_context * ctx,const struct dxil_value * comp)790 emit_groupid_call(struct ntd_context *ctx, const struct dxil_value *comp)
791 {
792    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.groupId", DXIL_I32);
793 
794    if (!func)
795       return NULL;
796 
797    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
798        DXIL_INTR_GROUP_ID);
799    if (!opcode)
800       return NULL;
801 
802    const struct dxil_value *args[] = {
803      opcode,
804      comp
805    };
806 
807    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
808 }
809 
810 static const struct dxil_value *
emit_raw_bufferload_call(struct ntd_context * ctx,const struct dxil_value * handle,const struct dxil_value * coord[2],enum overload_type overload,unsigned component_count,unsigned alignment)811 emit_raw_bufferload_call(struct ntd_context *ctx,
812                          const struct dxil_value *handle,
813                          const struct dxil_value *coord[2],
814                          enum overload_type overload,
815                          unsigned component_count,
816                          unsigned alignment)
817 {
818    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.rawBufferLoad", overload);
819    if (!func)
820       return NULL;
821 
822    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
823                                                                  DXIL_INTR_RAW_BUFFER_LOAD);
824    const struct dxil_value *args[] = {
825       opcode, handle, coord[0], coord[1],
826       dxil_module_get_int8_const(&ctx->mod, (1 << component_count) - 1),
827       dxil_module_get_int32_const(&ctx->mod, alignment),
828    };
829 
830    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
831 }
832 
833 static const struct dxil_value *
emit_bufferload_call(struct ntd_context * ctx,const struct dxil_value * handle,const struct dxil_value * coord[2],enum overload_type overload)834 emit_bufferload_call(struct ntd_context *ctx,
835                      const struct dxil_value *handle,
836                      const struct dxil_value *coord[2],
837                      enum overload_type overload)
838 {
839    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferLoad", overload);
840    if (!func)
841       return NULL;
842 
843    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
844       DXIL_INTR_BUFFER_LOAD);
845    const struct dxil_value *args[] = { opcode, handle, coord[0], coord[1] };
846 
847    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
848 }
849 
850 static bool
emit_raw_bufferstore_call(struct ntd_context * ctx,const struct dxil_value * handle,const struct dxil_value * coord[2],const struct dxil_value * value[4],const struct dxil_value * write_mask,enum overload_type overload,unsigned alignment)851 emit_raw_bufferstore_call(struct ntd_context *ctx,
852                           const struct dxil_value *handle,
853                           const struct dxil_value *coord[2],
854                           const struct dxil_value *value[4],
855                           const struct dxil_value *write_mask,
856                           enum overload_type overload,
857                           unsigned alignment)
858 {
859    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.rawBufferStore", overload);
860 
861    if (!func)
862       return false;
863 
864    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
865                                                                  DXIL_INTR_RAW_BUFFER_STORE);
866    const struct dxil_value *args[] = {
867       opcode, handle, coord[0], coord[1],
868       value[0], value[1], value[2], value[3],
869       write_mask,
870       dxil_module_get_int32_const(&ctx->mod, alignment),
871    };
872 
873    return dxil_emit_call_void(&ctx->mod, func,
874                               args, ARRAY_SIZE(args));
875 }
876 
877 static bool
emit_bufferstore_call(struct ntd_context * ctx,const struct dxil_value * handle,const struct dxil_value * coord[2],const struct dxil_value * value[4],const struct dxil_value * write_mask,enum overload_type overload)878 emit_bufferstore_call(struct ntd_context *ctx,
879                       const struct dxil_value *handle,
880                       const struct dxil_value *coord[2],
881                       const struct dxil_value *value[4],
882                       const struct dxil_value *write_mask,
883                       enum overload_type overload)
884 {
885    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferStore", overload);
886 
887    if (!func)
888       return false;
889 
890    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
891       DXIL_INTR_BUFFER_STORE);
892    const struct dxil_value *args[] = {
893       opcode, handle, coord[0], coord[1],
894       value[0], value[1], value[2], value[3],
895       write_mask
896    };
897 
898    return dxil_emit_call_void(&ctx->mod, func,
899                               args, ARRAY_SIZE(args));
900 }
901 
902 static const struct dxil_value *
emit_textureload_call(struct ntd_context * ctx,const struct dxil_value * handle,const struct dxil_value * coord[3],enum overload_type overload)903 emit_textureload_call(struct ntd_context *ctx,
904                       const struct dxil_value *handle,
905                       const struct dxil_value *coord[3],
906                       enum overload_type overload)
907 {
908    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", overload);
909    if (!func)
910       return NULL;
911    const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
912    const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);
913 
914    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
915       DXIL_INTR_TEXTURE_LOAD);
916    const struct dxil_value *args[] = { opcode, handle,
917       /*lod_or_sample*/ int_undef,
918       coord[0], coord[1], coord[2],
919       /* offsets */ int_undef, int_undef, int_undef};
920 
921    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
922 }
923 
924 static bool
emit_texturestore_call(struct ntd_context * ctx,const struct dxil_value * handle,const struct dxil_value * coord[3],const struct dxil_value * value[4],const struct dxil_value * write_mask,enum overload_type overload)925 emit_texturestore_call(struct ntd_context *ctx,
926                        const struct dxil_value *handle,
927                        const struct dxil_value *coord[3],
928                        const struct dxil_value *value[4],
929                        const struct dxil_value *write_mask,
930                        enum overload_type overload)
931 {
932    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureStore", overload);
933 
934    if (!func)
935       return false;
936 
937    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
938       DXIL_INTR_TEXTURE_STORE);
939    const struct dxil_value *args[] = {
940       opcode, handle, coord[0], coord[1], coord[2],
941       value[0], value[1], value[2], value[3],
942       write_mask
943    };
944 
945    return dxil_emit_call_void(&ctx->mod, func,
946                               args, ARRAY_SIZE(args));
947 }
948 
949 static const struct dxil_value *
emit_atomic_binop(struct ntd_context * ctx,const struct dxil_value * handle,enum dxil_atomic_op atomic_op,const struct dxil_value * coord[3],const struct dxil_value * value)950 emit_atomic_binop(struct ntd_context *ctx,
951                   const struct dxil_value *handle,
952                   enum dxil_atomic_op atomic_op,
953                   const struct dxil_value *coord[3],
954                   const struct dxil_value *value)
955 {
956    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.atomicBinOp", DXIL_I32);
957 
958    if (!func)
959       return false;
960 
961    const struct dxil_value *opcode =
962       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_BINOP);
963    const struct dxil_value *atomic_op_value =
964       dxil_module_get_int32_const(&ctx->mod, atomic_op);
965    const struct dxil_value *args[] = {
966       opcode, handle, atomic_op_value,
967       coord[0], coord[1], coord[2], value
968    };
969 
970    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
971 }
972 
973 static const struct dxil_value *
emit_atomic_cmpxchg(struct ntd_context * ctx,const struct dxil_value * handle,const struct dxil_value * coord[3],const struct dxil_value * cmpval,const struct dxil_value * newval)974 emit_atomic_cmpxchg(struct ntd_context *ctx,
975                     const struct dxil_value *handle,
976                     const struct dxil_value *coord[3],
977                     const struct dxil_value *cmpval,
978                     const struct dxil_value *newval)
979 {
980    const struct dxil_func *func =
981       dxil_get_function(&ctx->mod, "dx.op.atomicCompareExchange", DXIL_I32);
982 
983    if (!func)
984       return false;
985 
986    const struct dxil_value *opcode =
987       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_CMPXCHG);
988    const struct dxil_value *args[] = {
989       opcode, handle, coord[0], coord[1], coord[2], cmpval, newval
990    };
991 
992    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
993 }
994 
995 static const struct dxil_value *
emit_createhandle_call_pre_6_6(struct ntd_context * ctx,enum dxil_resource_class resource_class,unsigned lower_bound,unsigned upper_bound,unsigned space,unsigned resource_range_id,const struct dxil_value * resource_range_index,bool non_uniform_resource_index)996 emit_createhandle_call_pre_6_6(struct ntd_context *ctx,
997                                enum dxil_resource_class resource_class,
998                                unsigned lower_bound,
999                                unsigned upper_bound,
1000                                unsigned space,
1001                                unsigned resource_range_id,
1002                                const struct dxil_value *resource_range_index,
1003                                bool non_uniform_resource_index)
1004 {
1005    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE);
1006    const struct dxil_value *resource_class_value = dxil_module_get_int8_const(&ctx->mod, resource_class);
1007    const struct dxil_value *resource_range_id_value = dxil_module_get_int32_const(&ctx->mod, resource_range_id);
1008    const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
1009    if (!opcode || !resource_class_value || !resource_range_id_value ||
1010        !non_uniform_resource_index_value)
1011       return NULL;
1012 
1013    const struct dxil_value *args[] = {
1014       opcode,
1015       resource_class_value,
1016       resource_range_id_value,
1017       resource_range_index,
1018       non_uniform_resource_index_value
1019    };
1020 
1021    const struct dxil_func *func =
1022          dxil_get_function(&ctx->mod, "dx.op.createHandle", DXIL_NONE);
1023 
1024    if (!func)
1025          return NULL;
1026 
1027    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
1028 }
1029 
1030 static const struct dxil_value *
emit_annotate_handle(struct ntd_context * ctx,const struct dxil_value * unannotated_handle,const struct dxil_value * res_props)1031 emit_annotate_handle(struct ntd_context *ctx,
1032                      const struct dxil_value *unannotated_handle,
1033                      const struct dxil_value *res_props)
1034 {
1035    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ANNOTATE_HANDLE);
1036    if (!opcode)
1037       return NULL;
1038 
1039    const struct dxil_value *args[] = {
1040       opcode,
1041       unannotated_handle,
1042       res_props
1043    };
1044 
1045    const struct dxil_func *func =
1046       dxil_get_function(&ctx->mod, "dx.op.annotateHandle", DXIL_NONE);
1047 
1048    if (!func)
1049       return NULL;
1050 
1051    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
1052 }
1053 
1054 static const struct dxil_value *
emit_annotate_handle_from_metadata(struct ntd_context * ctx,enum dxil_resource_class resource_class,unsigned resource_range_id,const struct dxil_value * unannotated_handle)1055 emit_annotate_handle_from_metadata(struct ntd_context *ctx,
1056                                    enum dxil_resource_class resource_class,
1057                                    unsigned resource_range_id,
1058                                    const struct dxil_value *unannotated_handle)
1059 {
1060 
1061    const struct util_dynarray *mdnodes;
1062    switch (resource_class) {
1063    case DXIL_RESOURCE_CLASS_SRV:
1064       mdnodes = &ctx->srv_metadata_nodes;
1065       break;
1066    case DXIL_RESOURCE_CLASS_UAV:
1067       mdnodes = &ctx->uav_metadata_nodes;
1068       break;
1069    case DXIL_RESOURCE_CLASS_CBV:
1070       mdnodes = &ctx->cbv_metadata_nodes;
1071       break;
1072    case DXIL_RESOURCE_CLASS_SAMPLER:
1073       mdnodes = &ctx->sampler_metadata_nodes;
1074       break;
1075    default:
1076       unreachable("Invalid resource class");
1077    }
1078 
1079    const struct dxil_mdnode *mdnode = *util_dynarray_element(mdnodes, const struct dxil_mdnode *, resource_range_id);
1080    const struct dxil_value *res_props = dxil_module_get_res_props_const(&ctx->mod, resource_class, mdnode);
1081    if (!res_props)
1082       return NULL;
1083 
1084    return emit_annotate_handle(ctx, unannotated_handle, res_props);
1085 }
1086 
1087 static const struct dxil_value *
emit_createhandle_and_annotate(struct ntd_context * ctx,enum dxil_resource_class resource_class,unsigned lower_bound,unsigned upper_bound,unsigned space,unsigned resource_range_id,const struct dxil_value * resource_range_index,bool non_uniform_resource_index)1088 emit_createhandle_and_annotate(struct ntd_context *ctx,
1089                                enum dxil_resource_class resource_class,
1090                                unsigned lower_bound,
1091                                unsigned upper_bound,
1092                                unsigned space,
1093                                unsigned resource_range_id,
1094                                const struct dxil_value *resource_range_index,
1095                                bool non_uniform_resource_index)
1096 {
1097    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE_FROM_BINDING);
1098    const struct dxil_value *res_bind = dxil_module_get_res_bind_const(&ctx->mod, lower_bound, upper_bound, space, resource_class);
1099    const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
1100    if (!opcode || !res_bind || !non_uniform_resource_index_value)
1101       return NULL;
1102 
1103    const struct dxil_value *args[] = {
1104       opcode,
1105       res_bind,
1106       resource_range_index,
1107       non_uniform_resource_index_value
1108    };
1109 
1110    const struct dxil_func *func =
1111       dxil_get_function(&ctx->mod, "dx.op.createHandleFromBinding", DXIL_NONE);
1112 
1113    if (!func)
1114       return NULL;
1115 
1116    const struct dxil_value *unannotated_handle = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
1117    if (!unannotated_handle)
1118       return NULL;
1119 
1120    return emit_annotate_handle_from_metadata(ctx, resource_class, resource_range_id, unannotated_handle);
1121 }
1122 
1123 static const struct dxil_value *
emit_createhandle_call(struct ntd_context * ctx,enum dxil_resource_class resource_class,unsigned lower_bound,unsigned upper_bound,unsigned space,unsigned resource_range_id,const struct dxil_value * resource_range_index,bool non_uniform_resource_index)1124 emit_createhandle_call(struct ntd_context *ctx,
1125                        enum dxil_resource_class resource_class,
1126                        unsigned lower_bound,
1127                        unsigned upper_bound,
1128                        unsigned space,
1129                        unsigned resource_range_id,
1130                        const struct dxil_value *resource_range_index,
1131                        bool non_uniform_resource_index)
1132 {
1133    if (ctx->mod.minor_version < 6)
1134       return emit_createhandle_call_pre_6_6(ctx, resource_class, lower_bound, upper_bound, space, resource_range_id, resource_range_index, non_uniform_resource_index);
1135    else
1136       return emit_createhandle_and_annotate(ctx, resource_class, lower_bound, upper_bound, space, resource_range_id, resource_range_index, non_uniform_resource_index);
1137 }
1138 
1139 static const struct dxil_value *
emit_createhandle_call_const_index(struct ntd_context * ctx,enum dxil_resource_class resource_class,unsigned lower_bound,unsigned upper_bound,unsigned space,unsigned resource_range_id,unsigned resource_range_index,bool non_uniform_resource_index)1140 emit_createhandle_call_const_index(struct ntd_context *ctx,
1141                                    enum dxil_resource_class resource_class,
1142                                    unsigned lower_bound,
1143                                    unsigned upper_bound,
1144                                    unsigned space,
1145                                    unsigned resource_range_id,
1146                                    unsigned resource_range_index,
1147                                    bool non_uniform_resource_index)
1148 {
1149 
1150    const struct dxil_value *resource_range_index_value = dxil_module_get_int32_const(&ctx->mod, resource_range_index);
1151    if (!resource_range_index_value)
1152       return NULL;
1153 
1154    return emit_createhandle_call(ctx, resource_class, lower_bound, upper_bound, space,
1155                                  resource_range_id, resource_range_index_value,
1156                                  non_uniform_resource_index);
1157 }
1158 
1159 static const struct dxil_value *
emit_createhandle_heap(struct ntd_context * ctx,const struct dxil_value * resource_range_index,bool is_sampler,bool non_uniform_resource_index)1160 emit_createhandle_heap(struct ntd_context *ctx,
1161                        const struct dxil_value *resource_range_index,
1162                        bool is_sampler,
1163                        bool non_uniform_resource_index)
1164 {
1165    if (is_sampler)
1166       ctx->mod.feats.sampler_descriptor_heap_indexing = true;
1167    else
1168       ctx->mod.feats.resource_descriptor_heap_indexing = true;
1169 
1170    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE_FROM_HEAP);
1171    const struct dxil_value *sampler = dxil_module_get_int1_const(&ctx->mod, is_sampler);
1172    const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
1173    if (!opcode || !sampler || !non_uniform_resource_index_value)
1174       return NULL;
1175 
1176    const struct dxil_value *args[] = {
1177       opcode,
1178       resource_range_index,
1179       sampler,
1180       non_uniform_resource_index_value
1181    };
1182 
1183    const struct dxil_func *func =
1184       dxil_get_function(&ctx->mod, "dx.op.createHandleFromHeap", DXIL_NONE);
1185 
1186    if (!func)
1187       return NULL;
1188 
1189    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
1190 }
1191 
static void
add_resource(struct ntd_context *ctx, enum dxil_resource_type type,
             enum dxil_resource_kind kind,
             const resource_array_layout *layout)
{
   /* Append one resource-range record to ctx->resources.  Validator 1.6+
    * stores the larger v1 record (which embeds a v0 plus kind/flags);
    * older validators store only the v0 record, so all v0 fields are
    * written through resource_v0 in both cases. */
   struct dxil_resource_v0 *resource_v0 = NULL;
   struct dxil_resource_v1 *resource_v1 = NULL;
   if (ctx->mod.minor_validator >= 6) {
      resource_v1 = util_dynarray_grow(&ctx->resources, struct dxil_resource_v1, 1);
      resource_v0 = &resource_v1->v0;
   } else {
      resource_v0 = util_dynarray_grow(&ctx->resources, struct dxil_resource_v0, 1);
   }
   resource_v0->resource_type = type;
   resource_v0->space = layout->space;
   resource_v0->lower_bound = layout->binding;
   /* size == 0 denotes an unbounded range; the 64-bit addition guards
    * against binding + size wrapping past UINT_MAX.  Either way the range
    * is marked open-ended with upper_bound == UINT_MAX. */
   if (layout->size == 0 || (uint64_t)layout->size + layout->binding >= UINT_MAX)
      resource_v0->upper_bound = UINT_MAX;
   else
      resource_v0->upper_bound = layout->binding + layout->size - 1;
   if (type == DXIL_RES_UAV_TYPED ||
       type == DXIL_RES_UAV_RAW ||
       type == DXIL_RES_UAV_STRUCTURED) {
      /* Track the running UAV total, saturating at UINT_MAX for unbounded
       * ranges or on 32-bit overflow.  On validator 1.6+ more than 8 UAVs
       * requires the 64-UAV feature bit (pre-1.6 counts metadata nodes
       * instead — see emit_uav). */
      uint32_t new_uav_count = ctx->num_uavs + layout->size;
      if (layout->size == 0 || new_uav_count < ctx->num_uavs)
         ctx->num_uavs = UINT_MAX;
      else
         ctx->num_uavs = new_uav_count;
      if (ctx->mod.minor_validator >= 6 && ctx->num_uavs > 8)
         ctx->mod.feats.use_64uavs = 1;
   }

   if (resource_v1) {
      resource_v1->resource_kind = kind;
      /* No flags supported yet */
      resource_v1->resource_flags = 0;
   }
}
1230 
static const struct dxil_value *
emit_createhandle_call_dynamic(struct ntd_context *ctx,
                               enum dxil_resource_class resource_class,
                               unsigned space,
                               unsigned binding,
                               const struct dxil_value *resource_range_index,
                               bool non_uniform_resource_index)
{
   /* Locate the declared resource range that covers (space, binding) for
    * this class and emit a createhandle against it.  ctx->resources holds
    * all classes back to back in the fixed order CBV, sampler, SRV, UAV
    * (mirroring the append order implied by the offsets below), so first
    * compute this class's offset and length within the array. */
   unsigned offset = 0;
   unsigned count = 0;

   unsigned num_srvs = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_uavs = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_cbvs = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_samplers = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);

   /* No default case: all four resource classes are handled. */
   switch (resource_class) {
   case DXIL_RESOURCE_CLASS_UAV:
      offset = num_srvs + num_samplers + num_cbvs;
      count = num_uavs;
      break;
   case DXIL_RESOURCE_CLASS_SRV:
      offset = num_samplers + num_cbvs;
      count = num_srvs;
      break;
   case DXIL_RESOURCE_CLASS_SAMPLER:
      offset = num_cbvs;
      count = num_samplers;
      break;
   case DXIL_RESOURCE_CLASS_CBV:
      offset = 0;
      count = num_cbvs;
      break;
   }

   /* Records are v1-sized on validator 1.6+ and v0-sized otherwise (see
    * add_resource), so index the dynarray manually by byte stride.  A v1
    * record begins with its embedded v0, making the cast below valid for
    * both layouts. */
   unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
      sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
   assert(offset + count <= ctx->resources.size / resource_element_size);
   for (unsigned i = offset; i < offset + count; ++i) {
      const struct dxil_resource_v0 *resource = (const struct dxil_resource_v0 *)((const char *)ctx->resources.data + resource_element_size * i);
      if (resource->space == space &&
          resource->lower_bound <= binding &&
          resource->upper_bound >= binding) {
         /* i - offset is the class-local range id used by the metadata. */
         return emit_createhandle_call(ctx, resource_class, resource->lower_bound,
                                       resource->upper_bound, space,
                                       i - offset,
                                       resource_range_index,
                                       non_uniform_resource_index);
      }
   }

   unreachable("Resource access for undeclared range");
}
1284 
1285 static bool
emit_srv(struct ntd_context * ctx,nir_variable * var,unsigned count)1286 emit_srv(struct ntd_context *ctx, nir_variable *var, unsigned count)
1287 {
1288    unsigned id = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
1289    unsigned binding = var->data.binding;
1290    resource_array_layout layout = {id, binding, count, var->data.descriptor_set};
1291 
1292    enum dxil_component_type comp_type;
1293    enum dxil_resource_kind res_kind;
1294    enum dxil_resource_type res_type;
1295    if (var->data.mode == nir_var_mem_ssbo) {
1296       comp_type = DXIL_COMP_TYPE_INVALID;
1297       res_kind = DXIL_RESOURCE_KIND_RAW_BUFFER;
1298       res_type = DXIL_RES_SRV_RAW;
1299    } else {
1300       comp_type = dxil_get_comp_type(var->type);
1301       res_kind = dxil_get_resource_kind(var->type);
1302       res_type = DXIL_RES_SRV_TYPED;
1303    }
1304    const struct dxil_type *res_type_as_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, 4, false /* readwrite */);
1305 
1306    if (glsl_type_is_array(var->type))
1307       res_type_as_type = dxil_module_get_array_type(&ctx->mod, res_type_as_type, count);
1308 
1309    const struct dxil_mdnode *srv_meta = emit_srv_metadata(&ctx->mod, res_type_as_type, var->name,
1310                                                           &layout, comp_type, res_kind);
1311 
1312    if (!srv_meta)
1313       return false;
1314 
1315    util_dynarray_append(&ctx->srv_metadata_nodes, const struct dxil_mdnode *, srv_meta);
1316    add_resource(ctx, res_type, res_kind, &layout);
1317    if (res_type == DXIL_RES_SRV_RAW)
1318       ctx->mod.raw_and_structured_buffers = true;
1319 
1320    return true;
1321 }
1322 
1323 static bool
emit_uav(struct ntd_context * ctx,unsigned binding,unsigned space,unsigned count,enum dxil_component_type comp_type,unsigned num_comps,enum dxil_resource_kind res_kind,enum gl_access_qualifier access,const char * name)1324 emit_uav(struct ntd_context *ctx, unsigned binding, unsigned space, unsigned count,
1325          enum dxil_component_type comp_type, unsigned num_comps, enum dxil_resource_kind res_kind,
1326          enum gl_access_qualifier access, const char *name)
1327 {
1328    unsigned id = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
1329    resource_array_layout layout = { id, binding, count, space };
1330 
1331    const struct dxil_type *res_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, num_comps, true /* readwrite */);
1332    res_type = dxil_module_get_array_type(&ctx->mod, res_type, count);
1333    const struct dxil_mdnode *uav_meta = emit_uav_metadata(&ctx->mod, res_type, name,
1334                                                           &layout, comp_type, res_kind, access);
1335 
1336    if (!uav_meta)
1337       return false;
1338 
1339    util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta);
1340    if (ctx->mod.minor_validator < 6 &&
1341        util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8)
1342       ctx->mod.feats.use_64uavs = 1;
1343 
1344    add_resource(ctx, res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER ? DXIL_RES_UAV_RAW : DXIL_RES_UAV_TYPED, res_kind, &layout);
1345    if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
1346       ctx->mod.raw_and_structured_buffers = true;
1347    if (ctx->mod.shader_kind != DXIL_PIXEL_SHADER &&
1348        ctx->mod.shader_kind != DXIL_COMPUTE_SHADER)
1349       ctx->mod.feats.uavs_at_every_stage = true;
1350 
1351    return true;
1352 }
1353 
1354 static bool
emit_globals(struct ntd_context * ctx,unsigned size)1355 emit_globals(struct ntd_context *ctx, unsigned size)
1356 {
1357    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo)
1358       size++;
1359 
1360    if (!size)
1361       return true;
1362 
1363    if (!emit_uav(ctx, 0, 0, size, DXIL_COMP_TYPE_INVALID, 1, DXIL_RESOURCE_KIND_RAW_BUFFER, 0, "globals"))
1364       return false;
1365 
1366    return true;
1367 }
1368 
1369 static bool
emit_uav_var(struct ntd_context * ctx,nir_variable * var,unsigned count)1370 emit_uav_var(struct ntd_context *ctx, nir_variable *var, unsigned count)
1371 {
1372    unsigned binding, space;
1373    if (ctx->opts->environment == DXIL_ENVIRONMENT_GL) {
1374       /* For GL, the image intrinsics are already lowered, using driver_location
1375        * as the 0-based image index. Use space 1 so that we can keep using these
1376        * NIR constants without having to remap them, and so they don't overlap
1377        * SSBOs, which are also 0-based UAV bindings.
1378        */
1379       binding = var->data.driver_location;
1380       space = 1;
1381    } else {
1382       binding = var->data.binding;
1383       space = var->data.descriptor_set;
1384    }
1385    enum dxil_component_type comp_type = dxil_get_comp_type(var->type);
1386    enum dxil_resource_kind res_kind = dxil_get_resource_kind(var->type);
1387    const char *name = var->name;
1388 
1389    return emit_uav(ctx, binding, space, count, comp_type,
1390                    util_format_get_nr_components(var->data.image.format),
1391                    res_kind, var->data.access, name);
1392 }
1393 
1394 static const struct dxil_value *
get_value_for_const(struct dxil_module * mod,nir_const_value * c,const struct dxil_type * type)1395 get_value_for_const(struct dxil_module *mod, nir_const_value *c, const struct dxil_type *type)
1396 {
1397    if (type == mod->int1_type) return dxil_module_get_int1_const(mod, c->b);
1398    if (type == mod->float32_type) return dxil_module_get_float_const(mod, c->f32);
1399    if (type == mod->int32_type) return dxil_module_get_int32_const(mod, c->i32);
1400    if (type == mod->int16_type) {
1401       mod->feats.min_precision = true;
1402       return dxil_module_get_int16_const(mod, c->i16);
1403    }
1404    if (type == mod->int64_type) {
1405       mod->feats.int64_ops = true;
1406       return dxil_module_get_int64_const(mod, c->i64);
1407    }
1408    if (type == mod->float16_type) {
1409       mod->feats.min_precision = true;
1410       return dxil_module_get_float16_const(mod, c->u16);
1411    }
1412    if (type == mod->float64_type) {
1413       mod->feats.doubles = true;
1414       return dxil_module_get_double_const(mod, c->f64);
1415    }
1416    unreachable("Invalid type");
1417 }
1418 
1419 static const struct dxil_type *
get_type_for_glsl_base_type(struct dxil_module * mod,enum glsl_base_type type)1420 get_type_for_glsl_base_type(struct dxil_module *mod, enum glsl_base_type type)
1421 {
1422    uint32_t bit_size = glsl_base_type_bit_size(type);
1423    if (nir_alu_type_get_base_type(nir_get_nir_type_for_glsl_base_type(type)) == nir_type_float)
1424       return dxil_module_get_float_type(mod, bit_size);
1425    return dxil_module_get_int_type(mod, bit_size);
1426 }
1427 
static const struct dxil_type *
get_type_for_glsl_type(struct dxil_module *mod, const struct glsl_type *type)
{
   /* Recursively translate a GLSL type (scalar/vector/array/struct) into
    * the corresponding DXIL type.  Returns NULL on allocation failure. */
   if (glsl_type_is_scalar(type))
      return get_type_for_glsl_base_type(mod, glsl_get_base_type(type));

   if (glsl_type_is_vector(type))
      return dxil_module_get_vector_type(mod, get_type_for_glsl_base_type(mod, glsl_get_base_type(type)),
                                         glsl_get_vector_elements(type));

   if (glsl_type_is_array(type))
      return dxil_module_get_array_type(mod, get_type_for_glsl_type(mod, glsl_get_array_element(type)),
                                        glsl_array_size(type));

   assert(glsl_type_is_struct(type));
   uint32_t size = glsl_get_length(type);
   /* Fix: calloc takes (nmemb, size) — the arguments were reversed (same
    * byte count, but wrong idiomatically and defeats overflow checking) —
    * and the result was used without a NULL check. */
   const struct dxil_type **fields = calloc(size, sizeof(const struct dxil_type *));
   if (!fields)
      return NULL;
   for (uint32_t i = 0; i < size; ++i)
      fields[i] = get_type_for_glsl_type(mod, glsl_get_struct_field(type, i));
   const struct dxil_type *ret = dxil_module_get_struct_type(mod, glsl_get_type_name(type), fields, size);
   free((void *)fields);
   return ret;
}
1451 
1452 static const struct dxil_value *
get_value_for_const_aggregate(struct dxil_module * mod,nir_constant * c,const struct glsl_type * type)1453 get_value_for_const_aggregate(struct dxil_module *mod, nir_constant *c, const struct glsl_type *type)
1454 {
1455    const struct dxil_type *dxil_type = get_type_for_glsl_type(mod, type);
1456    if (glsl_type_is_vector_or_scalar(type)) {
1457       const struct dxil_type *element_type = get_type_for_glsl_base_type(mod, glsl_get_base_type(type));
1458       const struct dxil_value *elements[NIR_MAX_VEC_COMPONENTS];
1459       for (uint32_t i = 0; i < glsl_get_vector_elements(type); ++i)
1460          elements[i] = get_value_for_const(mod, &c->values[i], element_type);
1461       if (glsl_type_is_scalar(type))
1462          return elements[0];
1463       return dxil_module_get_vector_const(mod, dxil_type, elements);
1464    }
1465 
1466    uint32_t num_values = glsl_get_length(type);
1467    assert(num_values == c->num_elements);
1468    const struct dxil_value **values = calloc(sizeof(const struct dxil_value *), num_values);
1469    const struct dxil_value *ret;
1470    if (glsl_type_is_array(type)) {
1471       const struct glsl_type *element_type = glsl_get_array_element(type);
1472       for (uint32_t i = 0; i < num_values; ++i)
1473          values[i] = get_value_for_const_aggregate(mod, c->elements[i], element_type);
1474       ret = dxil_module_get_array_const(mod, dxil_type, values);
1475    } else {
1476       for (uint32_t i = 0; i < num_values; ++i)
1477          values[i] = get_value_for_const_aggregate(mod, c->elements[i], glsl_get_struct_field(type, i));
1478       ret = dxil_module_get_struct_const(mod, dxil_type, values);
1479    }
1480    free((void *)values);
1481    return ret;
1482 }
1483 
1484 static bool
emit_global_consts(struct ntd_context * ctx)1485 emit_global_consts(struct ntd_context *ctx)
1486 {
1487    uint32_t index = 0;
1488    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_constant) {
1489       assert(var->constant_initializer);
1490       var->data.driver_location = index++;
1491    }
1492 
1493    ctx->consts = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index);
1494 
1495    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_constant) {
1496       if (!var->name)
1497          var->name = ralloc_asprintf(var, "const_%d", var->data.driver_location);
1498 
1499       const struct dxil_value *agg_vals =
1500          get_value_for_const_aggregate(&ctx->mod, var->constant_initializer, var->type);
1501       if (!agg_vals)
1502          return false;
1503 
1504       const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name,
1505                                                               dxil_value_get_type(agg_vals),
1506                                                               DXIL_AS_DEFAULT, 16,
1507                                                               agg_vals);
1508       if (!gvar)
1509          return false;
1510 
1511       ctx->consts[var->data.driver_location] = gvar;
1512    }
1513 
1514    return true;
1515 }
1516 
1517 static bool
emit_shared_vars(struct ntd_context * ctx)1518 emit_shared_vars(struct ntd_context *ctx)
1519 {
1520    uint32_t index = 0;
1521    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_shared)
1522       var->data.driver_location = index++;
1523 
1524    ctx->sharedvars = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index);
1525 
1526    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_shared) {
1527       if (!var->name)
1528          var->name = ralloc_asprintf(var, "shared_%d", var->data.driver_location);
1529       const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name,
1530                                                               get_type_for_glsl_type(&ctx->mod, var->type),
1531                                                               DXIL_AS_GROUPSHARED, 16,
1532                                                               NULL);
1533       if (!gvar)
1534          return false;
1535 
1536       ctx->sharedvars[var->data.driver_location] = gvar;
1537    }
1538 
1539    return true;
1540 }
1541 
1542 static bool
emit_cbv(struct ntd_context * ctx,unsigned binding,unsigned space,unsigned size,unsigned count,char * name)1543 emit_cbv(struct ntd_context *ctx, unsigned binding, unsigned space,
1544          unsigned size, unsigned count, char *name)
1545 {
1546    assert(count != 0);
1547 
1548    unsigned idx = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);
1549 
1550    const struct dxil_type *float32 = dxil_module_get_float_type(&ctx->mod, 32);
1551    const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, float32, size);
1552    const struct dxil_type *buffer_type = dxil_module_get_struct_type(&ctx->mod, name,
1553                                                                      &array_type, 1);
1554    // All ubo[1]s should have been lowered to ubo with static indexing
1555    const struct dxil_type *final_type = count != 1 ? dxil_module_get_array_type(&ctx->mod, buffer_type, count) : buffer_type;
1556    resource_array_layout layout = {idx, binding, count, space};
1557    const struct dxil_mdnode *cbv_meta = emit_cbv_metadata(&ctx->mod, final_type,
1558                                                           name, &layout, 4 * size);
1559 
1560    if (!cbv_meta)
1561       return false;
1562 
1563    util_dynarray_append(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *, cbv_meta);
1564    add_resource(ctx, DXIL_RES_CBV, DXIL_RESOURCE_KIND_CBUFFER, &layout);
1565 
1566    return true;
1567 }
1568 
1569 static bool
emit_ubo_var(struct ntd_context * ctx,nir_variable * var)1570 emit_ubo_var(struct ntd_context *ctx, nir_variable *var)
1571 {
1572    unsigned count = 1;
1573    if (glsl_type_is_array(var->type))
1574       count = glsl_get_length(var->type);
1575 
1576    char *name = var->name;
1577    char temp_name[30];
1578    if (name && strlen(name) == 0) {
1579       snprintf(temp_name, sizeof(temp_name), "__unnamed_ubo_%d",
1580                ctx->unnamed_ubo_count++);
1581       name = temp_name;
1582    }
1583 
1584    const struct glsl_type *type = glsl_without_array(var->type);
1585    assert(glsl_type_is_struct(type) || glsl_type_is_interface(type));
1586    unsigned dwords = ALIGN_POT(glsl_get_explicit_size(type, false), 16) / 4;
1587 
1588    return emit_cbv(ctx, var->data.binding, var->data.descriptor_set,
1589                    dwords, count, name);
1590 }
1591 
1592 static bool
emit_sampler(struct ntd_context * ctx,nir_variable * var,unsigned count)1593 emit_sampler(struct ntd_context *ctx, nir_variable *var, unsigned count)
1594 {
1595    unsigned id = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);
1596    unsigned binding = var->data.binding;
1597    resource_array_layout layout = {id, binding, count, var->data.descriptor_set};
1598    const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
1599    const struct dxil_type *sampler_type = dxil_module_get_struct_type(&ctx->mod, "struct.SamplerState", &int32_type, 1);
1600 
1601    if (glsl_type_is_array(var->type))
1602       sampler_type = dxil_module_get_array_type(&ctx->mod, sampler_type, count);
1603 
1604    const struct dxil_mdnode *sampler_meta = emit_sampler_metadata(&ctx->mod, sampler_type, var, &layout);
1605 
1606    if (!sampler_meta)
1607       return false;
1608 
1609    util_dynarray_append(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *, sampler_meta);
1610    add_resource(ctx, DXIL_RES_SAMPLER, DXIL_RESOURCE_KIND_SAMPLER, &layout);
1611 
1612    return true;
1613 }
1614 
/* Pre-create resource handles for resources that are accessed with static
 * (compile-time) indices, filling the per-class handle arrays in ctx.
 * Iterates ctx->resources, which holds either dxil_resource_v0 or
 * dxil_resource_v1 records depending on validator version; the records are
 * read through the v0 layout, which is assumed here to be a prefix of v1
 * (NOTE(review): confirm against dxil_container.h).
 * Returns false on failure.
 */
static bool
emit_static_indexing_handles(struct ntd_context *ctx)
{
   /* Vulkan always uses dynamic handles, from instructions in the NIR */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN)
      return true;

   /* Resource ids restart at 0 for each resource class; start with a
    * sentinel that cannot match any class. */
   unsigned last_res_class = -1;
   unsigned id = 0;

   /* Step through the dynarray manually because the record size depends
    * on the validator version. */
   unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
      sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
   for (struct dxil_resource_v0 *res = (struct dxil_resource_v0 *)ctx->resources.data;
        res < (struct dxil_resource_v0 *)((char *)ctx->resources.data + ctx->resources.size);
        res = (struct dxil_resource_v0 *)((char *)res + resource_element_size)) {
      /* Pick the resource class and the destination handle array. */
      enum dxil_resource_class res_class;
      const struct dxil_value **handle_array;
      switch (res->resource_type) {
      case DXIL_RES_SRV_TYPED:
      case DXIL_RES_SRV_RAW:
      case DXIL_RES_SRV_STRUCTURED:
         res_class = DXIL_RESOURCE_CLASS_SRV;
         handle_array = ctx->srv_handles;
         break;
      case DXIL_RES_CBV:
         res_class = DXIL_RESOURCE_CLASS_CBV;
         handle_array = ctx->cbv_handles;
         break;
      case DXIL_RES_SAMPLER:
         res_class = DXIL_RESOURCE_CLASS_SAMPLER;
         handle_array = ctx->sampler_handles;
         break;
      case DXIL_RES_UAV_RAW:
         res_class = DXIL_RESOURCE_CLASS_UAV;
         handle_array = ctx->ssbo_handles;
         break;
      case DXIL_RES_UAV_TYPED:
      case DXIL_RES_UAV_STRUCTURED:
      case DXIL_RES_UAV_STRUCTURED_WITH_COUNTER:
         res_class = DXIL_RESOURCE_CLASS_UAV;
         handle_array = ctx->image_handles;
         break;
      default:
         unreachable("Unexpected resource type");
      }

      /* Reset the running id whenever the class changes; note this must
       * happen even for resources that are skipped below. */
      if (last_res_class != res_class)
         id = 0;
      else
         id++;
      last_res_class = res_class;

      /* Spaces above 1 are not statically indexed here. */
      if (res->space > 1)
         continue;
      assert(res->space == 0 ||
         (res->space == 1 &&
            res->resource_type != DXIL_RES_UAV_RAW &&
            ctx->opts->environment == DXIL_ENVIRONMENT_GL));

      /* CL uses dynamic handles for the "globals" UAV array, but uses static
       * handles for UBOs, textures, and samplers.
       */
      if (ctx->opts->environment == DXIL_ENVIRONMENT_CL &&
          res->resource_type == DXIL_RES_UAV_RAW)
         continue;

      /* One createHandle per binding slot in the resource's range. */
      for (unsigned i = res->lower_bound; i <= res->upper_bound; ++i) {
         handle_array[i] = emit_createhandle_call_const_index(ctx,
                                                              res_class,
                                                              res->lower_bound,
                                                              res->upper_bound,
                                                              res->space,
                                                              id,
                                                              i,
                                                              false);
         if (!handle_array[i])
            return false;
      }
   }
   return true;
}
1696 
1697 static const struct dxil_mdnode *
emit_gs_state(struct ntd_context * ctx)1698 emit_gs_state(struct ntd_context *ctx)
1699 {
1700    const struct dxil_mdnode *gs_state_nodes[5];
1701    const nir_shader *s = ctx->shader;
1702 
1703    gs_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, dxil_get_input_primitive(s->info.gs.input_primitive));
1704    gs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.vertices_out);
1705    gs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.gs.active_stream_mask, 1));
1706    gs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, dxil_get_primitive_topology(s->info.gs.output_primitive));
1707    gs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.invocations);
1708 
1709    for (unsigned i = 0; i < ARRAY_SIZE(gs_state_nodes); ++i) {
1710       if (!gs_state_nodes[i])
1711          return NULL;
1712    }
1713 
1714    return dxil_get_metadata_node(&ctx->mod, gs_state_nodes, ARRAY_SIZE(gs_state_nodes));
1715 }
1716 
1717 static enum dxil_tessellator_domain
get_tessellator_domain(enum tess_primitive_mode primitive_mode)1718 get_tessellator_domain(enum tess_primitive_mode primitive_mode)
1719 {
1720    switch (primitive_mode) {
1721    case TESS_PRIMITIVE_QUADS: return DXIL_TESSELLATOR_DOMAIN_QUAD;
1722    case TESS_PRIMITIVE_TRIANGLES: return DXIL_TESSELLATOR_DOMAIN_TRI;
1723    case TESS_PRIMITIVE_ISOLINES: return DXIL_TESSELLATOR_DOMAIN_ISOLINE;
1724    default:
1725       unreachable("Invalid tessellator primitive mode");
1726    }
1727 }
1728 
1729 static enum dxil_tessellator_partitioning
get_tessellator_partitioning(enum gl_tess_spacing spacing)1730 get_tessellator_partitioning(enum gl_tess_spacing spacing)
1731 {
1732    switch (spacing) {
1733    default:
1734    case TESS_SPACING_EQUAL:
1735       return DXIL_TESSELLATOR_PARTITIONING_INTEGER;
1736    case TESS_SPACING_FRACTIONAL_EVEN:
1737       return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN;
1738    case TESS_SPACING_FRACTIONAL_ODD:
1739       return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD;
1740    }
1741 }
1742 
1743 static enum dxil_tessellator_output_primitive
get_tessellator_output_primitive(const struct shader_info * info)1744 get_tessellator_output_primitive(const struct shader_info *info)
1745 {
1746    if (info->tess.point_mode)
1747       return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_POINT;
1748    if (info->tess._primitive_mode == TESS_PRIMITIVE_ISOLINES)
1749       return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_LINE;
1750    /* Note: GL tessellation domain is inverted from D3D, which means triangle
1751     * winding needs to be inverted.
1752     */
1753    if (info->tess.ccw)
1754       return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CW;
1755    return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CCW;
1756 }
1757 
1758 static const struct dxil_mdnode *
emit_hs_state(struct ntd_context * ctx)1759 emit_hs_state(struct ntd_context *ctx)
1760 {
1761    const struct dxil_mdnode *hs_state_nodes[7];
1762 
1763    hs_state_nodes[0] = dxil_get_metadata_func(&ctx->mod, ctx->tess_ctrl_patch_constant_func_def->func);
1764    hs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->tess_input_control_point_count);
1765    hs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out);
1766    hs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode));
1767    hs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_partitioning(ctx->shader->info.tess.spacing));
1768    hs_state_nodes[5] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_output_primitive(&ctx->shader->info));
1769    hs_state_nodes[6] = dxil_get_metadata_float32(&ctx->mod, 64.0f);
1770 
1771    return dxil_get_metadata_node(&ctx->mod, hs_state_nodes, ARRAY_SIZE(hs_state_nodes));
1772 }
1773 
1774 static const struct dxil_mdnode *
emit_ds_state(struct ntd_context * ctx)1775 emit_ds_state(struct ntd_context *ctx)
1776 {
1777    const struct dxil_mdnode *ds_state_nodes[2];
1778 
1779    ds_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode));
1780    ds_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out);
1781 
1782    return dxil_get_metadata_node(&ctx->mod, ds_state_nodes, ARRAY_SIZE(ds_state_nodes));
1783 }
1784 
1785 static const struct dxil_mdnode *
emit_threads(struct ntd_context * ctx)1786 emit_threads(struct ntd_context *ctx)
1787 {
1788    const nir_shader *s = ctx->shader;
1789    const struct dxil_mdnode *threads_x = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[0], 1));
1790    const struct dxil_mdnode *threads_y = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[1], 1));
1791    const struct dxil_mdnode *threads_z = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[2], 1));
1792    if (!threads_x || !threads_y || !threads_z)
1793       return false;
1794 
1795    const struct dxil_mdnode *threads_nodes[] = { threads_x, threads_y, threads_z };
1796    return dxil_get_metadata_node(&ctx->mod, threads_nodes, ARRAY_SIZE(threads_nodes));
1797 }
1798 
1799 static const struct dxil_mdnode *
emit_wave_size(struct ntd_context * ctx)1800 emit_wave_size(struct ntd_context *ctx)
1801 {
1802    const nir_shader *s = ctx->shader;
1803    const struct dxil_mdnode *wave_size_node = dxil_get_metadata_int32(&ctx->mod, s->info.subgroup_size);
1804    return dxil_get_metadata_node(&ctx->mod, &wave_size_node, 1);
1805 }
1806 
1807 static const struct dxil_mdnode *
emit_wave_size_range(struct ntd_context * ctx)1808 emit_wave_size_range(struct ntd_context *ctx)
1809 {
1810    const nir_shader *s = ctx->shader;
1811    const struct dxil_mdnode *wave_size_nodes[3];
1812    wave_size_nodes[0] = dxil_get_metadata_int32(&ctx->mod, s->info.subgroup_size);
1813    wave_size_nodes[1] = wave_size_nodes[0];
1814    wave_size_nodes[2] = wave_size_nodes[0];
1815    return dxil_get_metadata_node(&ctx->mod, wave_size_nodes, ARRAY_SIZE(wave_size_nodes));
1816 }
1817 
1818 static int64_t
get_module_flags(struct ntd_context * ctx)1819 get_module_flags(struct ntd_context *ctx)
1820 {
1821    /* See the DXIL documentation for the definition of these flags:
1822     *
1823     * https://github.com/Microsoft/DirectXShaderCompiler/blob/master/docs/DXIL.rst#shader-flags
1824     */
1825 
1826    uint64_t flags = 0;
1827    if (ctx->mod.feats.doubles)
1828       flags |= (1 << 2);
1829    if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT &&
1830        ctx->shader->info.fs.early_fragment_tests)
1831       flags |= (1 << 3);
1832    if (ctx->mod.raw_and_structured_buffers)
1833       flags |= (1 << 4);
1834    if (ctx->mod.feats.min_precision)
1835       flags |= (1 << 5);
1836    if (ctx->mod.feats.dx11_1_double_extensions)
1837       flags |= (1 << 6);
1838    if (ctx->mod.feats.array_layer_from_vs_or_ds)
1839       flags |= (1 << 9);
1840    if (ctx->mod.feats.inner_coverage)
1841       flags |= (1 << 10);
1842    if (ctx->mod.feats.stencil_ref)
1843       flags |= (1 << 11);
1844    if (ctx->mod.feats.tiled_resources)
1845       flags |= (1 << 12);
1846    if (ctx->mod.feats.typed_uav_load_additional_formats)
1847       flags |= (1 << 13);
1848    if (ctx->mod.feats.use_64uavs)
1849       flags |= (1 << 15);
1850    if (ctx->mod.feats.uavs_at_every_stage)
1851       flags |= (1 << 16);
1852    if (ctx->mod.feats.cs_4x_raw_sb)
1853       flags |= (1 << 17);
1854    if (ctx->mod.feats.rovs)
1855       flags |= (1 << 18);
1856    if (ctx->mod.feats.wave_ops)
1857       flags |= (1 << 19);
1858    if (ctx->mod.feats.int64_ops)
1859       flags |= (1 << 20);
1860    if (ctx->mod.feats.view_id)
1861       flags |= (1 << 21);
1862    if (ctx->mod.feats.barycentrics)
1863       flags |= (1 << 22);
1864    if (ctx->mod.feats.native_low_precision)
1865       flags |= (1 << 23) | (1 << 5);
1866    if (ctx->mod.feats.shading_rate)
1867       flags |= (1 << 24);
1868    if (ctx->mod.feats.raytracing_tier_1_1)
1869       flags |= (1 << 25);
1870    if (ctx->mod.feats.sampler_feedback)
1871       flags |= (1 << 26);
1872    if (ctx->mod.feats.atomic_int64_typed)
1873       flags |= (1 << 27);
1874    if (ctx->mod.feats.atomic_int64_tgsm)
1875       flags |= (1 << 28);
1876    if (ctx->mod.feats.derivatives_in_mesh_or_amp)
1877       flags |= (1 << 29);
1878    if (ctx->mod.feats.resource_descriptor_heap_indexing)
1879       flags |= (1 << 30);
1880    if (ctx->mod.feats.sampler_descriptor_heap_indexing)
1881       flags |= (1ull << 31);
1882    if (ctx->mod.feats.atomic_int64_heap_resource)
1883       flags |= (1ull << 32);
1884    if (ctx->mod.feats.advanced_texture_ops)
1885       flags |= (1ull << 34);
1886    if (ctx->mod.feats.writable_msaa)
1887       flags |= (1ull << 35);
1888    // Bit 36 is wave MMA
1889    if (ctx->mod.feats.sample_cmp_bias_gradient)
1890       flags |= (1ull << 37);
1891    if (ctx->mod.feats.extended_command_info)
1892       flags |= (1ull << 38);
1893 
1894    if (ctx->opts->disable_math_refactoring)
1895       flags |= (1 << 1);
1896 
1897    /* Work around https://github.com/microsoft/DirectXShaderCompiler/issues/4616
1898     * When targeting SM6.7 and with at least one UAV, if no other flags are present,
1899     * set the resources-may-not-alias flag, or else the DXIL validator may end up
1900     * with uninitialized memory which will fail validation, due to missing that flag.
1901     */
1902    if (flags == 0 && ctx->mod.minor_version >= 7 && ctx->num_uavs > 0)
1903       flags |= (1ull << 33);
1904 
1905    return flags;
1906 }
1907 
1908 static const struct dxil_mdnode *
emit_entrypoint(struct ntd_context * ctx,const struct dxil_func * func,const char * name,const struct dxil_mdnode * signatures,const struct dxil_mdnode * resources,const struct dxil_mdnode * shader_props)1909 emit_entrypoint(struct ntd_context *ctx,
1910                 const struct dxil_func *func, const char *name,
1911                 const struct dxil_mdnode *signatures,
1912                 const struct dxil_mdnode *resources,
1913                 const struct dxil_mdnode *shader_props)
1914 {
1915    char truncated_name[254] = { 0 };
1916    strncpy(truncated_name, name, ARRAY_SIZE(truncated_name) - 1);
1917 
1918    const struct dxil_mdnode *func_md = dxil_get_metadata_func(&ctx->mod, func);
1919    const struct dxil_mdnode *name_md = dxil_get_metadata_string(&ctx->mod, truncated_name);
1920    const struct dxil_mdnode *nodes[] = {
1921       func_md,
1922       name_md,
1923       signatures,
1924       resources,
1925       shader_props
1926    };
1927    return dxil_get_metadata_node(&ctx->mod, nodes,
1928                                  ARRAY_SIZE(nodes));
1929 }
1930 
1931 static const struct dxil_mdnode *
emit_resources(struct ntd_context * ctx)1932 emit_resources(struct ntd_context *ctx)
1933 {
1934    bool emit_resources = false;
1935    const struct dxil_mdnode *resources_nodes[] = {
1936       NULL, NULL, NULL, NULL
1937    };
1938 
1939 #define ARRAY_AND_SIZE(arr) arr.data, util_dynarray_num_elements(&arr, const struct dxil_mdnode *)
1940 
1941    if (ctx->srv_metadata_nodes.size) {
1942       resources_nodes[0] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->srv_metadata_nodes));
1943       emit_resources = true;
1944    }
1945 
1946    if (ctx->uav_metadata_nodes.size) {
1947       resources_nodes[1] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->uav_metadata_nodes));
1948       emit_resources = true;
1949    }
1950 
1951    if (ctx->cbv_metadata_nodes.size) {
1952       resources_nodes[2] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->cbv_metadata_nodes));
1953       emit_resources = true;
1954    }
1955 
1956    if (ctx->sampler_metadata_nodes.size) {
1957       resources_nodes[3] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->sampler_metadata_nodes));
1958       emit_resources = true;
1959    }
1960 
1961 #undef ARRAY_AND_SIZE
1962 
1963    return emit_resources ?
1964       dxil_get_metadata_node(&ctx->mod, resources_nodes, ARRAY_SIZE(resources_nodes)): NULL;
1965 }
1966 
1967 static bool
emit_tag(struct ntd_context * ctx,enum dxil_shader_tag tag,const struct dxil_mdnode * value_node)1968 emit_tag(struct ntd_context *ctx, enum dxil_shader_tag tag,
1969          const struct dxil_mdnode *value_node)
1970 {
1971    const struct dxil_mdnode *tag_node = dxil_get_metadata_int32(&ctx->mod, tag);
1972    if (!tag_node || !value_node)
1973       return false;
1974    assert(ctx->num_shader_property_nodes <= ARRAY_SIZE(ctx->shader_property_nodes) - 2);
1975    ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = tag_node;
1976    ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = value_node;
1977 
1978    return true;
1979 }
1980 
1981 static bool
emit_metadata(struct ntd_context * ctx)1982 emit_metadata(struct ntd_context *ctx)
1983 {
1984    /* DXIL versions are 1.x for shader model 6.x */
1985    assert(ctx->mod.major_version == 6);
1986    unsigned dxilMajor = 1;
1987    unsigned dxilMinor = ctx->mod.minor_version;
1988    unsigned valMajor = ctx->mod.major_validator;
1989    unsigned valMinor = ctx->mod.minor_validator;
1990    if (!emit_llvm_ident(&ctx->mod) ||
1991        !emit_named_version(&ctx->mod, "dx.version", dxilMajor, dxilMinor) ||
1992        !emit_named_version(&ctx->mod, "dx.valver", valMajor, valMinor) ||
1993        !emit_dx_shader_model(&ctx->mod))
1994       return false;
1995 
1996    const struct dxil_func_def *main_func_def = ctx->main_func_def;
1997    if (!main_func_def)
1998       return false;
1999    const struct dxil_func *main_func = main_func_def->func;
2000 
2001    const struct dxil_mdnode *resources_node = emit_resources(ctx);
2002 
2003    const struct dxil_mdnode *main_entrypoint = dxil_get_metadata_func(&ctx->mod, main_func);
2004    const struct dxil_mdnode *node27 = dxil_get_metadata_node(&ctx->mod, NULL, 0);
2005 
2006    const struct dxil_mdnode *node4 = dxil_get_metadata_int32(&ctx->mod, 0);
2007    const struct dxil_mdnode *nodes_4_27_27[] = {
2008       node4, node27, node27
2009    };
2010    const struct dxil_mdnode *node28 = dxil_get_metadata_node(&ctx->mod, nodes_4_27_27,
2011                                                       ARRAY_SIZE(nodes_4_27_27));
2012 
2013    const struct dxil_mdnode *node29 = dxil_get_metadata_node(&ctx->mod, &node28, 1);
2014 
2015    const struct dxil_mdnode *node3 = dxil_get_metadata_int32(&ctx->mod, 1);
2016    const struct dxil_mdnode *main_type_annotation_nodes[] = {
2017       node3, main_entrypoint, node29
2018    };
2019    const struct dxil_mdnode *main_type_annotation = dxil_get_metadata_node(&ctx->mod, main_type_annotation_nodes,
2020                                                                            ARRAY_SIZE(main_type_annotation_nodes));
2021 
2022    if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) {
2023       if (!emit_tag(ctx, DXIL_SHADER_TAG_GS_STATE, emit_gs_state(ctx)))
2024          return false;
2025    } else if (ctx->mod.shader_kind == DXIL_HULL_SHADER) {
2026       ctx->tess_input_control_point_count = 32;
2027       nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
2028          if (nir_is_arrayed_io(var, MESA_SHADER_TESS_CTRL)) {
2029             ctx->tess_input_control_point_count = glsl_array_size(var->type);
2030             break;
2031          }
2032       }
2033 
2034       if (!emit_tag(ctx, DXIL_SHADER_TAG_HS_STATE, emit_hs_state(ctx)))
2035          return false;
2036    } else if (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) {
2037       if (!emit_tag(ctx, DXIL_SHADER_TAG_DS_STATE, emit_ds_state(ctx)))
2038          return false;
2039    } else if (ctx->mod.shader_kind == DXIL_COMPUTE_SHADER) {
2040       if (!emit_tag(ctx, DXIL_SHADER_TAG_NUM_THREADS, emit_threads(ctx)))
2041          return false;
2042       if (ctx->mod.minor_version >= 6 &&
2043           ctx->shader->info.subgroup_size >= SUBGROUP_SIZE_REQUIRE_4) {
2044          if (ctx->mod.minor_version < 8) {
2045             if (!emit_tag(ctx, DXIL_SHADER_TAG_WAVE_SIZE, emit_wave_size(ctx)))
2046                return false;
2047          } else {
2048             if (!emit_tag(ctx, DXIL_SHADER_TAG_WAVE_SIZE_RANGE, emit_wave_size_range(ctx)))
2049                return false;
2050          }
2051       }
2052    }
2053 
2054    uint64_t flags = get_module_flags(ctx);
2055    if (flags != 0) {
2056       if (!emit_tag(ctx, DXIL_SHADER_TAG_FLAGS, dxil_get_metadata_int64(&ctx->mod, flags)))
2057          return false;
2058    }
2059    const struct dxil_mdnode *shader_properties = NULL;
2060    if (ctx->num_shader_property_nodes > 0) {
2061       shader_properties = dxil_get_metadata_node(&ctx->mod, ctx->shader_property_nodes,
2062                                                  ctx->num_shader_property_nodes);
2063       if (!shader_properties)
2064          return false;
2065    }
2066 
2067    nir_function_impl *entry_func_impl = nir_shader_get_entrypoint(ctx->shader);
2068    const struct dxil_mdnode *dx_entry_point = emit_entrypoint(ctx, main_func,
2069        entry_func_impl->function->name, get_signatures(&ctx->mod), resources_node, shader_properties);
2070    if (!dx_entry_point)
2071       return false;
2072 
2073    if (resources_node) {
2074       const struct dxil_mdnode *dx_resources = resources_node;
2075       dxil_add_metadata_named_node(&ctx->mod, "dx.resources",
2076                                        &dx_resources, 1);
2077    }
2078 
2079    if (ctx->mod.minor_version >= 2 &&
2080        dxil_nir_analyze_io_dependencies(&ctx->mod, ctx->shader)) {
2081       const struct dxil_type *i32_type = dxil_module_get_int_type(&ctx->mod, 32);
2082       if (!i32_type)
2083          return false;
2084 
2085       const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, i32_type, ctx->mod.serialized_dependency_table_size);
2086       if (!array_type)
2087          return false;
2088 
2089       const struct dxil_value **array_entries = malloc(sizeof(const struct value *) * ctx->mod.serialized_dependency_table_size);
2090       if (!array_entries)
2091          return false;
2092 
2093       for (uint32_t i = 0; i < ctx->mod.serialized_dependency_table_size; ++i)
2094          array_entries[i] = dxil_module_get_int32_const(&ctx->mod, ctx->mod.serialized_dependency_table[i]);
2095       const struct dxil_value *array_val = dxil_module_get_array_const(&ctx->mod, array_type, array_entries);
2096       free((void *)array_entries);
2097 
2098       const struct dxil_mdnode *view_id_state_val = dxil_get_metadata_value(&ctx->mod, array_type, array_val);
2099       if (!view_id_state_val)
2100          return false;
2101 
2102       const struct dxil_mdnode *view_id_state_node = dxil_get_metadata_node(&ctx->mod, &view_id_state_val, 1);
2103 
2104       dxil_add_metadata_named_node(&ctx->mod, "dx.viewIdState", &view_id_state_node, 1);
2105    }
2106 
2107    const struct dxil_mdnode *dx_type_annotations[] = { main_type_annotation };
2108    return dxil_add_metadata_named_node(&ctx->mod, "dx.typeAnnotations",
2109                                        dx_type_annotations,
2110                                        ARRAY_SIZE(dx_type_annotations)) &&
2111           dxil_add_metadata_named_node(&ctx->mod, "dx.entryPoints",
2112                                        &dx_entry_point, 1);
2113 }
2114 
2115 static const struct dxil_value *
bitcast_to_int(struct ntd_context * ctx,unsigned bit_size,const struct dxil_value * value)2116 bitcast_to_int(struct ntd_context *ctx, unsigned bit_size,
2117                const struct dxil_value *value)
2118 {
2119    const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod, bit_size);
2120    if (!type)
2121       return NULL;
2122 
2123    return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
2124 }
2125 
2126 static const struct dxil_value *
bitcast_to_float(struct ntd_context * ctx,unsigned bit_size,const struct dxil_value * value)2127 bitcast_to_float(struct ntd_context *ctx, unsigned bit_size,
2128                  const struct dxil_value *value)
2129 {
2130    const struct dxil_type *type = dxil_module_get_float_type(&ctx->mod, bit_size);
2131    if (!type)
2132       return NULL;
2133 
2134    return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
2135 }
2136 
2137 static bool
is_phi_src(nir_def * ssa)2138 is_phi_src(nir_def *ssa)
2139 {
2140    nir_foreach_use(src, ssa)
2141       if (nir_src_parent_instr(src)->type == nir_instr_type_phi)
2142          return true;
2143    return false;
2144 }
2145 
/* Record 'value' as the DXIL value for channel 'chan' of SSA def 'ssa'.
 * Values that feed phi nodes are first bitcast to the def's canonical
 * type (int preferred), so all phi sources agree on a representation.
 */
static void
store_ssa_def(struct ntd_context *ctx, nir_def *ssa, unsigned chan,
              const struct dxil_value *value)
{
   assert(ssa->index < ctx->num_defs);
   assert(chan < ssa->num_components);
   /* Insert bitcasts for phi srcs in the parent block */
   if (is_phi_src(ssa)) {
      /* Prefer ints over floats if it could be both or if we have no type info */
      nir_alu_type expect_type =
         BITSET_TEST(ctx->int_types, ssa->index) ? nir_type_int :
         (BITSET_TEST(ctx->float_types, ssa->index) ? nir_type_float :
          nir_type_int);
      /* 1-bit (boolean) defs must already be integer-typed; no cast. */
      assert(ssa->bit_size != 1 || expect_type == nir_type_int);
      if (ssa->bit_size != 1 && expect_type != dxil_type_to_nir_type(dxil_value_get_type(value)))
         value = dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST,
                                expect_type == nir_type_int ?
                                 dxil_module_get_int_type(&ctx->mod, ssa->bit_size) :
                                 dxil_module_get_float_type(&ctx->mod, ssa->bit_size), value);
      /* A 64-bit phi value implies the corresponding 64-bit feature bit. */
      if (ssa->bit_size == 64) {
         if (expect_type == nir_type_int)
            ctx->mod.feats.int64_ops = true;
         if (expect_type == nir_type_float)
            ctx->mod.feats.doubles = true;
      }
   }
   /* Stored values are later fetched by get_src_ssa(). */
   ctx->defs[ssa->index].chans[chan] = value;
}
2174 
2175 static void
store_def(struct ntd_context * ctx,nir_def * def,unsigned chan,const struct dxil_value * value)2176 store_def(struct ntd_context *ctx, nir_def *def, unsigned chan,
2177            const struct dxil_value *value)
2178 {
2179    const struct dxil_type *type = dxil_value_get_type(value);
2180    if (type == ctx->mod.float64_type)
2181       ctx->mod.feats.doubles = true;
2182    if (type == ctx->mod.float16_type ||
2183        type == ctx->mod.int16_type)
2184       ctx->mod.feats.min_precision = true;
2185    if (type == ctx->mod.int64_type)
2186       ctx->mod.feats.int64_ops = true;
2187    store_ssa_def(ctx, def, chan, value);
2188 }
2189 
2190 static void
store_alu_dest(struct ntd_context * ctx,nir_alu_instr * alu,unsigned chan,const struct dxil_value * value)2191 store_alu_dest(struct ntd_context *ctx, nir_alu_instr *alu, unsigned chan,
2192                const struct dxil_value *value)
2193 {
2194    store_def(ctx, &alu->def, chan, value);
2195 }
2196 
2197 static const struct dxil_value *
get_src_ssa(struct ntd_context * ctx,const nir_def * ssa,unsigned chan)2198 get_src_ssa(struct ntd_context *ctx, const nir_def *ssa, unsigned chan)
2199 {
2200    assert(ssa->index < ctx->num_defs);
2201    assert(chan < ssa->num_components);
2202    assert(ctx->defs[ssa->index].chans[chan]);
2203    return ctx->defs[ssa->index].chans[chan];
2204 }
2205 
/* Fetch one channel of a nir_src as a dxil_value of the representation
 * implied by `type`, inserting a bitcast (or, for bools, a trunc to i1)
 * when the stored value differs. Also records 16/64-bit feature usage.
 */
static const struct dxil_value *
get_src(struct ntd_context *ctx, nir_src *src, unsigned chan,
        nir_alu_type type)
{
   const struct dxil_value *value = get_src_ssa(ctx, src->ssa, chan);

   const int bit_size = nir_src_bit_size(*src);

   switch (nir_alu_type_get_base_type(type)) {
   case nir_type_int:
   case nir_type_uint: {
      const struct dxil_type *expect_type =  dxil_module_get_int_type(&ctx->mod, bit_size);
      /* nothing to do */
      if (dxil_value_type_equal_to(value, expect_type)) {
         assert(bit_size != 64 || ctx->mod.feats.int64_ops);
         return value;
      }
      /* Value was stored as the other representation; a 64-bit bitcast
       * from float implies doubles were already in use. */
      if (bit_size == 64) {
         assert(ctx->mod.feats.doubles);
         ctx->mod.feats.int64_ops = true;
      }
      if (bit_size == 16)
         ctx->mod.feats.native_low_precision = true;
      assert(dxil_value_type_bitsize_equal_to(value, bit_size));
      return bitcast_to_int(ctx,  bit_size, value);
      }

   case nir_type_float:
      assert(nir_src_bit_size(*src) >= 16);
      if (dxil_value_type_equal_to(value, dxil_module_get_float_type(&ctx->mod, bit_size))) {
         assert(nir_src_bit_size(*src) != 64 || ctx->mod.feats.doubles);
         return value;
      }
      if (bit_size == 64) {
         assert(ctx->mod.feats.int64_ops);
         ctx->mod.feats.doubles = true;
      }
      if (bit_size == 16)
         ctx->mod.feats.native_low_precision = true;
      assert(dxil_value_type_bitsize_equal_to(value, bit_size));
      return bitcast_to_float(ctx, bit_size, value);

   case nir_type_bool:
      /* Bools are i1 in DXIL; truncate wider stored values */
      if (!dxil_value_type_bitsize_equal_to(value, 1)) {
         return dxil_emit_cast(&ctx->mod, DXIL_CAST_TRUNC,
                               dxil_module_get_int_type(&ctx->mod, 1), value);
      }
      return value;

   default:
      unreachable("unexpected nir_alu_type");
   }
}
2259 
2260 static const struct dxil_value *
get_alu_src(struct ntd_context * ctx,nir_alu_instr * alu,unsigned src)2261 get_alu_src(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src)
2262 {
2263    unsigned chan = alu->src[src].swizzle[0];
2264    return get_src(ctx, &alu->src[src].src, chan,
2265                   nir_op_infos[alu->op].input_types[src]);
2266 }
2267 
2268 static bool
emit_binop(struct ntd_context * ctx,nir_alu_instr * alu,enum dxil_bin_opcode opcode,const struct dxil_value * op0,const struct dxil_value * op1)2269 emit_binop(struct ntd_context *ctx, nir_alu_instr *alu,
2270            enum dxil_bin_opcode opcode,
2271            const struct dxil_value *op0, const struct dxil_value *op1)
2272 {
2273    bool is_float_op = nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) == nir_type_float;
2274 
2275    enum dxil_opt_flags flags = 0;
2276    if (is_float_op && !alu->exact)
2277       flags |= DXIL_UNSAFE_ALGEBRA;
2278 
2279    const struct dxil_value *v = dxil_emit_binop(&ctx->mod, opcode, op0, op1, flags);
2280    if (!v)
2281       return false;
2282    store_alu_dest(ctx, alu, 0, v);
2283    return true;
2284 }
2285 
/* Emit a shift, sanitizing the shift count first: the count is resized to
 * op0's width when the sizes differ and masked to [0, bit_size - 1]
 * (constant counts are masked at build time), since out-of-range counts
 * are not well-defined in LLVM-based DXIL.
 */
static bool
emit_shift(struct ntd_context *ctx, nir_alu_instr *alu,
           enum dxil_bin_opcode opcode,
           const struct dxil_value *op0, const struct dxil_value *op1)
{
   unsigned op0_bit_size = nir_src_bit_size(alu->src[0].src);
   unsigned op1_bit_size = nir_src_bit_size(alu->src[1].src);

   uint64_t shift_mask = op0_bit_size - 1;
   if (!nir_src_is_const(alu->src[1].src)) {
      if (op0_bit_size != op1_bit_size) {
         /* Resize the count to match the shifted value's width */
         const struct dxil_type *type =
            dxil_module_get_int_type(&ctx->mod, op0_bit_size);
         enum dxil_cast_opcode cast_op =
            op1_bit_size < op0_bit_size ? DXIL_CAST_ZEXT : DXIL_CAST_TRUNC;
         op1 = dxil_emit_cast(&ctx->mod, cast_op, type, op1);
      }
      op1 = dxil_emit_binop(&ctx->mod, DXIL_BINOP_AND,
                            op1,
                            dxil_module_get_int_const(&ctx->mod, shift_mask, op0_bit_size),
                            0);
   } else {
      /* Constant count: fold the mask and emit a plain constant */
      uint64_t val = nir_scalar_as_uint(
         nir_scalar_chase_alu_src(nir_get_scalar(&alu->def, 0), 1));
      op1 = dxil_module_get_int_const(&ctx->mod, val & shift_mask, op0_bit_size);
   }

   const struct dxil_value *v =
      dxil_emit_binop(&ctx->mod, opcode, op0, op1, 0);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}
2320 
2321 static bool
emit_cmp(struct ntd_context * ctx,nir_alu_instr * alu,enum dxil_cmp_pred pred,const struct dxil_value * op0,const struct dxil_value * op1)2322 emit_cmp(struct ntd_context *ctx, nir_alu_instr *alu,
2323          enum dxil_cmp_pred pred,
2324          const struct dxil_value *op0, const struct dxil_value *op1)
2325 {
2326    const struct dxil_value *v = dxil_emit_cmp(&ctx->mod, pred, op0, op1);
2327    if (!v)
2328       return false;
2329    store_alu_dest(ctx, alu, 0, v);
2330    return true;
2331 }
2332 
/* Map a nir conversion opcode to the corresponding DXIL/LLVM cast opcode,
 * choosing trunc vs. extend variants from the source/destination bit sizes.
 */
static enum dxil_cast_opcode
get_cast_op(nir_alu_instr *alu)
{
   unsigned dst_bits = alu->def.bit_size;
   unsigned src_bits = nir_src_bit_size(alu->src[0].src);

   switch (alu->op) {
   /* bool -> int */
   case nir_op_b2i16:
   case nir_op_b2i32:
   case nir_op_b2i64:
      return DXIL_CAST_ZEXT;

   /* float -> float */
   case nir_op_f2f16_rtz:
   case nir_op_f2f16:
   case nir_op_f2fmp:
   case nir_op_f2f32:
   case nir_op_f2f64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_FPTRUNC;
      else
         return DXIL_CAST_FPEXT;

   /* int -> int */
   case nir_op_i2i1:
   case nir_op_i2i16:
   case nir_op_i2imp:
   case nir_op_i2i32:
   case nir_op_i2i64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_TRUNC;
      else
         return DXIL_CAST_SEXT;

   /* uint -> uint */
   case nir_op_u2u1:
   case nir_op_u2u16:
   case nir_op_u2u32:
   case nir_op_u2u64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_TRUNC;
      else
         return DXIL_CAST_ZEXT;

   /* float -> int */
   case nir_op_f2i16:
   case nir_op_f2imp:
   case nir_op_f2i32:
   case nir_op_f2i64:
      return DXIL_CAST_FPTOSI;

   /* float -> uint */
   case nir_op_f2u16:
   case nir_op_f2ump:
   case nir_op_f2u32:
   case nir_op_f2u64:
      return DXIL_CAST_FPTOUI;

   /* int -> float */
   case nir_op_i2f16:
   case nir_op_i2fmp:
   case nir_op_i2f32:
   case nir_op_i2f64:
      return DXIL_CAST_SITOFP;

   /* uint -> float */
   case nir_op_u2f16:
   case nir_op_u2fmp:
   case nir_op_u2f32:
   case nir_op_u2f64:
      return DXIL_CAST_UITOFP;

   default:
      unreachable("unexpected cast op");
   }
}
2413 
2414 static const struct dxil_type *
get_cast_dest_type(struct ntd_context * ctx,nir_alu_instr * alu)2415 get_cast_dest_type(struct ntd_context *ctx, nir_alu_instr *alu)
2416 {
2417    unsigned dst_bits = alu->def.bit_size;
2418    switch (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type)) {
2419    case nir_type_bool:
2420       assert(dst_bits == 1);
2421       FALLTHROUGH;
2422    case nir_type_int:
2423    case nir_type_uint:
2424       return dxil_module_get_int_type(&ctx->mod, dst_bits);
2425 
2426    case nir_type_float:
2427       return dxil_module_get_float_type(&ctx->mod, dst_bits);
2428 
2429    default:
2430       unreachable("unknown nir_alu_type");
2431    }
2432 }
2433 
2434 static bool
is_double(nir_alu_type alu_type,unsigned bit_size)2435 is_double(nir_alu_type alu_type, unsigned bit_size)
2436 {
2437    return nir_alu_type_get_base_type(alu_type) == nir_type_float &&
2438           bit_size == 64;
2439 }
2440 
/* Emit a scalar conversion for a nir cast opcode, recording the feature
 * flags it implies: double <-> int conversions need the DX 11.1 double
 * extensions, and a 16-bit result from a non-"mp" cast implies native
 * 16-bit (low precision) support.
 */
static bool
emit_cast(struct ntd_context *ctx, nir_alu_instr *alu,
          const struct dxil_value *value)
{
   enum dxil_cast_opcode opcode = get_cast_op(alu);
   const struct dxil_type *type = get_cast_dest_type(ctx, alu);
   if (!type)
      return false;

   const nir_op_info *info = &nir_op_infos[alu->op];
   switch (opcode) {
   case DXIL_CAST_UITOFP:
   case DXIL_CAST_SITOFP:
      if (is_double(info->output_type, alu->def.bit_size))
         ctx->mod.feats.dx11_1_double_extensions = true;
      break;
   case DXIL_CAST_FPTOUI:
   case DXIL_CAST_FPTOSI:
      if (is_double(info->input_types[0], nir_src_bit_size(alu->src[0].src)))
         ctx->mod.feats.dx11_1_double_extensions = true;
      break;
   default:
      break;
   }

   if (alu->def.bit_size == 16) {
      switch (alu->op) {
      case nir_op_f2fmp:
      case nir_op_i2imp:
      case nir_op_f2imp:
      case nir_op_f2ump:
      case nir_op_i2fmp:
      case nir_op_u2fmp:
         /* "mp" (mediump) casts don't require native 16-bit support */
         break;
      default:
         ctx->mod.feats.native_low_precision = true;
      }
   }

   const struct dxil_value *v = dxil_emit_cast(&ctx->mod, opcode, type,
                                               value);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}
2487 
2488 static enum overload_type
get_overload(nir_alu_type alu_type,unsigned bit_size)2489 get_overload(nir_alu_type alu_type, unsigned bit_size)
2490 {
2491    switch (nir_alu_type_get_base_type(alu_type)) {
2492    case nir_type_int:
2493    case nir_type_uint:
2494    case nir_type_bool:
2495       switch (bit_size) {
2496       case 1: return DXIL_I1;
2497       case 16: return DXIL_I16;
2498       case 32: return DXIL_I32;
2499       case 64: return DXIL_I64;
2500       default:
2501          unreachable("unexpected bit_size");
2502       }
2503    case nir_type_float:
2504       switch (bit_size) {
2505       case 16: return DXIL_F16;
2506       case 32: return DXIL_F32;
2507       case 64: return DXIL_F64;
2508       default:
2509          unreachable("unexpected bit_size");
2510       }
2511    case nir_type_invalid:
2512       return DXIL_NONE;
2513    default:
2514       unreachable("unexpected output type");
2515    }
2516 }
2517 
2518 static enum overload_type
get_ambiguous_overload(struct ntd_context * ctx,nir_intrinsic_instr * intr,enum overload_type default_type)2519 get_ambiguous_overload(struct ntd_context *ctx, nir_intrinsic_instr *intr,
2520                        enum overload_type default_type)
2521 {
2522    if (BITSET_TEST(ctx->int_types, intr->def.index))
2523       return get_overload(nir_type_int, intr->def.bit_size);
2524    if (BITSET_TEST(ctx->float_types, intr->def.index))
2525       return get_overload(nir_type_float, intr->def.bit_size);
2526    return default_type;
2527 }
2528 
2529 static enum overload_type
get_ambiguous_overload_alu_type(struct ntd_context * ctx,nir_intrinsic_instr * intr,nir_alu_type alu_type)2530 get_ambiguous_overload_alu_type(struct ntd_context *ctx, nir_intrinsic_instr *intr,
2531                                 nir_alu_type alu_type)
2532 {
2533    return get_ambiguous_overload(ctx, intr, get_overload(alu_type, intr->def.bit_size));
2534 }
2535 
2536 static bool
emit_unary_intin(struct ntd_context * ctx,nir_alu_instr * alu,enum dxil_intr intr,const struct dxil_value * op)2537 emit_unary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
2538                  enum dxil_intr intr, const struct dxil_value *op)
2539 {
2540    const nir_op_info *info = &nir_op_infos[alu->op];
2541    unsigned src_bits = nir_src_bit_size(alu->src[0].src);
2542    enum overload_type overload = get_overload(info->input_types[0], src_bits);
2543 
2544    const struct dxil_value *v = emit_unary_call(ctx, overload, intr, op);
2545    if (!v)
2546       return false;
2547    store_alu_dest(ctx, alu, 0, v);
2548    return true;
2549 }
2550 
2551 static bool
emit_binary_intin(struct ntd_context * ctx,nir_alu_instr * alu,enum dxil_intr intr,const struct dxil_value * op0,const struct dxil_value * op1)2552 emit_binary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
2553                   enum dxil_intr intr,
2554                   const struct dxil_value *op0, const struct dxil_value *op1)
2555 {
2556    const nir_op_info *info = &nir_op_infos[alu->op];
2557    assert(info->output_type == info->input_types[0]);
2558    assert(info->output_type == info->input_types[1]);
2559    unsigned dst_bits = alu->def.bit_size;
2560    assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
2561    assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
2562    enum overload_type overload = get_overload(info->output_type, dst_bits);
2563 
2564    const struct dxil_value *v = emit_binary_call(ctx, overload, intr,
2565                                                  op0, op1);
2566    if (!v)
2567       return false;
2568    store_alu_dest(ctx, alu, 0, v);
2569    return true;
2570 }
2571 
2572 static bool
emit_tertiary_intin(struct ntd_context * ctx,nir_alu_instr * alu,enum dxil_intr intr,const struct dxil_value * op0,const struct dxil_value * op1,const struct dxil_value * op2)2573 emit_tertiary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
2574                     enum dxil_intr intr,
2575                     const struct dxil_value *op0,
2576                     const struct dxil_value *op1,
2577                     const struct dxil_value *op2)
2578 {
2579    const nir_op_info *info = &nir_op_infos[alu->op];
2580    unsigned dst_bits = alu->def.bit_size;
2581    assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
2582    assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
2583    assert(nir_src_bit_size(alu->src[2].src) == dst_bits);
2584 
2585    assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[0], dst_bits));
2586    assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[1], dst_bits));
2587    assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[2], dst_bits));
2588 
2589    enum overload_type overload = get_overload(info->output_type, dst_bits);
2590 
2591    const struct dxil_value *v = emit_tertiary_call(ctx, overload, intr,
2592                                                    op0, op1, op2);
2593    if (!v)
2594       return false;
2595    store_alu_dest(ctx, alu, 0, v);
2596    return true;
2597 }
2598 
2599 static bool
emit_derivative(struct ntd_context * ctx,nir_intrinsic_instr * intr,enum dxil_intr dxil_intr)2600 emit_derivative(struct ntd_context *ctx, nir_intrinsic_instr *intr,
2601                  enum dxil_intr dxil_intr)
2602 {
2603    const struct dxil_value *src = get_src(ctx, &intr->src[0], 0, nir_type_float);
2604    enum overload_type overload = get_overload(nir_type_float, intr->src[0].ssa->bit_size);
2605    const struct dxil_value *v = emit_unary_call(ctx, overload, dxil_intr, src);
2606    if (!v)
2607       return false;
2608    store_def(ctx, &intr->def, 0, v);
2609    return true;
2610 }
2611 
2612 static bool
emit_bitfield_insert(struct ntd_context * ctx,nir_alu_instr * alu,const struct dxil_value * base,const struct dxil_value * insert,const struct dxil_value * offset,const struct dxil_value * width)2613 emit_bitfield_insert(struct ntd_context *ctx, nir_alu_instr *alu,
2614                      const struct dxil_value *base,
2615                      const struct dxil_value *insert,
2616                      const struct dxil_value *offset,
2617                      const struct dxil_value *width)
2618 {
2619    /* DXIL is width, offset, insert, base, NIR is base, insert, offset, width */
2620    const struct dxil_value *v = emit_quaternary_call(ctx, DXIL_I32, DXIL_INTR_BFI,
2621                                                      width, offset, insert, base);
2622    if (!v)
2623       return false;
2624 
2625    /* DXIL uses the 5 LSB from width/offset. Special-case width >= 32 == copy insert. */
2626    const struct dxil_value *compare_width = dxil_emit_cmp(&ctx->mod, DXIL_ICMP_SGE,
2627       width, dxil_module_get_int32_const(&ctx->mod, 32));
2628    v = dxil_emit_select(&ctx->mod, compare_width, insert, v);
2629    store_alu_dest(ctx, alu, 0, v);
2630    return true;
2631 }
2632 
2633 static bool
emit_dot4add_packed(struct ntd_context * ctx,nir_alu_instr * alu,enum dxil_intr intr,const struct dxil_value * src0,const struct dxil_value * src1,const struct dxil_value * accum)2634 emit_dot4add_packed(struct ntd_context *ctx, nir_alu_instr *alu,
2635                     enum dxil_intr intr,
2636                     const struct dxil_value *src0,
2637                     const struct dxil_value *src1,
2638                     const struct dxil_value *accum)
2639 {
2640    const struct dxil_func *f = dxil_get_function(&ctx->mod, "dx.op.dot4AddPacked", DXIL_I32);
2641    if (!f)
2642       return false;
2643    const struct dxil_value *srcs[] = { dxil_module_get_int32_const(&ctx->mod, intr), accum, src0, src1 };
2644    const struct dxil_value *v = dxil_emit_call(&ctx->mod, f, srcs, ARRAY_SIZE(srcs));
2645    if (!v)
2646       return false;
2647 
2648    store_alu_dest(ctx, alu, 0, v);
2649    return true;
2650 }
2651 
emit_select(struct ntd_context * ctx,nir_alu_instr * alu,const struct dxil_value * sel,const struct dxil_value * val_true,const struct dxil_value * val_false)2652 static bool emit_select(struct ntd_context *ctx, nir_alu_instr *alu,
2653                         const struct dxil_value *sel,
2654                         const struct dxil_value *val_true,
2655                         const struct dxil_value *val_false)
2656 {
2657    assert(sel);
2658    assert(val_true);
2659    assert(val_false);
2660 
2661    const struct dxil_value *v = dxil_emit_select(&ctx->mod, sel, val_true, val_false);
2662    if (!v)
2663       return false;
2664 
2665    store_alu_dest(ctx, alu, 0, v);
2666    return true;
2667 }
2668 
2669 static bool
emit_b2f16(struct ntd_context * ctx,nir_alu_instr * alu,const struct dxil_value * val)2670 emit_b2f16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
2671 {
2672    assert(val);
2673 
2674    struct dxil_module *m = &ctx->mod;
2675 
2676    const struct dxil_value *c1 = dxil_module_get_float16_const(m, 0x3C00);
2677    const struct dxil_value *c0 = dxil_module_get_float16_const(m, 0);
2678 
2679    if (!c0 || !c1)
2680       return false;
2681 
2682    return emit_select(ctx, alu, val, c1, c0);
2683 }
2684 
2685 static bool
emit_b2f32(struct ntd_context * ctx,nir_alu_instr * alu,const struct dxil_value * val)2686 emit_b2f32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
2687 {
2688    assert(val);
2689 
2690    struct dxil_module *m = &ctx->mod;
2691 
2692    const struct dxil_value *c1 = dxil_module_get_float_const(m, 1.0f);
2693    const struct dxil_value *c0 = dxil_module_get_float_const(m, 0.0f);
2694 
2695    if (!c0 || !c1)
2696       return false;
2697 
2698    return emit_select(ctx, alu, val, c1, c0);
2699 }
2700 
2701 static bool
emit_b2f64(struct ntd_context * ctx,nir_alu_instr * alu,const struct dxil_value * val)2702 emit_b2f64(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
2703 {
2704    assert(val);
2705 
2706    struct dxil_module *m = &ctx->mod;
2707 
2708    const struct dxil_value *c1 = dxil_module_get_double_const(m, 1.0);
2709    const struct dxil_value *c0 = dxil_module_get_double_const(m, 0.0);
2710 
2711    if (!c0 || !c1)
2712       return false;
2713 
2714    ctx->mod.feats.doubles = 1;
2715    return emit_select(ctx, alu, val, c1, c0);
2716 }
2717 
2718 static bool
emit_f16tof32(struct ntd_context * ctx,nir_alu_instr * alu,const struct dxil_value * val,bool shift)2719 emit_f16tof32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val, bool shift)
2720 {
2721    if (shift) {
2722       val = dxil_emit_binop(&ctx->mod, DXIL_BINOP_LSHR, val,
2723          dxil_module_get_int32_const(&ctx->mod, 16), 0);
2724       if (!val)
2725          return false;
2726    }
2727 
2728    const struct dxil_func *func = dxil_get_function(&ctx->mod,
2729                                                     "dx.op.legacyF16ToF32",
2730                                                     DXIL_NONE);
2731    if (!func)
2732       return false;
2733 
2734    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F16TOF32);
2735    if (!opcode)
2736       return false;
2737 
2738    const struct dxil_value *args[] = {
2739      opcode,
2740      val
2741    };
2742 
2743    const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2744    if (!v)
2745       return false;
2746    store_alu_dest(ctx, alu, 0, v);
2747    return true;
2748 }
2749 
2750 static bool
emit_f32tof16(struct ntd_context * ctx,nir_alu_instr * alu,const struct dxil_value * val0,const struct dxil_value * val1)2751 emit_f32tof16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val0, const struct dxil_value *val1)
2752 {
2753    const struct dxil_func *func = dxil_get_function(&ctx->mod,
2754                                                     "dx.op.legacyF32ToF16",
2755                                                     DXIL_NONE);
2756    if (!func)
2757       return false;
2758 
2759    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F32TOF16);
2760    if (!opcode)
2761       return false;
2762 
2763    const struct dxil_value *args[] = {
2764      opcode,
2765      val0
2766    };
2767 
2768    const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2769    if (!v)
2770       return false;
2771 
2772    if (!nir_src_is_const(alu->src[1].src) || nir_src_as_int(alu->src[1].src) != 0) {
2773       args[1] = val1;
2774       const struct dxil_value *v_high = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2775       if (!v_high)
2776          return false;
2777 
2778       v_high = dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL, v_high,
2779          dxil_module_get_int32_const(&ctx->mod, 16), 0);
2780       if (!v_high)
2781          return false;
2782 
2783       v = dxil_emit_binop(&ctx->mod, DXIL_BINOP_OR, v, v_high, 0);
2784       if (!v)
2785          return false;
2786    }
2787 
2788    store_alu_dest(ctx, alu, 0, v);
2789    return true;
2790 }
2791 
2792 static bool
emit_vec(struct ntd_context * ctx,nir_alu_instr * alu,unsigned num_inputs)2793 emit_vec(struct ntd_context *ctx, nir_alu_instr *alu, unsigned num_inputs)
2794 {
2795    for (unsigned i = 0; i < num_inputs; i++) {
2796       const struct dxil_value *src =
2797          get_src_ssa(ctx, alu->src[i].src.ssa, alu->src[i].swizzle[0]);
2798       if (!src)
2799          return false;
2800 
2801       store_alu_dest(ctx, alu, i, src);
2802    }
2803    return true;
2804 }
2805 
2806 static bool
emit_make_double(struct ntd_context * ctx,nir_alu_instr * alu)2807 emit_make_double(struct ntd_context *ctx, nir_alu_instr *alu)
2808 {
2809    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.makeDouble", DXIL_F64);
2810    if (!func)
2811       return false;
2812 
2813    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_MAKE_DOUBLE);
2814    if (!opcode)
2815       return false;
2816 
2817    const struct dxil_value *args[3] = {
2818       opcode,
2819       get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_uint32),
2820       get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[1], nir_type_uint32),
2821    };
2822    if (!args[1] || !args[2])
2823       return false;
2824 
2825    const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2826    if (!v)
2827       return false;
2828    store_def(ctx, &alu->def, 0, v);
2829    return true;
2830 }
2831 
2832 static bool
emit_split_double(struct ntd_context * ctx,nir_alu_instr * alu)2833 emit_split_double(struct ntd_context *ctx, nir_alu_instr *alu)
2834 {
2835    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.splitDouble", DXIL_F64);
2836    if (!func)
2837       return false;
2838 
2839    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SPLIT_DOUBLE);
2840    if (!opcode)
2841       return false;
2842 
2843    const struct dxil_value *args[] = {
2844       opcode,
2845       get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_float64)
2846    };
2847    if (!args[1])
2848       return false;
2849 
2850    const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2851    if (!v)
2852       return false;
2853 
2854    const struct dxil_value *hi = dxil_emit_extractval(&ctx->mod, v, 0);
2855    const struct dxil_value *lo = dxil_emit_extractval(&ctx->mod, v, 1);
2856    if (!hi || !lo)
2857       return false;
2858 
2859    store_def(ctx, &alu->def, 0, hi);
2860    store_def(ctx, &alu->def, 1, lo);
2861    return true;
2862 }
2863 
2864 static bool
emit_alu(struct ntd_context * ctx,nir_alu_instr * alu)2865 emit_alu(struct ntd_context *ctx, nir_alu_instr *alu)
2866 {
2867    /* handle vec-instructions first; they are the only ones that produce
2868     * vector results.
2869     */
2870    switch (alu->op) {
2871    case nir_op_vec2:
2872    case nir_op_vec3:
2873    case nir_op_vec4:
2874    case nir_op_vec8:
2875    case nir_op_vec16:
2876       return emit_vec(ctx, alu, nir_op_infos[alu->op].num_inputs);
2877    case nir_op_mov: {
2878          assert(alu->def.num_components == 1);
2879          store_ssa_def(ctx, &alu->def, 0, get_src_ssa(ctx,
2880                         alu->src->src.ssa, alu->src->swizzle[0]));
2881          return true;
2882       }
2883    case nir_op_pack_double_2x32_dxil:
2884       return emit_make_double(ctx, alu);
2885    case nir_op_unpack_double_2x32_dxil:
2886       return emit_split_double(ctx, alu);
2887    case nir_op_bcsel: {
2888       /* Handled here to avoid type forced bitcast to int, since bcsel is used for ints and floats.
2889        * Ideally, the back-typing got both sources to match, but if it didn't, explicitly get src1's type */
2890       const struct dxil_value *src1 = get_src_ssa(ctx, alu->src[1].src.ssa, alu->src[1].swizzle[0]);
2891       nir_alu_type src1_type = dxil_type_to_nir_type(dxil_value_get_type(src1));
2892       return emit_select(ctx, alu,
2893                          get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_bool),
2894                          src1,
2895                          get_src(ctx, &alu->src[2].src, alu->src[2].swizzle[0], src1_type));
2896    }
2897    default:
2898       /* silence warnings */
2899       ;
2900    }
2901 
2902    /* other ops should be scalar */
2903    const struct dxil_value *src[4];
2904    assert(nir_op_infos[alu->op].num_inputs <= 4);
2905    for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
2906       src[i] = get_alu_src(ctx, alu, i);
2907       if (!src[i])
2908          return false;
2909    }
2910 
2911    switch (alu->op) {
2912    case nir_op_iadd:
2913    case nir_op_fadd: return emit_binop(ctx, alu, DXIL_BINOP_ADD, src[0], src[1]);
2914 
2915    case nir_op_isub:
2916    case nir_op_fsub: return emit_binop(ctx, alu, DXIL_BINOP_SUB, src[0], src[1]);
2917 
2918    case nir_op_imul:
2919    case nir_op_fmul: return emit_binop(ctx, alu, DXIL_BINOP_MUL, src[0], src[1]);
2920 
2921    case nir_op_fdiv:
2922       if (alu->def.bit_size == 64)
2923          ctx->mod.feats.dx11_1_double_extensions = 1;
2924       return emit_binop(ctx, alu, DXIL_BINOP_SDIV, src[0], src[1]);
2925 
2926    case nir_op_idiv:
2927    case nir_op_udiv:
2928       if (nir_src_is_const(alu->src[1].src)) {
2929          /* It's illegal to emit a literal divide by 0 in DXIL */
2930          nir_scalar divisor = nir_scalar_chase_alu_src(nir_get_scalar(&alu->def, 0), 1);
2931          if (nir_scalar_as_int(divisor) == 0) {
2932             store_alu_dest(ctx, alu, 0,
2933                            dxil_module_get_int_const(&ctx->mod, 0, alu->def.bit_size));
2934             return true;
2935          }
2936       }
2937       return emit_binop(ctx, alu, alu->op == nir_op_idiv ? DXIL_BINOP_SDIV : DXIL_BINOP_UDIV, src[0], src[1]);
2938 
2939    case nir_op_irem: return emit_binop(ctx, alu, DXIL_BINOP_SREM, src[0], src[1]);
2940    case nir_op_imod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
2941    case nir_op_umod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
2942    case nir_op_ishl: return emit_shift(ctx, alu, DXIL_BINOP_SHL, src[0], src[1]);
2943    case nir_op_ishr: return emit_shift(ctx, alu, DXIL_BINOP_ASHR, src[0], src[1]);
2944    case nir_op_ushr: return emit_shift(ctx, alu, DXIL_BINOP_LSHR, src[0], src[1]);
2945    case nir_op_iand: return emit_binop(ctx, alu, DXIL_BINOP_AND, src[0], src[1]);
2946    case nir_op_ior:  return emit_binop(ctx, alu, DXIL_BINOP_OR, src[0], src[1]);
2947    case nir_op_ixor: return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], src[1]);
2948    case nir_op_inot: {
2949       unsigned bit_size = alu->def.bit_size;
2950       intmax_t val = bit_size == 1 ? 1 : -1;
2951       const struct dxil_value *negative_one = dxil_module_get_int_const(&ctx->mod, val, bit_size);
2952       return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], negative_one);
2953    }
2954    case nir_op_ieq:  return emit_cmp(ctx, alu, DXIL_ICMP_EQ, src[0], src[1]);
2955    case nir_op_ine:  return emit_cmp(ctx, alu, DXIL_ICMP_NE, src[0], src[1]);
2956    case nir_op_ige:  return emit_cmp(ctx, alu, DXIL_ICMP_SGE, src[0], src[1]);
2957    case nir_op_uge:  return emit_cmp(ctx, alu, DXIL_ICMP_UGE, src[0], src[1]);
2958    case nir_op_ilt:  return emit_cmp(ctx, alu, DXIL_ICMP_SLT, src[0], src[1]);
2959    case nir_op_ult:  return emit_cmp(ctx, alu, DXIL_ICMP_ULT, src[0], src[1]);
2960    case nir_op_feq:  return emit_cmp(ctx, alu, DXIL_FCMP_OEQ, src[0], src[1]);
2961    case nir_op_fneu: return emit_cmp(ctx, alu, DXIL_FCMP_UNE, src[0], src[1]);
2962    case nir_op_flt:  return emit_cmp(ctx, alu, DXIL_FCMP_OLT, src[0], src[1]);
2963    case nir_op_fge:  return emit_cmp(ctx, alu, DXIL_FCMP_OGE, src[0], src[1]);
2964    case nir_op_ftrunc: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_Z, src[0]);
2965    case nir_op_fabs: return emit_unary_intin(ctx, alu, DXIL_INTR_FABS, src[0]);
2966    case nir_op_fcos: return emit_unary_intin(ctx, alu, DXIL_INTR_FCOS, src[0]);
2967    case nir_op_fsin: return emit_unary_intin(ctx, alu, DXIL_INTR_FSIN, src[0]);
2968    case nir_op_fceil: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_PI, src[0]);
2969    case nir_op_fexp2: return emit_unary_intin(ctx, alu, DXIL_INTR_FEXP2, src[0]);
2970    case nir_op_flog2: return emit_unary_intin(ctx, alu, DXIL_INTR_FLOG2, src[0]);
2971    case nir_op_ffloor: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NI, src[0]);
2972    case nir_op_ffract: return emit_unary_intin(ctx, alu, DXIL_INTR_FRC, src[0]);
2973    case nir_op_fisnormal: return emit_unary_intin(ctx, alu, DXIL_INTR_ISNORMAL, src[0]);
2974    case nir_op_fisfinite: return emit_unary_intin(ctx, alu, DXIL_INTR_ISFINITE, src[0]);
2975 
2976    case nir_op_fround_even: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NE, src[0]);
2977    case nir_op_frcp: {
2978       const struct dxil_value *one;
2979       switch (alu->def.bit_size) {
2980       case 16:
2981          one = dxil_module_get_float16_const(&ctx->mod, 0x3C00);
2982          break;
2983       case 32:
2984          one = dxil_module_get_float_const(&ctx->mod, 1.0f);
2985          break;
2986       case 64:
2987          one = dxil_module_get_double_const(&ctx->mod, 1.0);
2988          break;
2989       default: unreachable("Invalid float size");
2990       }
2991       return emit_binop(ctx, alu, DXIL_BINOP_SDIV, one, src[0]);
2992    }
2993    case nir_op_fsat: return emit_unary_intin(ctx, alu, DXIL_INTR_SATURATE, src[0]);
2994    case nir_op_bit_count: return emit_unary_intin(ctx, alu, DXIL_INTR_COUNTBITS, src[0]);
2995    case nir_op_bitfield_reverse: return emit_unary_intin(ctx, alu, DXIL_INTR_BFREV, src[0]);
2996    case nir_op_ufind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_HI, src[0]);
2997    case nir_op_ifind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_SHI, src[0]);
2998    case nir_op_find_lsb: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_LO, src[0]);
2999    case nir_op_imax: return emit_binary_intin(ctx, alu, DXIL_INTR_IMAX, src[0], src[1]);
3000    case nir_op_imin: return emit_binary_intin(ctx, alu, DXIL_INTR_IMIN, src[0], src[1]);
3001    case nir_op_umax: return emit_binary_intin(ctx, alu, DXIL_INTR_UMAX, src[0], src[1]);
3002    case nir_op_umin: return emit_binary_intin(ctx, alu, DXIL_INTR_UMIN, src[0], src[1]);
3003    case nir_op_frsq: return emit_unary_intin(ctx, alu, DXIL_INTR_RSQRT, src[0]);
3004    case nir_op_fsqrt: return emit_unary_intin(ctx, alu, DXIL_INTR_SQRT, src[0]);
3005    case nir_op_fmax: return emit_binary_intin(ctx, alu, DXIL_INTR_FMAX, src[0], src[1]);
3006    case nir_op_fmin: return emit_binary_intin(ctx, alu, DXIL_INTR_FMIN, src[0], src[1]);
3007    case nir_op_ffma:
3008       if (alu->def.bit_size == 64)
3009          ctx->mod.feats.dx11_1_double_extensions = 1;
3010       return emit_tertiary_intin(ctx, alu, DXIL_INTR_FMA, src[0], src[1], src[2]);
3011 
3012    case nir_op_ibfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_IBFE, src[2], src[1], src[0]);
3013    case nir_op_ubfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_UBFE, src[2], src[1], src[0]);
3014    case nir_op_bitfield_insert: return emit_bitfield_insert(ctx, alu, src[0], src[1], src[2], src[3]);
3015 
3016    case nir_op_unpack_half_2x16_split_x: return emit_f16tof32(ctx, alu, src[0], false);
3017    case nir_op_unpack_half_2x16_split_y: return emit_f16tof32(ctx, alu, src[0], true);
3018    case nir_op_pack_half_2x16_split: return emit_f32tof16(ctx, alu, src[0], src[1]);
3019 
3020    case nir_op_sdot_4x8_iadd: return emit_dot4add_packed(ctx, alu, DXIL_INTR_DOT4_ADD_I8_PACKED, src[0], src[1], src[2]);
3021    case nir_op_udot_4x8_uadd: return emit_dot4add_packed(ctx, alu, DXIL_INTR_DOT4_ADD_U8_PACKED, src[0], src[1], src[2]);
3022 
3023    case nir_op_i2i1:
3024    case nir_op_u2u1:
3025    case nir_op_b2i16:
3026    case nir_op_i2i16:
3027    case nir_op_i2imp:
3028    case nir_op_f2i16:
3029    case nir_op_f2imp:
3030    case nir_op_f2u16:
3031    case nir_op_f2ump:
3032    case nir_op_u2u16:
3033    case nir_op_u2f16:
3034    case nir_op_u2fmp:
3035    case nir_op_i2f16:
3036    case nir_op_i2fmp:
3037    case nir_op_f2f16_rtz:
3038    case nir_op_f2f16:
3039    case nir_op_f2fmp:
3040    case nir_op_b2i32:
3041    case nir_op_f2f32:
3042    case nir_op_f2i32:
3043    case nir_op_f2u32:
3044    case nir_op_i2f32:
3045    case nir_op_i2i32:
3046    case nir_op_u2f32:
3047    case nir_op_u2u32:
3048    case nir_op_b2i64:
3049    case nir_op_f2f64:
3050    case nir_op_f2i64:
3051    case nir_op_f2u64:
3052    case nir_op_i2f64:
3053    case nir_op_i2i64:
3054    case nir_op_u2f64:
3055    case nir_op_u2u64:
3056       return emit_cast(ctx, alu, src[0]);
3057 
3058    case nir_op_b2f16: return emit_b2f16(ctx, alu, src[0]);
3059    case nir_op_b2f32: return emit_b2f32(ctx, alu, src[0]);
3060    case nir_op_b2f64: return emit_b2f64(ctx, alu, src[0]);
3061    default:
3062       log_nir_instr_unsupported(ctx->logger, "Unimplemented ALU instruction",
3063                                 &alu->instr);
3064       return false;
3065    }
3066 }
3067 
3068 static const struct dxil_value *
load_ubo(struct ntd_context * ctx,const struct dxil_value * handle,const struct dxil_value * offset,enum overload_type overload)3069 load_ubo(struct ntd_context *ctx, const struct dxil_value *handle,
3070          const struct dxil_value *offset, enum overload_type overload)
3071 {
3072    assert(handle && offset);
3073 
3074    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CBUFFER_LOAD_LEGACY);
3075    if (!opcode)
3076       return NULL;
3077 
3078    const struct dxil_value *args[] = {
3079       opcode, handle, offset
3080    };
3081 
3082    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cbufferLoadLegacy", overload);
3083    if (!func)
3084       return NULL;
3085    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3086 }
3087 
3088 static bool
emit_barrier_impl(struct ntd_context * ctx,nir_variable_mode modes,mesa_scope execution_scope,mesa_scope mem_scope)3089 emit_barrier_impl(struct ntd_context *ctx, nir_variable_mode modes, mesa_scope execution_scope, mesa_scope mem_scope)
3090 {
3091    const struct dxil_value *opcode, *mode;
3092    const struct dxil_func *func;
3093    uint32_t flags = 0;
3094 
3095    if (execution_scope == SCOPE_WORKGROUP)
3096       flags |= DXIL_BARRIER_MODE_SYNC_THREAD_GROUP;
3097 
3098    bool is_compute = ctx->mod.shader_kind == DXIL_COMPUTE_SHADER;
3099 
3100    if ((modes & (nir_var_mem_ssbo | nir_var_mem_global | nir_var_image)) &&
3101        (mem_scope > SCOPE_WORKGROUP || !is_compute)) {
3102       flags |= DXIL_BARRIER_MODE_UAV_FENCE_GLOBAL;
3103    } else {
3104       flags |= DXIL_BARRIER_MODE_UAV_FENCE_THREAD_GROUP;
3105    }
3106 
3107    if ((modes & nir_var_mem_shared) && is_compute)
3108       flags |= DXIL_BARRIER_MODE_GROUPSHARED_MEM_FENCE;
3109 
3110    func = dxil_get_function(&ctx->mod, "dx.op.barrier", DXIL_NONE);
3111    if (!func)
3112       return false;
3113 
3114    opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_BARRIER);
3115    if (!opcode)
3116       return false;
3117 
3118    mode = dxil_module_get_int32_const(&ctx->mod, flags);
3119    if (!mode)
3120       return false;
3121 
3122    const struct dxil_value *args[] = { opcode, mode };
3123 
3124    return dxil_emit_call_void(&ctx->mod, func,
3125                               args, ARRAY_SIZE(args));
3126 }
3127 
3128 static bool
emit_barrier(struct ntd_context * ctx,nir_intrinsic_instr * intr)3129 emit_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3130 {
3131    return emit_barrier_impl(ctx,
3132       nir_intrinsic_memory_modes(intr),
3133       nir_intrinsic_execution_scope(intr),
3134       nir_intrinsic_memory_scope(intr));
3135 }
3136 
3137 static bool
emit_load_global_invocation_id(struct ntd_context * ctx,nir_intrinsic_instr * intr)3138 emit_load_global_invocation_id(struct ntd_context *ctx,
3139                                     nir_intrinsic_instr *intr)
3140 {
3141    nir_component_mask_t comps = nir_def_components_read(&intr->def);
3142 
3143    for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
3144       if (comps & (1 << i)) {
3145          const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
3146          if (!idx)
3147             return false;
3148          const struct dxil_value *globalid = emit_threadid_call(ctx, idx);
3149 
3150          if (!globalid)
3151             return false;
3152 
3153          store_def(ctx, &intr->def, i, globalid);
3154       }
3155    }
3156    return true;
3157 }
3158 
3159 static bool
emit_load_local_invocation_id(struct ntd_context * ctx,nir_intrinsic_instr * intr)3160 emit_load_local_invocation_id(struct ntd_context *ctx,
3161                               nir_intrinsic_instr *intr)
3162 {
3163    nir_component_mask_t comps = nir_def_components_read(&intr->def);
3164 
3165    for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
3166       if (comps & (1 << i)) {
3167          const struct dxil_value
3168             *idx = dxil_module_get_int32_const(&ctx->mod, i);
3169          if (!idx)
3170             return false;
3171          const struct dxil_value
3172             *threadidingroup = emit_threadidingroup_call(ctx, idx);
3173          if (!threadidingroup)
3174             return false;
3175          store_def(ctx, &intr->def, i, threadidingroup);
3176       }
3177    }
3178    return true;
3179 }
3180 
3181 static bool
emit_load_local_invocation_index(struct ntd_context * ctx,nir_intrinsic_instr * intr)3182 emit_load_local_invocation_index(struct ntd_context *ctx,
3183                                  nir_intrinsic_instr *intr)
3184 {
3185    const struct dxil_value
3186       *flattenedthreadidingroup = emit_flattenedthreadidingroup_call(ctx);
3187    if (!flattenedthreadidingroup)
3188       return false;
3189    store_def(ctx, &intr->def, 0, flattenedthreadidingroup);
3190 
3191    return true;
3192 }
3193 
3194 static bool
emit_load_local_workgroup_id(struct ntd_context * ctx,nir_intrinsic_instr * intr)3195 emit_load_local_workgroup_id(struct ntd_context *ctx,
3196                               nir_intrinsic_instr *intr)
3197 {
3198    nir_component_mask_t comps = nir_def_components_read(&intr->def);
3199 
3200    for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
3201       if (comps & (1 << i)) {
3202          const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
3203          if (!idx)
3204             return false;
3205          const struct dxil_value *groupid = emit_groupid_call(ctx, idx);
3206          if (!groupid)
3207             return false;
3208          store_def(ctx, &intr->def, i, groupid);
3209       }
3210    }
3211    return true;
3212 }
3213 
3214 static const struct dxil_value *
call_unary_external_function(struct ntd_context * ctx,const char * name,int32_t dxil_intr,enum overload_type overload)3215 call_unary_external_function(struct ntd_context *ctx,
3216                              const char *name,
3217                              int32_t dxil_intr,
3218                              enum overload_type overload)
3219 {
3220    const struct dxil_func *func =
3221       dxil_get_function(&ctx->mod, name, overload);
3222    if (!func)
3223       return false;
3224 
3225    const struct dxil_value *opcode =
3226       dxil_module_get_int32_const(&ctx->mod, dxil_intr);
3227    if (!opcode)
3228       return false;
3229 
3230    const struct dxil_value *args[] = {opcode};
3231 
3232    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3233 }
3234 
3235 static bool
emit_load_unary_external_function(struct ntd_context * ctx,nir_intrinsic_instr * intr,const char * name,int32_t dxil_intr,nir_alu_type type)3236 emit_load_unary_external_function(struct ntd_context *ctx,
3237                                   nir_intrinsic_instr *intr, const char *name,
3238                                   int32_t dxil_intr,
3239                                   nir_alu_type type)
3240 {
3241    const struct dxil_value *value = call_unary_external_function(ctx, name, dxil_intr,
3242                                                                  get_overload(type, intr->def.bit_size));
3243    store_def(ctx, &intr->def, 0, value);
3244 
3245    return true;
3246 }
3247 
3248 static bool
emit_load_sample_mask_in(struct ntd_context * ctx,nir_intrinsic_instr * intr)3249 emit_load_sample_mask_in(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3250 {
3251    const struct dxil_value *value = call_unary_external_function(ctx,
3252       "dx.op.coverage", DXIL_INTR_COVERAGE, DXIL_I32);
3253 
3254    /* Mask coverage with (1 << sample index). Note, done as an AND to handle extrapolation cases. */
3255    if (ctx->mod.info.has_per_sample_input) {
3256       value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_AND, value,
3257          dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL,
3258             dxil_module_get_int32_const(&ctx->mod, 1),
3259             call_unary_external_function(ctx, "dx.op.sampleIndex", DXIL_INTR_SAMPLE_INDEX, DXIL_I32), 0), 0);
3260    }
3261 
3262    store_def(ctx, &intr->def, 0, value);
3263    return true;
3264 }
3265 
3266 static bool
emit_load_tess_coord(struct ntd_context * ctx,nir_intrinsic_instr * intr)3267 emit_load_tess_coord(struct ntd_context *ctx,
3268                      nir_intrinsic_instr *intr)
3269 {
3270    const struct dxil_func *func =
3271       dxil_get_function(&ctx->mod, "dx.op.domainLocation", DXIL_F32);
3272    if (!func)
3273       return false;
3274 
3275    const struct dxil_value *opcode =
3276       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DOMAIN_LOCATION);
3277    if (!opcode)
3278       return false;
3279 
3280    unsigned num_coords = ctx->shader->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES ? 3 : 2;
3281    for (unsigned i = 0; i < num_coords; ++i) {
3282       unsigned component_idx = i;
3283 
3284       const struct dxil_value *component = dxil_module_get_int8_const(&ctx->mod, component_idx);
3285       if (!component)
3286          return false;
3287 
3288       const struct dxil_value *args[] = { opcode, component };
3289 
3290       const struct dxil_value *value =
3291          dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3292       store_def(ctx, &intr->def, i, value);
3293    }
3294 
3295    for (unsigned i = num_coords; i < intr->def.num_components; ++i) {
3296       const struct dxil_value *value = dxil_module_get_float_const(&ctx->mod, 0.0f);
3297       store_def(ctx, &intr->def, i, value);
3298    }
3299 
3300    return true;
3301 }
3302 
3303 static const struct dxil_value *
get_int32_undef(struct dxil_module * m)3304 get_int32_undef(struct dxil_module *m)
3305 {
3306    const struct dxil_type *int32_type =
3307       dxil_module_get_int_type(m, 32);
3308    if (!int32_type)
3309       return NULL;
3310 
3311    return dxil_module_get_undef(m, int32_type);
3312 }
3313 
static const struct dxil_value *
get_resource_handle(struct ntd_context *ctx, nir_src *src, enum dxil_resource_class class,
                    enum dxil_resource_kind kind)
{
   /* This source might be one of:
    * 1. Constant resource index - just look it up in precomputed handle arrays
    *    If it's null in that array, create a handle
    * 2. A handle from load_vulkan_descriptor - just get the stored SSA value
    * 3. Dynamic resource index - create a handle for it here
    */
   assert(src->ssa->num_components == 1 && src->ssa->bit_size == 32);
   nir_const_value *const_block_index = nir_src_as_const_value(*src);
   const struct dxil_value *handle_entry = NULL;
   if (const_block_index) {
      /* Case 1: constant index. Pick the precomputed handle array that
       * matches the resource kind/class. */
      assert(ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN);
      switch (kind) {
      case DXIL_RESOURCE_KIND_CBUFFER:
         handle_entry = ctx->cbv_handles[const_block_index->u32];
         break;
      case DXIL_RESOURCE_KIND_RAW_BUFFER:
         /* Raw buffers (SSBOs) may be bound as writable UAVs or read-only SRVs. */
         if (class == DXIL_RESOURCE_CLASS_UAV)
            handle_entry = ctx->ssbo_handles[const_block_index->u32];
         else
            handle_entry = ctx->srv_handles[const_block_index->u32];
         break;
      case DXIL_RESOURCE_KIND_SAMPLER:
         handle_entry = ctx->sampler_handles[const_block_index->u32];
         break;
      default:
         /* Typed resources: images as UAVs, textures as SRVs. */
         if (class == DXIL_RESOURCE_CLASS_UAV)
            handle_entry = ctx->image_handles[const_block_index->u32];
         else
            handle_entry = ctx->srv_handles[const_block_index->u32];
         break;
      }
   }

   if (handle_entry)
      return handle_entry;

   /* Case 2: the handle was already created elsewhere (deref chain or
    * load_vulkan_descriptor) and stored as this SSA value. */
   if (nir_src_as_deref(*src) ||
       ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      return get_src_ssa(ctx, src->ssa, 0);
   }

   /* Case 3: dynamic index - create a handle here. GL keeps dynamically
    * indexed UAVs in separate register spaces (2 for raw, 1 for typed). */
   unsigned space = 0;
   if (ctx->opts->environment == DXIL_ENVIRONMENT_GL &&
       class == DXIL_RESOURCE_CLASS_UAV) {
      if (kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
         space = 2;
      else
         space = 1;
   }

   /* The base binding here will almost always be zero. The only cases where we end
    * up in this type of dynamic indexing are:
    * 1. GL UBOs
    * 2. GL SSBOs
    * 3. CL SSBOs
    * In all cases except GL UBOs, the resources are a single zero-based array.
    * In that case, the base is 1, because uniforms use 0 and cannot by dynamically
    * indexed. All other cases should either fall into static indexing (first early return),
    * deref-based dynamic handle creation (images, or Vulkan textures/samplers), or
    * load_vulkan_descriptor handle creation.
    */
   unsigned base_binding = 0;
   if (ctx->shader->info.first_ubo_is_default_ubo &&
       class == DXIL_RESOURCE_CLASS_CBV)
      base_binding = 1;

   const struct dxil_value *value = get_src(ctx, src, 0, nir_type_uint);
   const struct dxil_value *handle = emit_createhandle_call_dynamic(ctx, class,
      space, base_binding, value, !const_block_index);

   return handle;
}
3390 
3391 static const struct dxil_value *
create_image_handle(struct ntd_context * ctx,nir_intrinsic_instr * image_intr)3392 create_image_handle(struct ntd_context *ctx, nir_intrinsic_instr *image_intr)
3393 {
3394    const struct dxil_value *unannotated_handle =
3395       emit_createhandle_heap(ctx, get_src(ctx, &image_intr->src[0], 0, nir_type_uint32), false, true /*TODO: divergence*/);
3396    const struct dxil_value *res_props =
3397       dxil_module_get_uav_res_props_const(&ctx->mod, image_intr);
3398 
3399    if (!unannotated_handle || !res_props)
3400       return NULL;
3401 
3402    return emit_annotate_handle(ctx, unannotated_handle, res_props);
3403 }
3404 
3405 static const struct dxil_value *
create_srv_handle(struct ntd_context * ctx,nir_tex_instr * tex,nir_src * src)3406 create_srv_handle(struct ntd_context *ctx, nir_tex_instr *tex, nir_src *src)
3407 {
3408    const struct dxil_value *unannotated_handle =
3409       emit_createhandle_heap(ctx, get_src(ctx, src, 0, nir_type_uint32), false, true /*TODO: divergence*/);
3410    const struct dxil_value *res_props =
3411       dxil_module_get_srv_res_props_const(&ctx->mod, tex);
3412 
3413    if (!unannotated_handle || !res_props)
3414       return NULL;
3415 
3416    return emit_annotate_handle(ctx, unannotated_handle, res_props);
3417 }
3418 
3419 static const struct dxil_value *
create_sampler_handle(struct ntd_context * ctx,bool is_shadow,nir_src * src)3420 create_sampler_handle(struct ntd_context *ctx, bool is_shadow, nir_src *src)
3421 {
3422    const struct dxil_value *unannotated_handle =
3423       emit_createhandle_heap(ctx, get_src(ctx, src, 0, nir_type_uint32), true, true /*TODO: divergence*/);
3424    const struct dxil_value *res_props =
3425       dxil_module_get_sampler_res_props_const(&ctx->mod, is_shadow);
3426 
3427    if (!unannotated_handle || !res_props)
3428       return NULL;
3429 
3430    return emit_annotate_handle(ctx, unannotated_handle, res_props);
3431 }
3432 
static bool
emit_load_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   /* Load up to 4 components from an SSBO: bufferLoad on SM < 6.2, or
    * rawBufferLoad on SM >= 6.2 (which also supports 16-bit loads).
    * src[0] = buffer index, src[1] = byte offset. */
   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);

   enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV;
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      /* In the Vulkan environment, read-only SSBOs are bound as SRVs. */
      nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
      if (var && var->data.access & ACCESS_NON_WRITEABLE)
         class = DXIL_RESOURCE_CLASS_SRV;
   }

   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);
   if (!int32_undef || !handle || !offset)
      return false;

   assert(nir_src_bit_size(intr->src[0]) == 32);
   assert(nir_intrinsic_dest_components(intr) <= 4);

   /* Raw buffers address by byte offset; the second coordinate is unused. */
   const struct dxil_value *coord[2] = {
      offset,
      int32_undef
   };

   enum overload_type overload = get_ambiguous_overload_alu_type(ctx, intr, nir_type_uint);
   const struct dxil_value *load = ctx->mod.minor_version >= 2 ?
      emit_raw_bufferload_call(ctx, handle, coord,
                               overload,
                               nir_intrinsic_dest_components(intr),
                               intr->def.bit_size / 8) :
      emit_bufferload_call(ctx, handle, coord, overload);
   if (!load)
      return false;

   /* The call returns an aggregate; extract each loaded component. */
   for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
      const struct dxil_value *val =
         dxil_emit_extractval(&ctx->mod, load, i);
      if (!val)
         return false;
      store_def(ctx, &intr->def, i, val);
   }
   /* 16-bit access requires the native-low-precision shader feature flag. */
   if (intr->def.bit_size == 16)
      ctx->mod.feats.native_low_precision = true;
   return true;
}
3480 
static bool
emit_store_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   /* Store up to 4 components to an SSBO: bufferStore on SM < 6.2, or
    * rawBufferStore on SM >= 6.2. src[0] = value, src[1] = buffer index,
    * src[2] = byte offset. */
   const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[1], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[2], 0, nir_type_uint);
   if (!handle || !offset)
      return false;

   unsigned num_components = nir_src_num_components(intr->src[0]);
   assert(num_components <= 4);
   /* 16-bit access requires the native-low-precision shader feature flag. */
   if (nir_src_bit_size(intr->src[0]) == 16)
      ctx->mod.feats.native_low_precision = true;

   /* Derive the NIR type from the already-emitted source value so the
    * store overload matches the value's actual DXIL type. */
   nir_alu_type type =
      dxil_type_to_nir_type(dxil_value_get_type(get_src_ssa(ctx, intr->src[0].ssa, 0)));
   const struct dxil_value *value[4] = { 0 };
   for (unsigned i = 0; i < num_components; ++i) {
      value[i] = get_src(ctx, &intr->src[0], i, type);
      if (!value[i])
         return false;
   }

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   /* Raw buffers address by byte offset; the second coordinate is unused. */
   const struct dxil_value *coord[2] = {
      offset,
      int32_undef
   };

   enum overload_type overload = get_overload(type, intr->src[0].ssa->bit_size);
   /* The store intrinsic always takes 4 values; pad the tail with undef
    * (the write mask keeps the padding from being written). */
   if (num_components < 4) {
      const struct dxil_value *value_undef = dxil_module_get_undef(&ctx->mod, dxil_value_get_type(value[0]));
      if (!value_undef)
         return false;

      for (int i = num_components; i < 4; ++i)
         value[i] = value_undef;
   }

   const struct dxil_value *write_mask =
      dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
   if (!write_mask)
      return false;

   return ctx->mod.minor_version >= 2 ?
      emit_raw_bufferstore_call(ctx, handle, coord, value, write_mask, overload, intr->src[0].ssa->bit_size / 8) :
      emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload);
}
3532 
static bool
emit_load_ubo_vec4(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   /* Load one 16-byte row of a constant buffer via cbufferLoadLegacy and
    * extract the components this intrinsic needs. src[0] = buffer index,
    * src[1] = vec4 (row) offset. */
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, DXIL_RESOURCE_KIND_CBUFFER);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);

   if (!handle || !offset)
      return false;

   enum overload_type overload = get_ambiguous_overload_alu_type(ctx, intr, nir_type_uint);
   const struct dxil_value *agg = load_ubo(ctx, handle, offset, overload);
   if (!agg)
      return false;

   /* Start extracting at the intrinsic's component offset within the row. */
   unsigned first_component = nir_intrinsic_has_component(intr) ?
      nir_intrinsic_component(intr) : 0;
   for (unsigned i = 0; i < intr->def.num_components; i++)
      store_def(ctx, &intr->def, i,
                 dxil_emit_extractval(&ctx->mod, agg, i + first_component));

   /* 16-bit access requires the native-low-precision shader feature flag. */
   if (intr->def.bit_size == 16)
      ctx->mod.feats.native_low_precision = true;
   return true;
}
3558 
3559 /* Need to add patch-ness as a matching parameter, since driver_location is *not* unique
3560  * between control points and patch variables in HS/DS
3561  */
3562 static nir_variable *
find_patch_matching_variable_by_driver_location(nir_shader * s,nir_variable_mode mode,unsigned driver_location,bool patch)3563 find_patch_matching_variable_by_driver_location(nir_shader *s, nir_variable_mode mode, unsigned driver_location, bool patch)
3564 {
3565    nir_foreach_variable_with_modes(var, s, mode) {
3566       if (var->data.driver_location == driver_location &&
3567           var->data.patch == patch)
3568          return var;
3569    }
3570    return NULL;
3571 }
3572 
static bool
emit_store_output_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   /* Emit dx.op.storeOutput, or dx.op.storePatchConstant when this is a
    * hull-shader patch-constant store. */
   assert(intr->intrinsic == nir_intrinsic_store_output ||
          ctx->mod.shader_kind == DXIL_HULL_SHADER);
   bool is_patch_constant = intr->intrinsic == nir_intrinsic_store_output &&
      ctx->mod.shader_kind == DXIL_HULL_SHADER;
   nir_alu_type out_type = nir_intrinsic_src_type(intr);
   enum overload_type overload = get_overload(out_type, intr->src[0].ssa->bit_size);
   const struct dxil_func *func = dxil_get_function(&ctx->mod, is_patch_constant ?
      "dx.op.storePatchConstant" : "dx.op.storeOutput",
      overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, is_patch_constant ?
      DXIL_INTR_STORE_PATCH_CONSTANT : DXIL_INTR_STORE_OUTPUT);
   /* Map the NIR base (driver_location) to the DXIL signature element index. */
   uint8_t *io_mappings = is_patch_constant ? ctx->mod.patch_mappings : ctx->mod.output_mappings;
   uint8_t io_index = io_mappings[nir_intrinsic_base(intr)];
   const struct dxil_value *output_id = dxil_module_get_int32_const(&ctx->mod, io_index);
   /* For per-vertex stores (src count differs), the row/array index source
    * sits one slot further along. */
   unsigned row_index = intr->intrinsic == nir_intrinsic_store_output ? 1 : 2;

   /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature
    * generation, so muck with them here too.
    */
   nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
   bool is_tess_level = is_patch_constant &&
                        (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
                         semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER);

   const struct dxil_value *row = NULL;
   const struct dxil_value *col = NULL;
   if (is_tess_level)
      col = dxil_module_get_int8_const(&ctx->mod, 0);
   else
      row = get_src(ctx, &intr->src[row_index], 0, nir_type_int);

   bool success = true;
   uint32_t writemask = nir_intrinsic_write_mask(intr);

   /* NOTE(review): var is dereferenced without a NULL check - this assumes
    * a matching output variable always exists for the intrinsic's base;
    * confirm callers guarantee this. */
   nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_out, nir_intrinsic_base(intr), is_patch_constant);
   unsigned var_base_component = var->data.location_frac;
   unsigned base_component = nir_intrinsic_component(intr) - var_base_component;

   if (ctx->mod.minor_validator >= 5) {
      /* Validator 1.5+ tracks which signature components are ever written
       * and which elements are dynamically indexed. */
      struct dxil_signature_record *sig_rec = is_patch_constant ?
         &ctx->mod.patch_consts[io_index] :
         &ctx->mod.outputs[io_index];
      unsigned comp_size = intr->src[0].ssa->bit_size == 64 ? 2 : 1;
      unsigned comp_mask = 0;
      if (is_tess_level)
         comp_mask = 1;
      else if (comp_size == 1)
         comp_mask = writemask << var_base_component;
      else {
         /* 64-bit components occupy two 32-bit signature slots each. */
         for (unsigned i = 0; i < intr->num_components; ++i)
            if ((writemask & (1 << i)))
               comp_mask |= 3 << ((i + var_base_component) * comp_size);
      }
      for (unsigned r = 0; r < sig_rec->num_elements; ++r)
         sig_rec->elements[r].never_writes_mask &= ~comp_mask;

      if (!nir_src_is_const(intr->src[row_index])) {
         struct dxil_psv_signature_element *psv_rec = is_patch_constant ?
            &ctx->mod.psv_patch_consts[io_index] :
            &ctx->mod.psv_outputs[io_index];
         psv_rec->dynamic_mask_and_stream |= comp_mask;
      }
   }

   for (unsigned i = 0; i < intr->num_components && success; ++i) {
      if (writemask & (1 << i)) {
         /* Tess levels are transposed: the component index selects the row. */
         if (is_tess_level)
            row = dxil_module_get_int32_const(&ctx->mod, i + base_component);
         else
            col = dxil_module_get_int8_const(&ctx->mod, i + base_component);
         const struct dxil_value *value = get_src(ctx, &intr->src[0], i, out_type);
         if (!col || !row || !value)
            return false;

         const struct dxil_value *args[] = {
            opcode, output_id, row, col, value
         };
         success &= dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
      }
   }

   return success;
}
3663 
static bool
emit_load_input_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   /* Lower a NIR input-load intrinsic to one DXIL call per component:
    * dx.op.loadInput / loadPatchConstant / loadOutputControlPoint, or
    * dx.op.attributeAtVertex for flat-shaded FS inputs with a non-zero
    * provoking vertex. Returns false on any constant/function allocation
    * failure. */
   bool attr_at_vertex = false;
   /* Flat float FS inputs can be read directly from the provoking vertex
    * via attributeAtVertex instead of relying on interpolation. */
   if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER &&
      ctx->opts->interpolate_at_vertex &&
      ctx->opts->provoking_vertex != 0 &&
      (nir_intrinsic_dest_type(intr) & nir_type_float)) {
      nir_variable *var = nir_find_variable_with_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr));

      attr_at_vertex = var && var->data.interpolation == INTERP_MODE_FLAT;
   }

   /* DS load_input reads the patch-constant signature; HS load_output
    * reads back patch constants this invocation group wrote. */
   bool is_patch_constant = (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER &&
                             intr->intrinsic == nir_intrinsic_load_input) ||
                            (ctx->mod.shader_kind == DXIL_HULL_SHADER &&
                             intr->intrinsic == nir_intrinsic_load_output);
   bool is_output_control_point = intr->intrinsic == nir_intrinsic_load_per_vertex_output;

   unsigned opcode_val;
   const char *func_name;
   if (attr_at_vertex) {
      opcode_val = DXIL_INTR_ATTRIBUTE_AT_VERTEX;
      func_name = "dx.op.attributeAtVertex";
      /* Validator 1.6+ requires the barycentrics feature bit for this op. */
      if (ctx->mod.minor_validator >= 6)
         ctx->mod.feats.barycentrics = 1;
   } else if (is_patch_constant) {
      opcode_val = DXIL_INTR_LOAD_PATCH_CONSTANT;
      func_name = "dx.op.loadPatchConstant";
   } else if (is_output_control_point) {
      opcode_val = DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT;
      func_name = "dx.op.loadOutputControlPoint";
   } else {
      opcode_val = DXIL_INTR_LOAD_INPUT;
      func_name = "dx.op.loadInput";
   }

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, opcode_val);
   if (!opcode)
      return false;

   /* Map the NIR driver_location ("base") to the DXIL signature element
    * index through the per-signature remap table. */
   uint8_t *io_mappings =
      is_patch_constant ? ctx->mod.patch_mappings :
      is_output_control_point ? ctx->mod.output_mappings :
      ctx->mod.input_mappings;
   uint8_t io_index = io_mappings[nir_intrinsic_base(intr)];
   const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, io_index);
   if (!input_id)
      return false;

   /* Per-vertex loads carry the vertex index in src[0], pushing the row
    * (array index) source to src[1]. */
   bool is_per_vertex =
      intr->intrinsic == nir_intrinsic_load_per_vertex_input ||
      intr->intrinsic == nir_intrinsic_load_per_vertex_output;
   int row_index = is_per_vertex ? 1 : 0;
   const struct dxil_value *vertex_id = NULL;
   if (!is_patch_constant) {
      if (is_per_vertex) {
         vertex_id = get_src(ctx, &intr->src[0], 0, nir_type_int);
      } else if (attr_at_vertex) {
         /* attributeAtVertex takes the vertex slot as an i8 immediate. */
         vertex_id = dxil_module_get_int8_const(&ctx->mod, ctx->opts->provoking_vertex);
      } else {
         /* loadInput still takes a vertex operand; pass undef when there
          * is no meaningful vertex index. */
         const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
         if (!int32_type)
            return false;

         vertex_id = dxil_module_get_undef(&ctx->mod, int32_type);
      }
      if (!vertex_id)
         return false;
   }

   /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature
    * generation, so muck with them here too.
    */
   nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
   bool is_tess_level = is_patch_constant &&
                        (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
                         semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER);

   /* For tess levels the column is fixed at 0 and the row varies per
    * component; for everything else the row comes from the source and the
    * column varies per component (set inside the loop below). */
   const struct dxil_value *row = NULL;
   const struct dxil_value *comp = NULL;
   if (is_tess_level)
      comp = dxil_module_get_int8_const(&ctx->mod, 0);
   else
      row = get_src(ctx, &intr->src[row_index], 0, nir_type_int);

   nir_alu_type out_type = nir_intrinsic_dest_type(intr);
   enum overload_type overload = get_overload(out_type, intr->def.bit_size);

   const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, overload);

   if (!func)
      return false;

   /* Rebase the component index so it is relative to the variable's own
    * first component (location_frac). */
   nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), is_patch_constant);
   unsigned var_base_component = var ? var->data.location_frac : 0;
   unsigned base_component = nir_intrinsic_component(intr) - var_base_component;

   /* Validator 1.5+ checks usage masks: record which signature components
    * this load reads (and, for dynamically indexed rows, mark them in the
    * PSV dynamic mask). 64-bit values occupy two mask bits per component. */
   if (ctx->mod.minor_validator >= 5 &&
       !is_output_control_point &&
       intr->intrinsic != nir_intrinsic_load_output) {
      struct dxil_signature_record *sig_rec = is_patch_constant ?
         &ctx->mod.patch_consts[io_index] :
         &ctx->mod.inputs[io_index];
      unsigned comp_size = intr->def.bit_size == 64 ? 2 : 1;
      unsigned comp_mask = (1 << (intr->num_components * comp_size)) - 1;
      comp_mask <<= (var_base_component * comp_size);
      /* Tess levels were transposed to rows; only component 0 exists. */
      if (is_tess_level)
         comp_mask = 1;
      for (unsigned r = 0; r < sig_rec->num_elements; ++r)
         sig_rec->elements[r].always_reads_mask |= (comp_mask & sig_rec->elements[r].mask);

      if (!nir_src_is_const(intr->src[row_index])) {
         struct dxil_psv_signature_element *psv_rec = is_patch_constant ?
            &ctx->mod.psv_patch_consts[io_index] :
            &ctx->mod.psv_inputs[io_index];
         psv_rec->dynamic_mask_and_stream |= comp_mask;
      }
   }

   /* Emit one load call per component; tess levels step the row,
    * everything else steps the column. */
   for (unsigned i = 0; i < intr->num_components; ++i) {
      if (is_tess_level)
         row = dxil_module_get_int32_const(&ctx->mod, i + base_component);
      else
         comp = dxil_module_get_int8_const(&ctx->mod, i + base_component);

      if (!row || !comp)
         return false;

      const struct dxil_value *args[] = {
         opcode, input_id, row, comp, vertex_id
      };

      /* loadPatchConstant has no vertex operand; drop the trailing arg. */
      unsigned num_args = ARRAY_SIZE(args) - (is_patch_constant ? 1 : 0);
      const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args);
      if (!retval)
         return false;
      store_def(ctx, &intr->def, i, retval);
   }
   return true;
}
3805 
static bool
emit_load_interpolated_input(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   /* Lower load_interpolated_input to the DXIL eval* op selected by the
    * barycentric intrinsic feeding src[0]: evalSnapped (pixel/offset),
    * evalSampleIndex, or evalCentroid. One call is emitted per component. */
   nir_intrinsic_instr *barycentric = nir_src_as_intrinsic(intr->src[0]);

   /* Shared argument layout: [0]=opcode, [1]=input id, [2]=row, [3]=column,
    * [4..5]=op-specific extras. Unused slots stay NULL. */
   const struct dxil_value *args[6] = { 0 };

   unsigned opcode_val;
   const char *func_name;
   unsigned num_args;
   switch (barycentric->intrinsic) {
   case nir_intrinsic_load_barycentric_at_offset:
      opcode_val = DXIL_INTR_EVAL_SNAPPED;
      func_name = "dx.op.evalSnapped";
      num_args = 6;
      for (unsigned i = 0; i < 2; ++i) {
         const struct dxil_value *float_offset = get_src(ctx, &barycentric->src[0], i, nir_type_float);
         /* GLSL offsets lie in [-0.5f, 0.5f); DXIL evalSnapped takes
          * 4-bit signed 1/16th-pixel steps in [-8, 7], so scale by 16
          * and truncate toward zero. */
         const struct dxil_value *offset_16 = dxil_emit_binop(&ctx->mod,
            DXIL_BINOP_MUL, float_offset, dxil_module_get_float_const(&ctx->mod, 16.0f), 0);
         args[i + 4] = dxil_emit_cast(&ctx->mod, DXIL_CAST_FPTOSI,
            dxil_module_get_int_type(&ctx->mod, 32), offset_16);
      }
      break;
   case nir_intrinsic_load_barycentric_pixel:
      /* Pixel-center interpolation is evalSnapped with a (0, 0) offset. */
      opcode_val = DXIL_INTR_EVAL_SNAPPED;
      func_name = "dx.op.evalSnapped";
      num_args = 6;
      args[4] = args[5] = dxil_module_get_int32_const(&ctx->mod, 0);
      break;
   case nir_intrinsic_load_barycentric_at_sample:
      opcode_val = DXIL_INTR_EVAL_SAMPLE_INDEX;
      func_name = "dx.op.evalSampleIndex";
      num_args = 5;
      args[4] = get_src(ctx, &barycentric->src[0], 0, nir_type_int);
      break;
   case nir_intrinsic_load_barycentric_centroid:
      opcode_val = DXIL_INTR_EVAL_CENTROID;
      func_name = "dx.op.evalCentroid";
      num_args = 4;
      break;
   default:
      unreachable("Unsupported interpolation barycentric intrinsic");
   }
   /* Map driver_location to the DXIL input-signature element index. */
   uint8_t io_index = ctx->mod.input_mappings[nir_intrinsic_base(intr)];
   args[0] = dxil_module_get_int32_const(&ctx->mod, opcode_val);
   args[1] = dxil_module_get_int32_const(&ctx->mod, io_index);
   args[2] = get_src(ctx, &intr->src[1], 0, nir_type_int);

   /* The eval* ops only exist with a float32 overload. */
   const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, DXIL_F32);

   if (!func)
      return false;

   /* Rebase the component index relative to the variable's location_frac. */
   nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), false);
   unsigned var_base_component = var ? var->data.location_frac : 0;
   unsigned base_component = nir_intrinsic_component(intr) - var_base_component;

   /* Validator 1.5+ tracks per-component usage masks; 64-bit components
    * take two mask bits each. Dynamically indexed rows are also flagged
    * in the PSV record. */
   if (ctx->mod.minor_validator >= 5) {
      struct dxil_signature_record *sig_rec = &ctx->mod.inputs[io_index];
      unsigned comp_size = intr->def.bit_size == 64 ? 2 : 1;
      unsigned comp_mask = (1 << (intr->num_components * comp_size)) - 1;
      comp_mask <<= (var_base_component * comp_size);
      for (unsigned r = 0; r < sig_rec->num_elements; ++r)
         sig_rec->elements[r].always_reads_mask |= (comp_mask & sig_rec->elements[r].mask);

      if (!nir_src_is_const(intr->src[1])) {
         struct dxil_psv_signature_element *psv_rec = &ctx->mod.psv_inputs[io_index];
         psv_rec->dynamic_mask_and_stream |= comp_mask;
      }
   }

   /* Emit one eval call per component, stepping the column argument. */
   for (unsigned i = 0; i < intr->num_components; ++i) {
      args[3] = dxil_module_get_int8_const(&ctx->mod, i + base_component);

      const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args);
      if (!retval)
         return false;
      store_def(ctx, &intr->def, i, retval);
   }
   return true;
}
3888 
3889 static const struct dxil_value *
deref_to_gep(struct ntd_context * ctx,nir_deref_instr * deref)3890 deref_to_gep(struct ntd_context *ctx, nir_deref_instr *deref)
3891 {
3892    nir_deref_path path;
3893    nir_deref_path_init(&path, deref, ctx->ralloc_ctx);
3894    assert(path.path[0]->deref_type == nir_deref_type_var);
3895    uint32_t count = 0;
3896    while (path.path[count])
3897       ++count;
3898 
3899    const struct dxil_value **gep_indices = ralloc_array(ctx->ralloc_ctx,
3900                                                        const struct dxil_value *,
3901                                                        count + 1);
3902    nir_variable *var = path.path[0]->var;
3903    const struct dxil_value **var_array;
3904    switch (deref->modes) {
3905    case nir_var_mem_constant: var_array = ctx->consts; break;
3906    case nir_var_mem_shared: var_array = ctx->sharedvars; break;
3907    case nir_var_function_temp: var_array = ctx->scratchvars; break;
3908    default: unreachable("Invalid deref mode");
3909    }
3910    gep_indices[0] = var_array[var->data.driver_location];
3911 
3912    for (uint32_t i = 0; i < count; ++i)
3913       gep_indices[i + 1] = get_src_ssa(ctx, &path.path[i]->def, 0);
3914 
3915    return dxil_emit_gep_inbounds(&ctx->mod, gep_indices, count + 1);
3916 }
3917 
3918 static bool
emit_load_deref(struct ntd_context * ctx,nir_intrinsic_instr * intr)3919 emit_load_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3920 {
3921    const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0]));
3922    if (!ptr)
3923       return false;
3924 
3925    const struct dxil_value *retval =
3926       dxil_emit_load(&ctx->mod, ptr, intr->def.bit_size / 8, false);
3927    if (!retval)
3928       return false;
3929 
3930    store_def(ctx, &intr->def, 0, retval);
3931    return true;
3932 }
3933 
3934 static bool
emit_store_deref(struct ntd_context * ctx,nir_intrinsic_instr * intr)3935 emit_store_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3936 {
3937    nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
3938    const struct dxil_value *ptr = deref_to_gep(ctx, deref);
3939    if (!ptr)
3940       return false;
3941 
3942    const struct dxil_value *value = get_src(ctx, &intr->src[1], 0, nir_get_nir_type_for_glsl_type(deref->type));
3943    return dxil_emit_store(&ctx->mod, value, ptr, nir_src_bit_size(intr->src[1]) / 8, false);
3944 }
3945 
3946 static bool
emit_atomic_deref(struct ntd_context * ctx,nir_intrinsic_instr * intr)3947 emit_atomic_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3948 {
3949    const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0]));
3950    if (!ptr)
3951       return false;
3952 
3953    const struct dxil_value *value = get_src(ctx, &intr->src[1], 0, nir_type_uint);
3954    if (!value)
3955       return false;
3956 
3957    enum dxil_rmw_op dxil_op = nir_atomic_to_dxil_rmw(nir_intrinsic_atomic_op(intr));
3958    const struct dxil_value *retval = dxil_emit_atomicrmw(&ctx->mod, value, ptr, dxil_op, false,
3959                                                          DXIL_ATOMIC_ORDERING_ACQREL,
3960                                                          DXIL_SYNC_SCOPE_CROSSTHREAD);
3961    if (!retval)
3962       return false;
3963 
3964    store_def(ctx, &intr->def, 0, retval);
3965    return true;
3966 }
3967 
3968 static bool
emit_atomic_deref_swap(struct ntd_context * ctx,nir_intrinsic_instr * intr)3969 emit_atomic_deref_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3970 {
3971    const struct dxil_value *ptr = deref_to_gep(ctx, nir_src_as_deref(intr->src[0]));
3972    if (!ptr)
3973       return false;
3974 
3975    const struct dxil_value *cmp = get_src(ctx, &intr->src[1], 0, nir_type_uint);
3976    const struct dxil_value *value = get_src(ctx, &intr->src[2], 0, nir_type_uint);
3977    if (!value)
3978       return false;
3979 
3980    const struct dxil_value *retval = dxil_emit_cmpxchg(&ctx->mod, cmp, value, ptr, false,
3981                                                        DXIL_ATOMIC_ORDERING_ACQREL,
3982                                                        DXIL_SYNC_SCOPE_CROSSTHREAD);
3983    if (!retval)
3984       return false;
3985 
3986    store_def(ctx, &intr->def, 0, retval);
3987    return true;
3988 }
3989 
3990 static bool
emit_discard_if_with_value(struct ntd_context * ctx,const struct dxil_value * value)3991 emit_discard_if_with_value(struct ntd_context *ctx, const struct dxil_value *value)
3992 {
3993    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DISCARD);
3994    if (!opcode)
3995       return false;
3996 
3997    const struct dxil_value *args[] = {
3998      opcode,
3999      value
4000    };
4001 
4002    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.discard", DXIL_NONE);
4003    if (!func)
4004       return false;
4005 
4006    return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
4007 }
4008 
4009 static bool
emit_discard_if(struct ntd_context * ctx,nir_intrinsic_instr * intr)4010 emit_discard_if(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4011 {
4012    const struct dxil_value *value = get_src(ctx, &intr->src[0], 0, nir_type_bool);
4013    if (!value)
4014       return false;
4015 
4016    return emit_discard_if_with_value(ctx, value);
4017 }
4018 
4019 static bool
emit_discard(struct ntd_context * ctx)4020 emit_discard(struct ntd_context *ctx)
4021 {
4022    const struct dxil_value *value = dxil_module_get_int1_const(&ctx->mod, true);
4023    return emit_discard_if_with_value(ctx, value);
4024 }
4025 
4026 static bool
emit_emit_vertex(struct ntd_context * ctx,nir_intrinsic_instr * intr)4027 emit_emit_vertex(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4028 {
4029    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_EMIT_STREAM);
4030    const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
4031    if (!opcode || !stream_id)
4032       return false;
4033 
4034    const struct dxil_value *args[] = {
4035      opcode,
4036      stream_id
4037    };
4038 
4039    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.emitStream", DXIL_NONE);
4040    if (!func)
4041       return false;
4042 
4043    return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
4044 }
4045 
4046 static bool
emit_end_primitive(struct ntd_context * ctx,nir_intrinsic_instr * intr)4047 emit_end_primitive(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4048 {
4049    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CUT_STREAM);
4050    const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
4051    if (!opcode || !stream_id)
4052       return false;
4053 
4054    const struct dxil_value *args[] = {
4055      opcode,
4056      stream_id
4057    };
4058 
4059    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cutStream", DXIL_NONE);
4060    if (!func)
4061       return false;
4062 
4063    return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
4064 }
4065 
static bool
emit_image_store(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   /* Lower an image store (bound, bindless, or deref form) to a DXIL
    * textureStore or bufferStore call. Returns false on failure. */
   const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_store ?
      create_image_handle(ctx, intr) :
      get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
   if (!handle)
      return false;

   /* Arrayness comes from the deref's GLSL type for the deref form,
    * otherwise from the intrinsic's index. */
   bool is_array = false;
   if (intr->intrinsic == nir_intrinsic_image_deref_store)
      is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
   else
      is_array = nir_intrinsic_image_array(intr);

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   /* Coordinate slots beyond the dimension's count stay undef. */
   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
   enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_store ?
      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
      nir_intrinsic_image_dim(intr);
   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
   /* The array layer is an extra trailing coordinate. */
   if (is_array)
      ++num_coords;

   assert(num_coords <= nir_src_num_components(intr->src[1]));
   for (unsigned i = 0; i < num_coords; ++i) {
      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
      if (!coord[i])
         return false;
   }

   nir_alu_type in_type = nir_intrinsic_src_type(intr);
   enum overload_type overload = get_overload(in_type, 32);

   /* src[3] holds the texel value; DXIL stores always take 4 components,
    * so pad the tail with undef of the same type. */
   assert(nir_src_bit_size(intr->src[3]) == 32);
   unsigned num_components = nir_src_num_components(intr->src[3]);
   assert(num_components <= 4);
   const struct dxil_value *value[4];
   for (unsigned i = 0; i < num_components; ++i) {
      value[i] = get_src(ctx, &intr->src[3], i, in_type);
      if (!value[i])
         return false;
   }

   for (int i = num_components; i < 4; ++i)
      value[i] = dxil_module_get_undef(&ctx->mod, dxil_value_get_type(value[0]));

   /* Write mask covers exactly the real components. */
   const struct dxil_value *write_mask =
      dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
   if (!write_mask)
      return false;

   if (image_dim == GLSL_SAMPLER_DIM_BUF) {
      /* Buffer stores take (index, undef) rather than a 3D coordinate. */
      coord[1] = int32_undef;
      return emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload);
   } else
      return emit_texturestore_call(ctx, handle, coord, value, write_mask, overload);
}
4127 
static bool
emit_image_load(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   /* Lower an image load (bound, bindless, or deref form) to a DXIL
    * textureLoad or bufferLoad call and unpack the result components. */
   const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_load ?
      create_image_handle(ctx, intr) :
      get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
   if (!handle)
      return false;

   /* Arrayness comes from the deref's GLSL type for the deref form,
    * otherwise from the intrinsic's index. */
   bool is_array = false;
   if (intr->intrinsic == nir_intrinsic_image_deref_load)
      is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
   else
      is_array = nir_intrinsic_image_array(intr);

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   /* Coordinate slots beyond the dimension's count stay undef. */
   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
   enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_load ?
      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
      nir_intrinsic_image_dim(intr);
   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
   /* The array layer is an extra trailing coordinate. */
   if (is_array)
      ++num_coords;

   assert(num_coords <= nir_src_num_components(intr->src[1]));
   for (unsigned i = 0; i < num_coords; ++i) {
      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
      if (!coord[i])
         return false;
   }

   nir_alu_type out_type = nir_intrinsic_dest_type(intr);
   enum overload_type overload = get_overload(out_type, 32);

   const struct dxil_value *load_result;
   if (image_dim == GLSL_SAMPLER_DIM_BUF) {
      /* Buffer loads take (index, undef) rather than a 3D coordinate. */
      coord[1] = int32_undef;
      load_result = emit_bufferload_call(ctx, handle, coord, overload);
   } else
      load_result = emit_textureload_call(ctx, handle, coord, overload);

   if (!load_result)
      return false;

   /* The call returns an aggregate; extract each requested component. */
   assert(intr->def.bit_size == 32);
   unsigned num_components = intr->def.num_components;
   assert(num_components <= 4);
   for (unsigned i = 0; i < num_components; ++i) {
      const struct dxil_value *component = dxil_emit_extractval(&ctx->mod, load_result, i);
      if (!component)
         return false;
      store_def(ctx, &intr->def, i, component);
   }

   /* Multi-component typed UAV loads need an optional-feature flag in the
    * module metadata. */
   if (util_format_get_nr_components(nir_intrinsic_format(intr)) > 1)
      ctx->mod.feats.typed_uav_load_additional_formats = true;

   return true;
}
4190 
4191 static bool
emit_image_atomic(struct ntd_context * ctx,nir_intrinsic_instr * intr)4192 emit_image_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4193 {
4194    const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_atomic ?
4195       create_image_handle(ctx, intr) :
4196       get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
4197    if (!handle)
4198       return false;
4199 
4200    bool is_array = false;
4201    if (intr->intrinsic == nir_intrinsic_image_deref_atomic)
4202       is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
4203    else
4204       is_array = nir_intrinsic_image_array(intr);
4205 
4206    const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4207    if (!int32_undef)
4208       return false;
4209 
4210    const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
4211    enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_atomic ?
4212       glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
4213       nir_intrinsic_image_dim(intr);
4214    unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
4215    if (is_array)
4216       ++num_coords;
4217 
4218    assert(num_coords <= nir_src_num_components(intr->src[1]));
4219    for (unsigned i = 0; i < num_coords; ++i) {
4220       coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
4221       if (!coord[i])
4222          return false;
4223    }
4224 
4225    nir_atomic_op nir_op = nir_intrinsic_atomic_op(intr);
4226    enum dxil_atomic_op dxil_op = nir_atomic_to_dxil_atomic(nir_op);
4227    nir_alu_type type = nir_atomic_op_type(nir_op);
4228    const struct dxil_value *value = get_src(ctx, &intr->src[3], 0, type);
4229    if (!value)
4230       return false;
4231 
4232    const struct dxil_value *retval =
4233       emit_atomic_binop(ctx, handle, dxil_op, coord, value);
4234 
4235    if (!retval)
4236       return false;
4237 
4238    store_def(ctx, &intr->def, 0, retval);
4239    return true;
4240 }
4241 
4242 static bool
emit_image_atomic_comp_swap(struct ntd_context * ctx,nir_intrinsic_instr * intr)4243 emit_image_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4244 {
4245    const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_atomic_swap ?
4246       create_image_handle(ctx, intr) :
4247       get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
4248    if (!handle)
4249       return false;
4250 
4251    bool is_array = false;
4252    if (intr->intrinsic == nir_intrinsic_image_deref_atomic_swap)
4253       is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
4254    else
4255       is_array = nir_intrinsic_image_array(intr);
4256 
4257    const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4258    if (!int32_undef)
4259       return false;
4260 
4261    const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
4262    enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ?
4263       glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
4264       nir_intrinsic_image_dim(intr);
4265    unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
4266    if (is_array)
4267       ++num_coords;
4268 
4269    assert(num_coords <= nir_src_num_components(intr->src[1]));
4270    for (unsigned i = 0; i < num_coords; ++i) {
4271       coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
4272       if (!coord[i])
4273          return false;
4274    }
4275 
4276    const struct dxil_value *cmpval = get_src(ctx, &intr->src[3], 0, nir_type_uint);
4277    const struct dxil_value *newval = get_src(ctx, &intr->src[4], 0, nir_type_uint);
4278    if (!cmpval || !newval)
4279       return false;
4280 
4281    const struct dxil_value *retval =
4282       emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval);
4283 
4284    if (!retval)
4285       return false;
4286 
4287    store_def(ctx, &intr->def, 0, retval);
4288    return true;
4289 }
4290 
/* Collected operands for DXIL texture/sampler operations. Call sites use
 * designated initializers, so fields a given op does not need are left
 * NULL/zero; ops that require an operand anyway pass undef explicitly. */
struct texop_parameters {
   const struct dxil_value *tex;                            /* resource handle */
   const struct dxil_value *sampler;                        /* sampler handle, if the op samples */
   const struct dxil_value *bias, *lod_or_sample, *min_lod; /* LOD controls / sample index */
   const struct dxil_value *coord[4], *offset[3], *dx[3], *dy[3]; /* coords, texel offsets, gradients */
   const struct dxil_value *cmp;                            /* comparison value for shadow ops */
   enum overload_type overload;                             /* result-type overload */
};
4299 
4300 static const struct dxil_value *
emit_texture_size(struct ntd_context * ctx,struct texop_parameters * params)4301 emit_texture_size(struct ntd_context *ctx, struct texop_parameters *params)
4302 {
4303    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.getDimensions", DXIL_NONE);
4304    if (!func)
4305       return false;
4306 
4307    const struct dxil_value *args[] = {
4308       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_SIZE),
4309       params->tex,
4310       params->lod_or_sample
4311    };
4312 
4313    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4314 }
4315 
4316 static bool
emit_image_size(struct ntd_context * ctx,nir_intrinsic_instr * intr)4317 emit_image_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4318 {
4319    const struct dxil_value *handle = intr->intrinsic == nir_intrinsic_bindless_image_size ?
4320       create_image_handle(ctx, intr) :
4321       get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
4322    if (!handle)
4323       return false;
4324 
4325    enum glsl_sampler_dim sampler_dim = intr->intrinsic == nir_intrinsic_image_deref_size ?
4326       glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type) :
4327       nir_intrinsic_image_dim(intr);
4328    const struct dxil_value *lod = sampler_dim == GLSL_SAMPLER_DIM_BUF ?
4329       dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32)) :
4330       get_src(ctx, &intr->src[1], 0, nir_type_uint);
4331    if (!lod)
4332       return false;
4333 
4334    struct texop_parameters params = {
4335       .tex = handle,
4336       .lod_or_sample = lod
4337    };
4338    const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
4339    if (!dimensions)
4340       return false;
4341 
4342    for (unsigned i = 0; i < intr->def.num_components; ++i) {
4343       const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, i);
4344       store_def(ctx, &intr->def, i, retval);
4345    }
4346 
4347    return true;
4348 }
4349 
4350 static bool
emit_get_ssbo_size(struct ntd_context * ctx,nir_intrinsic_instr * intr)4351 emit_get_ssbo_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4352 {
4353    enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV;
4354    if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
4355       nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
4356       if (var && var->data.access & ACCESS_NON_WRITEABLE)
4357          class = DXIL_RESOURCE_CLASS_SRV;
4358    }
4359 
4360    const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER);
4361    if (!handle)
4362       return false;
4363 
4364    struct texop_parameters params = {
4365       .tex = handle,
4366       .lod_or_sample = dxil_module_get_undef(
4367                         &ctx->mod, dxil_module_get_int_type(&ctx->mod, 32))
4368    };
4369 
4370    const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
4371    if (!dimensions)
4372       return false;
4373 
4374    const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, 0);
4375    store_def(ctx, &intr->def, 0, retval);
4376 
4377    return true;
4378 }
4379 
4380 static bool
emit_ssbo_atomic(struct ntd_context * ctx,nir_intrinsic_instr * intr)4381 emit_ssbo_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4382 {
4383    nir_atomic_op nir_op = nir_intrinsic_atomic_op(intr);
4384    enum dxil_atomic_op dxil_op = nir_atomic_to_dxil_atomic(nir_op);
4385    nir_alu_type type = nir_atomic_op_type(nir_op);
4386    const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
4387    const struct dxil_value *offset =
4388       get_src(ctx, &intr->src[1], 0, nir_type_uint);
4389    const struct dxil_value *value =
4390       get_src(ctx, &intr->src[2], 0, type);
4391 
4392    if (!value || !handle || !offset)
4393       return false;
4394 
4395    const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4396    if (!int32_undef)
4397       return false;
4398 
4399    const struct dxil_value *coord[3] = {
4400       offset, int32_undef, int32_undef
4401    };
4402 
4403    const struct dxil_value *retval =
4404       emit_atomic_binop(ctx, handle, dxil_op, coord, value);
4405 
4406    if (!retval)
4407       return false;
4408 
4409    store_def(ctx, &intr->def, 0, retval);
4410    return true;
4411 }
4412 
4413 static bool
emit_ssbo_atomic_comp_swap(struct ntd_context * ctx,nir_intrinsic_instr * intr)4414 emit_ssbo_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4415 {
4416    const struct dxil_value* handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
4417    const struct dxil_value *offset =
4418       get_src(ctx, &intr->src[1], 0, nir_type_uint);
4419    const struct dxil_value *cmpval =
4420       get_src(ctx, &intr->src[2], 0, nir_type_int);
4421    const struct dxil_value *newval =
4422       get_src(ctx, &intr->src[3], 0, nir_type_int);
4423 
4424    if (!cmpval || !newval || !handle || !offset)
4425       return false;
4426 
4427    const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
4428    if (!int32_undef)
4429       return false;
4430 
4431    const struct dxil_value *coord[3] = {
4432       offset, int32_undef, int32_undef
4433    };
4434 
4435    const struct dxil_value *retval =
4436       emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval);
4437 
4438    if (!retval)
4439       return false;
4440 
4441    store_def(ctx, &intr->def, 0, retval);
4442    return true;
4443 }
4444 
4445 static bool
emit_vulkan_resource_index(struct ntd_context * ctx,nir_intrinsic_instr * intr)4446 emit_vulkan_resource_index(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4447 {
4448    unsigned int binding = nir_intrinsic_binding(intr);
4449 
4450    bool const_index = nir_src_is_const(intr->src[0]);
4451    if (const_index) {
4452       binding += nir_src_as_const_value(intr->src[0])->u32;
4453    }
4454 
4455    const struct dxil_value *index_value = dxil_module_get_int32_const(&ctx->mod, binding);
4456    if (!index_value)
4457       return false;
4458 
4459    if (!const_index) {
4460       const struct dxil_value *offset = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
4461       if (!offset)
4462          return false;
4463 
4464       index_value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, index_value, offset, 0);
4465       if (!index_value)
4466          return false;
4467    }
4468 
4469    store_def(ctx, &intr->def, 0, index_value);
4470    store_def(ctx, &intr->def, 1, dxil_module_get_int32_const(&ctx->mod, 0));
4471    return true;
4472 }
4473 
4474 static bool
emit_load_vulkan_descriptor(struct ntd_context * ctx,nir_intrinsic_instr * intr)4475 emit_load_vulkan_descriptor(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4476 {
4477    nir_intrinsic_instr* index = nir_src_as_intrinsic(intr->src[0]);
4478    const struct dxil_value *handle = NULL;
4479 
4480    enum dxil_resource_class resource_class;
4481    enum dxil_resource_kind resource_kind;
4482    switch (nir_intrinsic_desc_type(intr)) {
4483    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
4484       resource_class = DXIL_RESOURCE_CLASS_CBV;
4485       resource_kind = DXIL_RESOURCE_KIND_CBUFFER;
4486       break;
4487    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
4488       resource_class = DXIL_RESOURCE_CLASS_UAV;
4489       resource_kind = DXIL_RESOURCE_KIND_RAW_BUFFER;
4490       break;
4491    default:
4492       unreachable("unknown descriptor type");
4493       return false;
4494    }
4495 
4496    if (index && index->intrinsic == nir_intrinsic_vulkan_resource_index) {
4497       unsigned binding = nir_intrinsic_binding(index);
4498       unsigned space = nir_intrinsic_desc_set(index);
4499 
4500       /* The descriptor_set field for variables is only 5 bits. We shouldn't have intrinsics trying to go beyond that. */
4501       assert(space < 32);
4502 
4503       nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
4504       if (resource_class == DXIL_RESOURCE_CLASS_UAV &&
4505           (var->data.access & ACCESS_NON_WRITEABLE))
4506          resource_class = DXIL_RESOURCE_CLASS_SRV;
4507 
4508       const struct dxil_value *index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
4509       if (!index_value)
4510          return false;
4511 
4512       handle = emit_createhandle_call_dynamic(ctx, resource_class, space, binding, index_value, false);
4513    } else {
4514       const struct dxil_value *heap_index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
4515       if (!heap_index_value)
4516          return false;
4517       const struct dxil_value *unannotated_handle = emit_createhandle_heap(ctx, heap_index_value, false, true);
4518       const struct dxil_value *res_props = dxil_module_get_buffer_res_props_const(&ctx->mod, resource_class, resource_kind);
4519       if (!unannotated_handle || !res_props)
4520          return false;
4521       handle = emit_annotate_handle(ctx, unannotated_handle, res_props);
4522    }
4523 
4524    store_ssa_def(ctx, &intr->def, 0, handle);
4525    store_def(ctx, &intr->def, 1, get_src(ctx, &intr->src[0], 1, nir_type_uint32));
4526 
4527    return true;
4528 }
4529 
4530 static bool
emit_load_sample_pos_from_id(struct ntd_context * ctx,nir_intrinsic_instr * intr)4531 emit_load_sample_pos_from_id(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4532 {
4533    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.renderTargetGetSamplePosition", DXIL_NONE);
4534    if (!func)
4535       return false;
4536 
4537    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION);
4538    if (!opcode)
4539       return false;
4540 
4541    const struct dxil_value *args[] = {
4542       opcode,
4543       get_src(ctx, &intr->src[0], 0, nir_type_uint32),
4544    };
4545    if (!args[1])
4546       return false;
4547 
4548    const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4549    if (!v)
4550       return false;
4551 
4552    for (unsigned i = 0; i < 2; ++i) {
4553       /* GL coords go from 0 -> 1, D3D from -0.5 -> 0.5 */
4554       const struct dxil_value *coord = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
4555          dxil_emit_extractval(&ctx->mod, v, i),
4556          dxil_module_get_float_const(&ctx->mod, 0.5f), 0);
4557       store_def(ctx, &intr->def, i, coord);
4558    }
4559    return true;
4560 }
4561 
4562 static bool
emit_load_sample_id(struct ntd_context * ctx,nir_intrinsic_instr * intr)4563 emit_load_sample_id(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4564 {
4565    assert(ctx->mod.info.has_per_sample_input ||
4566           intr->intrinsic == nir_intrinsic_load_sample_id_no_per_sample);
4567 
4568    if (ctx->mod.info.has_per_sample_input)
4569       return emit_load_unary_external_function(ctx, intr, "dx.op.sampleIndex",
4570                                                DXIL_INTR_SAMPLE_INDEX, nir_type_int);
4571 
4572    store_def(ctx, &intr->def, 0, dxil_module_get_int32_const(&ctx->mod, 0));
4573    return true;
4574 }
4575 
4576 static bool
emit_read_first_invocation(struct ntd_context * ctx,nir_intrinsic_instr * intr)4577 emit_read_first_invocation(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4578 {
4579    ctx->mod.feats.wave_ops = 1;
4580    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveReadLaneFirst",
4581                                                     get_overload(nir_type_uint, intr->def.bit_size));
4582    const struct dxil_value *args[] = {
4583       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_READ_LANE_FIRST),
4584       get_src(ctx, intr->src, 0, nir_type_uint),
4585    };
4586    if (!func || !args[0] || !args[1])
4587       return false;
4588 
4589    const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4590    if (!ret)
4591       return false;
4592    store_def(ctx, &intr->def, 0, ret);
4593    return true;
4594 }
4595 
4596 static bool
emit_read_invocation(struct ntd_context * ctx,nir_intrinsic_instr * intr)4597 emit_read_invocation(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4598 {
4599    ctx->mod.feats.wave_ops = 1;
4600    bool quad = intr->intrinsic == nir_intrinsic_quad_broadcast;
4601    const struct dxil_func *func = dxil_get_function(&ctx->mod, quad ? "dx.op.quadReadLaneAt" : "dx.op.waveReadLaneAt",
4602                                                     get_overload(nir_type_uint, intr->def.bit_size));
4603    const struct dxil_value *args[] = {
4604       dxil_module_get_int32_const(&ctx->mod, quad ? DXIL_INTR_QUAD_READ_LANE_AT : DXIL_INTR_WAVE_READ_LANE_AT),
4605       get_src(ctx, &intr->src[0], 0, nir_type_uint),
4606       get_src(ctx, &intr->src[1], 0, nir_type_uint),
4607    };
4608    if (!func || !args[0] || !args[1] || !args[2])
4609       return false;
4610 
4611    const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4612    if (!ret)
4613       return false;
4614    store_def(ctx, &intr->def, 0, ret);
4615    return true;
4616 }
4617 
4618 static bool
emit_vote_eq(struct ntd_context * ctx,nir_intrinsic_instr * intr)4619 emit_vote_eq(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4620 {
4621    ctx->mod.feats.wave_ops = 1;
4622    nir_alu_type alu_type = intr->intrinsic == nir_intrinsic_vote_ieq ? nir_type_int : nir_type_float;
4623    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveAllEqual",
4624                                                     get_overload(alu_type, intr->src[0].ssa->bit_size));
4625    const struct dxil_value *args[] = {
4626       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_ALL_EQUAL),
4627       get_src(ctx, intr->src, 0, alu_type),
4628    };
4629    if (!func || !args[0] || !args[1])
4630       return false;
4631 
4632    const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4633    if (!ret)
4634       return false;
4635    store_def(ctx, &intr->def, 0, ret);
4636    return true;
4637 }
4638 
4639 static bool
emit_vote(struct ntd_context * ctx,nir_intrinsic_instr * intr)4640 emit_vote(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4641 {
4642    ctx->mod.feats.wave_ops = 1;
4643    bool any = intr->intrinsic == nir_intrinsic_vote_any;
4644    const struct dxil_func *func = dxil_get_function(&ctx->mod,
4645                                                     any ? "dx.op.waveAnyTrue" : "dx.op.waveAllTrue",
4646                                                     DXIL_NONE);
4647    const struct dxil_value *args[] = {
4648       dxil_module_get_int32_const(&ctx->mod, any ? DXIL_INTR_WAVE_ANY_TRUE : DXIL_INTR_WAVE_ALL_TRUE),
4649       get_src(ctx, intr->src, 0, nir_type_bool),
4650    };
4651    if (!func || !args[0] || !args[1])
4652       return false;
4653 
4654    const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4655    if (!ret)
4656       return false;
4657    store_def(ctx, &intr->def, 0, ret);
4658    return true;
4659 }
4660 
4661 static bool
emit_ballot(struct ntd_context * ctx,nir_intrinsic_instr * intr)4662 emit_ballot(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4663 {
4664    ctx->mod.feats.wave_ops = 1;
4665    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveBallot", DXIL_NONE);
4666    const struct dxil_value *args[] = {
4667       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_BALLOT),
4668       get_src(ctx, intr->src, 0, nir_type_bool),
4669    };
4670    if (!func || !args[0] || !args[1])
4671       return false;
4672 
4673    const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4674    if (!ret)
4675       return false;
4676    for (uint32_t i = 0; i < 4; ++i)
4677       store_def(ctx, &intr->def, i, dxil_emit_extractval(&ctx->mod, ret, i));
4678    return true;
4679 }
4680 
4681 static bool
emit_quad_op(struct ntd_context * ctx,nir_intrinsic_instr * intr,enum dxil_quad_op_kind op)4682 emit_quad_op(struct ntd_context *ctx, nir_intrinsic_instr *intr, enum dxil_quad_op_kind op)
4683 {
4684    ctx->mod.feats.wave_ops = 1;
4685    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.quadOp",
4686                                                     get_overload(nir_type_uint, intr->def.bit_size));
4687    const struct dxil_value *args[] = {
4688       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_QUAD_OP),
4689       get_src(ctx, intr->src, 0, nir_type_uint),
4690       dxil_module_get_int8_const(&ctx->mod, op),
4691    };
4692    if (!func || !args[0] || !args[1] || !args[2])
4693       return false;
4694 
4695    const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4696    if (!ret)
4697       return false;
4698    store_def(ctx, &intr->def, 0, ret);
4699    return true;
4700 }
4701 
4702 static enum dxil_wave_bit_op_kind
get_reduce_bit_op(nir_op op)4703 get_reduce_bit_op(nir_op op)
4704 {
4705    switch (op) {
4706    case nir_op_ior: return DXIL_WAVE_BIT_OP_OR;
4707    case nir_op_ixor: return DXIL_WAVE_BIT_OP_XOR;
4708    case nir_op_iand: return DXIL_WAVE_BIT_OP_AND;
4709    default:
4710       unreachable("Invalid bit op");
4711    }
4712 }
4713 
4714 static bool
emit_reduce_bitwise(struct ntd_context * ctx,nir_intrinsic_instr * intr)4715 emit_reduce_bitwise(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4716 {
4717    enum dxil_wave_bit_op_kind wave_bit_op = get_reduce_bit_op(nir_intrinsic_reduction_op(intr));
4718    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.waveActiveBit",
4719                                                     get_overload(nir_type_uint, intr->def.bit_size));
4720    const struct dxil_value *args[] = {
4721       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_WAVE_ACTIVE_BIT),
4722       get_src(ctx, intr->src, 0, nir_type_uint),
4723       dxil_module_get_int8_const(&ctx->mod, wave_bit_op),
4724    };
4725    if (!func || !args[0] || !args[1] || !args[2])
4726       return false;
4727 
4728    const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4729    if (!ret)
4730       return false;
4731    store_def(ctx, &intr->def, 0, ret);
4732    return true;
4733 }
4734 
4735 static enum dxil_wave_op_kind
get_reduce_op(nir_op op)4736 get_reduce_op(nir_op op)
4737 {
4738    switch (op) {
4739    case nir_op_iadd:
4740    case nir_op_fadd:
4741       return DXIL_WAVE_OP_SUM;
4742    case nir_op_imul:
4743    case nir_op_fmul:
4744       return DXIL_WAVE_OP_PRODUCT;
4745    case nir_op_imax:
4746    case nir_op_umax:
4747    case nir_op_fmax:
4748       return DXIL_WAVE_OP_MAX;
4749    case nir_op_imin:
4750    case nir_op_umin:
4751    case nir_op_fmin:
4752       return DXIL_WAVE_OP_MIN;
4753    default:
4754       unreachable("Unexpected reduction op");
4755    }
4756 }
4757 
4758 static bool
emit_reduce(struct ntd_context * ctx,nir_intrinsic_instr * intr)4759 emit_reduce(struct ntd_context *ctx, nir_intrinsic_instr *intr)
4760 {
4761    ctx->mod.feats.wave_ops = 1;
4762    bool is_prefix = intr->intrinsic == nir_intrinsic_exclusive_scan;
4763    nir_op reduction_op = (nir_op)nir_intrinsic_reduction_op(intr);
4764    switch (reduction_op) {
4765    case nir_op_ior:
4766    case nir_op_ixor:
4767    case nir_op_iand:
4768       assert(!is_prefix);
4769       return emit_reduce_bitwise(ctx, intr);
4770    default:
4771       break;
4772    }
4773    nir_alu_type alu_type = nir_op_infos[reduction_op].input_types[0];
4774    enum dxil_wave_op_kind wave_op = get_reduce_op(reduction_op);
4775    const struct dxil_func *func = dxil_get_function(&ctx->mod, is_prefix ? "dx.op.wavePrefixOp" : "dx.op.waveActiveOp",
4776                                                     get_overload(alu_type, intr->def.bit_size));
4777    bool is_unsigned = alu_type == nir_type_uint;
4778    const struct dxil_value *args[] = {
4779       dxil_module_get_int32_const(&ctx->mod, is_prefix ? DXIL_INTR_WAVE_PREFIX_OP : DXIL_INTR_WAVE_ACTIVE_OP),
4780       get_src(ctx, intr->src, 0, alu_type),
4781       dxil_module_get_int8_const(&ctx->mod, wave_op),
4782       dxil_module_get_int8_const(&ctx->mod, is_unsigned),
4783    };
4784    if (!func || !args[0] || !args[1] || !args[2] || !args[3])
4785       return false;
4786 
4787    const struct dxil_value *ret = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4788    if (!ret)
4789       return false;
4790    store_def(ctx, &intr->def, 0, ret);
4791    return true;
4792 }
4793 
/* Central dispatch: lower one NIR intrinsic to DXIL.  Returns false on
 * unsupported intrinsics or emission failure. */
static bool
emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   switch (intr->intrinsic) {
   /* Compute-shader system values. */
   case nir_intrinsic_load_global_invocation_id:
      return emit_load_global_invocation_id(ctx, intr);
   case nir_intrinsic_load_local_invocation_id:
      return emit_load_local_invocation_id(ctx, intr);
   case nir_intrinsic_load_local_invocation_index:
      return emit_load_local_invocation_index(ctx, intr);
   case nir_intrinsic_load_workgroup_id:
      return emit_load_local_workgroup_id(ctx, intr);
   /* Memory access. */
   case nir_intrinsic_load_ssbo:
      return emit_load_ssbo(ctx, intr);
   case nir_intrinsic_store_ssbo:
      return emit_store_ssbo(ctx, intr);
   case nir_intrinsic_load_deref:
      return emit_load_deref(ctx, intr);
   case nir_intrinsic_store_deref:
      return emit_store_deref(ctx, intr);
   case nir_intrinsic_deref_atomic:
      return emit_atomic_deref(ctx, intr);
   case nir_intrinsic_deref_atomic_swap:
      return emit_atomic_deref_swap(ctx, intr);
   case nir_intrinsic_load_ubo_vec4:
      return emit_load_ubo_vec4(ctx, intr);
   case nir_intrinsic_load_primitive_id:
      return emit_load_unary_external_function(ctx, intr, "dx.op.primitiveID",
                                               DXIL_INTR_PRIMITIVE_ID, nir_type_int);
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_id_no_per_sample:
      return emit_load_sample_id(ctx, intr);
   case nir_intrinsic_load_invocation_id:
      /* "Invocation ID" means different things per stage: control-point
       * ID in hull shaders, GS instance ID in geometry shaders. */
      switch (ctx->mod.shader_kind) {
      case DXIL_HULL_SHADER:
         return emit_load_unary_external_function(ctx, intr, "dx.op.outputControlPointID",
                                                  DXIL_INTR_OUTPUT_CONTROL_POINT_ID, nir_type_int);
      case DXIL_GEOMETRY_SHADER:
         return emit_load_unary_external_function(ctx, intr, "dx.op.gsInstanceID",
                                                  DXIL_INTR_GS_INSTANCE_ID, nir_type_int);
      default:
         unreachable("Unexpected shader kind for invocation ID");
      }
   case nir_intrinsic_load_view_index:
      ctx->mod.feats.view_id = true;
      return emit_load_unary_external_function(ctx, intr, "dx.op.viewID",
                                               DXIL_INTR_VIEW_ID, nir_type_int);
   case nir_intrinsic_load_sample_mask_in:
      return emit_load_sample_mask_in(ctx, intr);
   case nir_intrinsic_load_tess_coord:
      return emit_load_tess_coord(ctx, intr);
   /* Fragment kill / demote. */
   case nir_intrinsic_terminate_if:
   case nir_intrinsic_demote_if:
      return emit_discard_if(ctx, intr);
   case nir_intrinsic_terminate:
   case nir_intrinsic_demote:
      return emit_discard(ctx);
   /* Geometry-shader output stream control. */
   case nir_intrinsic_emit_vertex:
      return emit_emit_vertex(ctx, intr);
   case nir_intrinsic_end_primitive:
      return emit_end_primitive(ctx, intr);
   case nir_intrinsic_barrier:
      return emit_barrier(ctx, intr);
   /* Atomics. */
   case nir_intrinsic_ssbo_atomic:
      return emit_ssbo_atomic(ctx, intr);
   case nir_intrinsic_ssbo_atomic_swap:
      return emit_ssbo_atomic_comp_swap(ctx, intr);
   case nir_intrinsic_image_deref_atomic:
   case nir_intrinsic_image_atomic:
   case nir_intrinsic_bindless_image_atomic:
      return emit_image_atomic(ctx, intr);
   case nir_intrinsic_image_deref_atomic_swap:
   case nir_intrinsic_image_atomic_swap:
   case nir_intrinsic_bindless_image_atomic_swap:
      return emit_image_atomic_comp_swap(ctx, intr);
   /* Image access and queries. */
   case nir_intrinsic_image_store:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_bindless_image_store:
      return emit_image_store(ctx, intr);
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_bindless_image_load:
      return emit_image_load(ctx, intr);
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_bindless_image_size:
      return emit_image_size(ctx, intr);
   case nir_intrinsic_get_ssbo_size:
      return emit_get_ssbo_size(ctx, intr);
   /* Shader I/O. */
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_output:
      return emit_load_input_via_intrinsic(ctx, intr);
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output:
      return emit_store_output_via_intrinsic(ctx, intr);

   case nir_intrinsic_load_barycentric_at_offset:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_pixel:
      /* Emit nothing, we only support these as inputs to load_interpolated_input */
      return true;
   case nir_intrinsic_load_interpolated_input:
      return emit_load_interpolated_input(ctx, intr);
      break;

   /* Vulkan descriptor plumbing. */
   case nir_intrinsic_vulkan_resource_index:
      return emit_vulkan_resource_index(ctx, intr);
   case nir_intrinsic_load_vulkan_descriptor:
      return emit_load_vulkan_descriptor(ctx, intr);

   case nir_intrinsic_load_sample_pos_from_id:
      return emit_load_sample_pos_from_id(ctx, intr);

   case nir_intrinsic_is_helper_invocation:
      return emit_load_unary_external_function(
         ctx, intr, "dx.op.isHelperLane", DXIL_INTR_IS_HELPER_LANE, nir_type_int);
   /* Subgroup / wave operations — each sets the wave_ops feature bit. */
   case nir_intrinsic_elect:
      ctx->mod.feats.wave_ops = 1;
      return emit_load_unary_external_function(
         ctx, intr, "dx.op.waveIsFirstLane", DXIL_INTR_WAVE_IS_FIRST_LANE, nir_type_invalid);
   case nir_intrinsic_load_subgroup_size:
      ctx->mod.feats.wave_ops = 1;
      return emit_load_unary_external_function(
         ctx, intr, "dx.op.waveGetLaneCount", DXIL_INTR_WAVE_GET_LANE_COUNT, nir_type_invalid);
   case nir_intrinsic_load_subgroup_invocation:
      ctx->mod.feats.wave_ops = 1;
      return emit_load_unary_external_function(
         ctx, intr, "dx.op.waveGetLaneIndex", DXIL_INTR_WAVE_GET_LANE_INDEX, nir_type_invalid);

   case nir_intrinsic_vote_feq:
   case nir_intrinsic_vote_ieq:
      return emit_vote_eq(ctx, intr);
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
      return emit_vote(ctx, intr);

   case nir_intrinsic_ballot:
      return emit_ballot(ctx, intr);

   case nir_intrinsic_read_first_invocation:
      return emit_read_first_invocation(ctx, intr);
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_shuffle:
   case nir_intrinsic_quad_broadcast:
      return emit_read_invocation(ctx, intr);

   case nir_intrinsic_quad_swap_horizontal:
      return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_X);
   case nir_intrinsic_quad_swap_vertical:
      return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_Y);
   case nir_intrinsic_quad_swap_diagonal:
      return emit_quad_op(ctx, intr, QUAD_READ_ACROSS_DIAGONAL);

   case nir_intrinsic_reduce:
   case nir_intrinsic_exclusive_scan:
      return emit_reduce(ctx, intr);

   /* Derivatives; plain ddx/ddy lower to the coarse variants. */
   case nir_intrinsic_ddx:
   case nir_intrinsic_ddx_coarse: return emit_derivative(ctx, intr, DXIL_INTR_DDX_COARSE);
   case nir_intrinsic_ddx_fine: return emit_derivative(ctx, intr, DXIL_INTR_DDX_FINE);
   case nir_intrinsic_ddy:
   case nir_intrinsic_ddy_coarse: return emit_derivative(ctx, intr, DXIL_INTR_DDY_COARSE);
   case nir_intrinsic_ddy_fine: return emit_derivative(ctx, intr, DXIL_INTR_DDY_FINE);

   case nir_intrinsic_load_first_vertex:
      ctx->mod.feats.extended_command_info = true;
      return emit_load_unary_external_function(ctx, intr, "dx.op.startVertexLocation",
                                               DXIL_INTR_START_VERTEX_LOCATION, nir_type_int);
   case nir_intrinsic_load_base_instance:
      ctx->mod.feats.extended_command_info = true;
      return emit_load_unary_external_function(ctx, intr, "dx.op.startInstanceLocation",
                                               DXIL_INTR_START_INSTANCE_LOCATION, nir_type_int);

   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_workgroup_size:
   default:
      log_nir_instr_unsupported(
         ctx->logger, "Unimplemented intrinsic instruction", &intr->instr);
      return false;
   }
}
4978 
4979 static const struct dxil_type *
dxil_type_for_const(struct ntd_context * ctx,nir_def * def)4980 dxil_type_for_const(struct ntd_context *ctx, nir_def *def)
4981 {
4982    if (BITSET_TEST(ctx->int_types, def->index) ||
4983        !BITSET_TEST(ctx->float_types, def->index))
4984       return dxil_module_get_int_type(&ctx->mod, def->bit_size);
4985    return dxil_module_get_float_type(&ctx->mod, def->bit_size);
4986 }
4987 
4988 static bool
emit_load_const(struct ntd_context * ctx,nir_load_const_instr * load_const)4989 emit_load_const(struct ntd_context *ctx, nir_load_const_instr *load_const)
4990 {
4991    for (uint32_t i = 0; i < load_const->def.num_components; ++i) {
4992       const struct dxil_type *type = dxil_type_for_const(ctx, &load_const->def);
4993       store_ssa_def(ctx, &load_const->def, i, get_value_for_const(&ctx->mod, &load_const->value[i], type));
4994    }
4995    return true;
4996 }
4997 
/* Lower a NIR deref instruction.  For ordinary variables this just stores
 * an index that a later load/store turns into a GEP; for arrays of
 * samplers/images/textures it accumulates a flat binding index and, at the
 * end of the chain, creates the resource handle. */
static bool
emit_deref(struct ntd_context* ctx, nir_deref_instr* instr)
{
   /* There's two possible reasons we might be walking through derefs:
    * 1. Computing an index to be used for a texture/sampler/image binding, which
    *    can only do array indexing and should compute the indices along the way with
    *    array-of-array sizes.
    * 2. Storing an index to be used in a GEP for access to a variable.
    */
   nir_variable *var = nir_deref_instr_get_variable(instr);
   assert(var);

   /* Case 1 applies only to resource types (possibly wrapped in arrays). */
   bool is_aoa_size =
      glsl_type_is_sampler(glsl_without_array(var->type)) ||
      glsl_type_is_image(glsl_without_array(var->type)) ||
      glsl_type_is_texture(glsl_without_array(var->type));

   if (!is_aoa_size) {
      /* Just store the values, we'll use these to build a GEP in the load or store */
      switch (instr->deref_type) {
      case nir_deref_type_var:
         store_def(ctx, &instr->def, 0, dxil_module_get_int_const(&ctx->mod, 0, instr->def.bit_size));
         return true;
      case nir_deref_type_array:
         store_def(ctx, &instr->def, 0, get_src(ctx, &instr->arr.index, 0, nir_type_int));
         return true;
      case nir_deref_type_struct:
         store_def(ctx, &instr->def, 0, dxil_module_get_int_const(&ctx->mod, instr->strct.index, 32));
         return true;
      default:
         unreachable("Other deref types not supported");
      }
   }

   /* In the CL environment, there's nothing to emit. Any references to
    * derefs will emit the necessary logic to handle scratch/shared GEP addressing
    */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_CL)
      return true;

   const struct glsl_type *type = instr->type;
   const struct dxil_value *binding;
   /* GL uses driver_location as the binding slot; other environments use
    * the explicit binding. */
   unsigned binding_val = ctx->opts->environment == DXIL_ENVIRONMENT_GL ?
      var->data.driver_location : var->data.binding;

   if (instr->deref_type == nir_deref_type_var) {
      binding = dxil_module_get_int32_const(&ctx->mod, binding_val);
   } else {
      /* Accumulate: parent's partial index plus this level's index, scaled
       * by the array-of-array size when more array levels remain. */
      const struct dxil_value *base = get_src(ctx, &instr->parent, 0, nir_type_uint32);
      const struct dxil_value *offset = get_src(ctx, &instr->arr.index, 0, nir_type_uint32);
      if (!base || !offset)
         return false;

      if (glsl_type_is_array(instr->type)) {
         offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_MUL, offset,
            dxil_module_get_int32_const(&ctx->mod, glsl_get_aoa_size(instr->type)), 0);
         if (!offset)
            return false;
      }
      binding = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, base, offset, 0);
   }

   if (!binding)
      return false;

   /* Haven't finished chasing the deref chain yet, just store the value */
   if (glsl_type_is_array(type)) {
      store_def(ctx, &instr->def, 0, binding);
      return true;
   }

   /* End of the chain: pick the resource class and create the handle. */
   assert(glsl_type_is_sampler(type) || glsl_type_is_image(type) || glsl_type_is_texture(type));
   enum dxil_resource_class res_class;
   if (glsl_type_is_image(type))
      res_class = DXIL_RESOURCE_CLASS_UAV;
   else if (glsl_type_is_sampler(type))
      res_class = DXIL_RESOURCE_CLASS_SAMPLER;
   else
      res_class = DXIL_RESOURCE_CLASS_SRV;

   /* Outside Vulkan, images live in space 1 and everything else in 0. */
   unsigned descriptor_set = ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN ?
      var->data.descriptor_set : (glsl_type_is_image(type) ? 1 : 0);
   const struct dxil_value *handle = emit_createhandle_call_dynamic(ctx, res_class,
      descriptor_set, binding_val, binding, false);
   if (!handle)
      return false;

   store_ssa_def(ctx, &instr->def, 0, handle);
   return true;
}
5088 
/* Emit a conditional branch to the two given block indices.  Both targets
 * must already be resolved (non-negative). */
static bool
emit_cond_branch(struct ntd_context *ctx, const struct dxil_value *cond,
                 int true_block, int false_block)
{
   assert(cond);
   assert(true_block >= 0);
   assert(false_block >= 0);
   return dxil_emit_branch(&ctx->mod, cond, true_block, false_block);
}
5098 
/* Emit an unconditional branch (NULL condition, -1 false target) to the
 * given block index. */
static bool
emit_branch(struct ntd_context *ctx, int block)
{
   assert(block >= 0);
   return dxil_emit_branch(&ctx->mod, NULL, block, -1);
}
5105 
5106 static bool
emit_jump(struct ntd_context * ctx,nir_jump_instr * instr)5107 emit_jump(struct ntd_context *ctx, nir_jump_instr *instr)
5108 {
5109    switch (instr->type) {
5110    case nir_jump_break:
5111    case nir_jump_continue:
5112       assert(instr->instr.block->successors[0]);
5113       assert(!instr->instr.block->successors[1]);
5114       return emit_branch(ctx, instr->instr.block->successors[0]->index);
5115 
5116    default:
5117       unreachable("Unsupported jump type\n");
5118    }
5119 }
5120 
/* Bookkeeping for a NIR phi: one scalar DXIL phi instruction per vector
 * component.  Incoming values are attached later by fixup_phi() once all
 * predecessor blocks have been emitted. */
struct phi_block {
   /* Number of vector components in the NIR phi's def. */
   unsigned num_components;
   /* One scalar DXIL phi per component. */
   struct dxil_instr *comp[NIR_MAX_VEC_COMPONENTS];
};
5125 
5126 static bool
emit_phi(struct ntd_context * ctx,nir_phi_instr * instr)5127 emit_phi(struct ntd_context *ctx, nir_phi_instr *instr)
5128 {
5129    const struct dxil_type *type = NULL;
5130    nir_foreach_phi_src(src, instr) {
5131       /* All sources have the same type, just use the first one */
5132       type = dxil_value_get_type(ctx->defs[src->src.ssa->index].chans[0]);
5133       break;
5134    }
5135 
5136    struct phi_block *vphi = ralloc(ctx->phis, struct phi_block);
5137    vphi->num_components = instr->def.num_components;
5138 
5139    for (unsigned i = 0; i < vphi->num_components; ++i) {
5140       struct dxil_instr *phi = vphi->comp[i] = dxil_emit_phi(&ctx->mod, type);
5141       if (!phi)
5142          return false;
5143       store_ssa_def(ctx, &instr->def, i, dxil_instr_get_return_value(phi));
5144    }
5145    _mesa_hash_table_insert(ctx->phis, instr, vphi);
5146    return true;
5147 }
5148 
/* Second pass over a phi emitted by emit_phi(): now that every predecessor
 * block has an index and a value, attach the incoming (value, block) pairs
 * to each per-component DXIL phi.  Pairs are flushed to
 * dxil_phi_add_incoming() in batches of 16 to bound stack usage. */
static bool
fixup_phi(struct ntd_context *ctx, nir_phi_instr *instr,
          struct phi_block *vphi)
{
   const struct dxil_value *values[16];
   unsigned blocks[16];
   for (unsigned i = 0; i < vphi->num_components; ++i) {
      size_t num_incoming = 0;
      nir_foreach_phi_src(src, instr) {
         const struct dxil_value *val = get_src_ssa(ctx, src->src.ssa, i);
         values[num_incoming] = val;
         blocks[num_incoming] = src->pred->index;
         ++num_incoming;
         /* Batch full: flush and start refilling the scratch arrays. */
         if (num_incoming == ARRAY_SIZE(values)) {
            if (!dxil_phi_add_incoming(vphi->comp[i], values, blocks,
                                       num_incoming))
               return false;
            num_incoming = 0;
         }
      }
      /* Flush any remaining partial batch. */
      if (num_incoming > 0 && !dxil_phi_add_incoming(vphi->comp[i], values,
                                                     blocks, num_incoming))
         return false;
   }
   return true;
}
5175 
5176 static unsigned
get_n_src(struct ntd_context * ctx,const struct dxil_value ** values,unsigned max_components,nir_tex_src * src,nir_alu_type type)5177 get_n_src(struct ntd_context *ctx, const struct dxil_value **values,
5178           unsigned max_components, nir_tex_src *src, nir_alu_type type)
5179 {
5180    unsigned num_components = nir_src_num_components(src->src);
5181    unsigned i = 0;
5182 
5183    assert(num_components <= max_components);
5184 
5185    for (i = 0; i < num_components; ++i) {
5186       values[i] = get_src(ctx, &src->src, i, type);
5187       if (!values[i])
5188          return 0;
5189    }
5190 
5191    return num_components;
5192 }
5193 
/* Fill the unused tail of a texture-parameter array with `undef`, so every
 * slot passed to a DXIL intrinsic call is a valid dxil_value.
 */
#define PAD_SRC(ctx, array, components, undef) \
   for (unsigned i = components; i < ARRAY_SIZE(array); ++i) { \
      array[i] = undef; \
   }
5198 
5199 static const struct dxil_value *
emit_sample(struct ntd_context * ctx,struct texop_parameters * params)5200 emit_sample(struct ntd_context *ctx, struct texop_parameters *params)
5201 {
5202    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sample", params->overload);
5203    if (!func)
5204       return NULL;
5205 
5206    const struct dxil_value *args[11] = {
5207       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE),
5208       params->tex, params->sampler,
5209       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5210       params->offset[0], params->offset[1], params->offset[2],
5211       params->min_lod
5212    };
5213 
5214    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5215 }
5216 
5217 static const struct dxil_value *
emit_sample_bias(struct ntd_context * ctx,struct texop_parameters * params)5218 emit_sample_bias(struct ntd_context *ctx, struct texop_parameters *params)
5219 {
5220    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleBias", params->overload);
5221    if (!func)
5222       return NULL;
5223 
5224    assert(params->bias != NULL);
5225 
5226    const struct dxil_value *args[12] = {
5227       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_BIAS),
5228       params->tex, params->sampler,
5229       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5230       params->offset[0], params->offset[1], params->offset[2],
5231       params->bias, params->min_lod
5232    };
5233 
5234    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5235 }
5236 
5237 static const struct dxil_value *
emit_sample_level(struct ntd_context * ctx,struct texop_parameters * params)5238 emit_sample_level(struct ntd_context *ctx, struct texop_parameters *params)
5239 {
5240    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleLevel", params->overload);
5241    if (!func)
5242       return NULL;
5243 
5244    assert(params->lod_or_sample != NULL);
5245 
5246    const struct dxil_value *args[11] = {
5247       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_LEVEL),
5248       params->tex, params->sampler,
5249       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5250       params->offset[0], params->offset[1], params->offset[2],
5251       params->lod_or_sample
5252    };
5253 
5254    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5255 }
5256 
5257 static const struct dxil_value *
emit_sample_cmp(struct ntd_context * ctx,struct texop_parameters * params)5258 emit_sample_cmp(struct ntd_context *ctx, struct texop_parameters *params)
5259 {
5260    const struct dxil_func *func;
5261    enum dxil_intr opcode;
5262 
5263    func = dxil_get_function(&ctx->mod, "dx.op.sampleCmp", DXIL_F32);
5264    opcode = DXIL_INTR_SAMPLE_CMP;
5265 
5266    if (!func)
5267       return NULL;
5268 
5269    const struct dxil_value *args[12] = {
5270       dxil_module_get_int32_const(&ctx->mod, opcode),
5271       params->tex, params->sampler,
5272       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5273       params->offset[0], params->offset[1], params->offset[2],
5274       params->cmp, params->min_lod
5275    };
5276 
5277    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5278 }
5279 
5280 static const struct dxil_value *
emit_sample_cmp_level_zero(struct ntd_context * ctx,struct texop_parameters * params)5281 emit_sample_cmp_level_zero(struct ntd_context *ctx, struct texop_parameters *params)
5282 {
5283    const struct dxil_func *func;
5284    enum dxil_intr opcode;
5285 
5286    func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevelZero", DXIL_F32);
5287    opcode = DXIL_INTR_SAMPLE_CMP_LVL_ZERO;
5288 
5289    if (!func)
5290       return NULL;
5291 
5292    const struct dxil_value *args[11] = {
5293       dxil_module_get_int32_const(&ctx->mod, opcode),
5294       params->tex, params->sampler,
5295       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5296       params->offset[0], params->offset[1], params->offset[2],
5297       params->cmp
5298    };
5299 
5300    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5301 }
5302 
5303 static const struct dxil_value *
emit_sample_cmp_level(struct ntd_context * ctx,struct texop_parameters * params)5304 emit_sample_cmp_level(struct ntd_context *ctx, struct texop_parameters *params)
5305 {
5306    ctx->mod.feats.advanced_texture_ops = true;
5307    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevel", params->overload);
5308    if (!func)
5309       return NULL;
5310 
5311    assert(params->lod_or_sample != NULL);
5312 
5313    const struct dxil_value *args[12] = {
5314       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_LEVEL),
5315       params->tex, params->sampler,
5316       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5317       params->offset[0], params->offset[1], params->offset[2],
5318       params->cmp, params->lod_or_sample
5319    };
5320 
5321    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5322 }
5323 
5324 static const struct dxil_value *
emit_sample_cmp_bias(struct ntd_context * ctx,struct texop_parameters * params)5325 emit_sample_cmp_bias(struct ntd_context *ctx, struct texop_parameters *params)
5326 {
5327    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpBias", params->overload);
5328    if (!func)
5329       return NULL;
5330 
5331    assert(params->bias != NULL);
5332    ctx->mod.feats.sample_cmp_bias_gradient = 1;
5333 
5334    const struct dxil_value *args[13] = {
5335       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_BIAS),
5336       params->tex, params->sampler,
5337       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5338       params->offset[0], params->offset[1], params->offset[2],
5339       params->cmp, params->bias, params->min_lod
5340    };
5341 
5342    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5343 }
5344 
5345 static const struct dxil_value *
emit_sample_grad(struct ntd_context * ctx,struct texop_parameters * params)5346 emit_sample_grad(struct ntd_context *ctx, struct texop_parameters *params)
5347 {
5348    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleGrad", params->overload);
5349    if (!func)
5350       return false;
5351 
5352    const struct dxil_value *args[17] = {
5353       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_GRAD),
5354       params->tex, params->sampler,
5355       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5356       params->offset[0], params->offset[1], params->offset[2],
5357       params->dx[0], params->dx[1], params->dx[2],
5358       params->dy[0], params->dy[1], params->dy[2],
5359       params->min_lod
5360    };
5361 
5362    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5363 }
5364 
5365 static const struct dxil_value *
emit_sample_cmp_grad(struct ntd_context * ctx,struct texop_parameters * params)5366 emit_sample_cmp_grad(struct ntd_context *ctx, struct texop_parameters *params)
5367 {
5368    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpGrad", params->overload);
5369    if (!func)
5370       return false;
5371 
5372    ctx->mod.feats.sample_cmp_bias_gradient = 1;
5373 
5374    const struct dxil_value *args[18] = {
5375       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_CMP_GRAD),
5376       params->tex, params->sampler,
5377       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
5378       params->offset[0], params->offset[1], params->offset[2],
5379       params->cmp,
5380       params->dx[0], params->dx[1], params->dx[2],
5381       params->dy[0], params->dy[1], params->dy[2],
5382       params->min_lod
5383    };
5384 
5385    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5386 }
5387 
5388 static const struct dxil_value *
emit_texel_fetch(struct ntd_context * ctx,struct texop_parameters * params)5389 emit_texel_fetch(struct ntd_context *ctx, struct texop_parameters *params)
5390 {
5391    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", params->overload);
5392    if (!func)
5393       return false;
5394 
5395    if (!params->lod_or_sample)
5396       params->lod_or_sample = dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32));
5397 
5398    const struct dxil_value *args[] = {
5399       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOAD),
5400       params->tex,
5401       params->lod_or_sample, params->coord[0], params->coord[1], params->coord[2],
5402       params->offset[0], params->offset[1], params->offset[2]
5403    };
5404 
5405    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5406 }
5407 
5408 static const struct dxil_value *
emit_texture_lod(struct ntd_context * ctx,struct texop_parameters * params,bool clamped)5409 emit_texture_lod(struct ntd_context *ctx, struct texop_parameters *params, bool clamped)
5410 {
5411    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.calculateLOD", DXIL_F32);
5412    if (!func)
5413       return false;
5414 
5415    const struct dxil_value *args[] = {
5416       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOD),
5417       params->tex,
5418       params->sampler,
5419       params->coord[0],
5420       params->coord[1],
5421       params->coord[2],
5422       dxil_module_get_int1_const(&ctx->mod, clamped ? 1 : 0)
5423    };
5424 
5425    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
5426 }
5427 
5428 static const struct dxil_value *
emit_texture_gather(struct ntd_context * ctx,struct texop_parameters * params,unsigned component)5429 emit_texture_gather(struct ntd_context *ctx, struct texop_parameters *params, unsigned component)
5430 {
5431    const struct dxil_func *func = dxil_get_function(&ctx->mod,
5432       params->cmp ? "dx.op.textureGatherCmp" : "dx.op.textureGather", params->overload);
5433    if (!func)
5434       return false;
5435 
5436    const struct dxil_value *args[] = {
5437       dxil_module_get_int32_const(&ctx->mod, params->cmp ?
5438          DXIL_INTR_TEXTURE_GATHER_CMP : DXIL_INTR_TEXTURE_GATHER),
5439       params->tex,
5440       params->sampler,
5441       params->coord[0],
5442       params->coord[1],
5443       params->coord[2],
5444       params->coord[3],
5445       params->offset[0],
5446       params->offset[1],
5447       dxil_module_get_int32_const(&ctx->mod, component),
5448       params->cmp
5449    };
5450 
5451    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args) - (params->cmp ? 0 : 1));
5452 }
5453 
/* Lower a NIR texture instruction to the matching DXIL sample/load/query
 * intrinsic. First gathers all sources (coords, offsets, bias, LOD,
 * derivatives, comparator, resource/sampler handles) into `params`, then
 * dispatches on instr->op to the appropriate emit_* helper and stores the
 * per-component results into the NIR def.
 */
static bool
emit_tex(struct ntd_context *ctx, nir_tex_instr *instr)
{
   struct texop_parameters params;
   memset(&params, 0, sizeof(struct texop_parameters));
   /* Outside Vulkan, texture/sampler handles come from pre-created tables;
    * in Vulkan they arrive via deref/handle sources handled below.
    */
   if (ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN) {
      params.tex = ctx->srv_handles[instr->texture_index];
      params.sampler = ctx->sampler_handles[instr->sampler_index];
   }

   const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
   const struct dxil_type *float_type = dxil_module_get_float_type(&ctx->mod, 32);
   const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);
   const struct dxil_value *float_undef = dxil_module_get_undef(&ctx->mod, float_type);

   unsigned coord_components = 0, offset_components = 0, dx_components = 0, dy_components = 0;
   params.overload = get_overload(instr->dest_type, 32);

   /* Collect all instruction sources into params. */
   bool lod_is_zero = false;
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      nir_alu_type type = nir_tex_instr_src_type(instr, i);

      switch (instr->src[i].src_type) {
      case nir_tex_src_coord:
         coord_components = get_n_src(ctx, params.coord, ARRAY_SIZE(params.coord),
                                      &instr->src[i], type);
         if (!coord_components)
            return false;
         break;

      case nir_tex_src_offset:
         offset_components = get_n_src(ctx, params.offset, ARRAY_SIZE(params.offset),
                                       &instr->src[i],  nir_type_int);
         if (!offset_components)
            return false;

         /* Dynamic offsets were only allowed with gather, until "advanced texture ops" in SM7 */
         if (!nir_src_is_const(instr->src[i].src) && instr->op != nir_texop_tg4)
            ctx->mod.feats.advanced_texture_ops = true;
         break;

      case nir_tex_src_bias:
         assert(instr->op == nir_texop_txb);
         assert(nir_src_num_components(instr->src[i].src) == 1);
         params.bias = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
         if (!params.bias)
            return false;
         break;

      case nir_tex_src_lod:
         assert(nir_src_num_components(instr->src[i].src) == 1);
         /* For txf_ms the LOD source must be the constant 0; the sample
          * index is carried by nir_tex_src_ms_index instead.
          */
         if (instr->op == nir_texop_txf_ms) {
            assert(nir_src_as_int(instr->src[i].src) == 0);
            break;
         }

         /* Buffers don't have a LOD */
         if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF)
            params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, type);
         else
            params.lod_or_sample = int_undef;
         if (!params.lod_or_sample)
            return false;

         /* Remembered so txl with constant LOD 0 can use the level-less
          * cmp variant on older shader models (see nir_texop_txl below).
          */
         if (nir_src_is_const(instr->src[i].src) && nir_src_as_float(instr->src[i].src) == 0.0f)
            lod_is_zero = true;
         break;

      case nir_tex_src_min_lod:
         assert(nir_src_num_components(instr->src[i].src) == 1);
         params.min_lod = get_src(ctx, &instr->src[i].src, 0, type);
         if (!params.min_lod)
            return false;
         break;

      case nir_tex_src_comparator:
         assert(nir_src_num_components(instr->src[i].src) == 1);
         params.cmp = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
         if (!params.cmp)
            return false;
         break;

      case nir_tex_src_ddx:
         dx_components = get_n_src(ctx, params.dx, ARRAY_SIZE(params.dx),
                                   &instr->src[i], nir_type_float);
         if (!dx_components)
            return false;
         break;

      case nir_tex_src_ddy:
         dy_components = get_n_src(ctx, params.dy, ARRAY_SIZE(params.dy),
                                   &instr->src[i], nir_type_float);
         if (!dy_components)
            return false;
         break;

      case nir_tex_src_ms_index:
         params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, nir_type_int);
         if (!params.lod_or_sample)
            return false;
         break;

      case nir_tex_src_texture_deref:
         assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN);
         params.tex = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
         break;

      case nir_tex_src_sampler_deref:
         assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN);
         params.sampler = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
         break;

      /* Dynamic indexing: build a handle from base index + dynamic offset. */
      case nir_tex_src_texture_offset:
         params.tex = emit_createhandle_call_dynamic(ctx, DXIL_RESOURCE_CLASS_SRV,
            0, instr->texture_index,
            dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
               get_src(ctx, &instr->src[i].src, 0, nir_type_uint),
               dxil_module_get_int32_const(&ctx->mod, instr->texture_index), 0),
            instr->texture_non_uniform);
         break;

      case nir_tex_src_sampler_offset:
         if (nir_tex_instr_need_sampler(instr)) {
            params.sampler = emit_createhandle_call_dynamic(ctx, DXIL_RESOURCE_CLASS_SAMPLER,
               0, instr->sampler_index,
               dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
                  get_src(ctx, &instr->src[i].src, 0, nir_type_uint),
                  dxil_module_get_int32_const(&ctx->mod, instr->sampler_index), 0),
               instr->sampler_non_uniform);
         }
         break;

      case nir_tex_src_texture_handle:
         params.tex = create_srv_handle(ctx, instr, &instr->src[i].src);
         break;

      case nir_tex_src_sampler_handle:
         if (nir_tex_instr_need_sampler(instr))
            params.sampler = create_sampler_handle(ctx, instr->is_shadow, &instr->src[i].src);
         break;

      case nir_tex_src_projector:
         unreachable("Texture projector should have been lowered");

      default:
         fprintf(stderr, "texture source: %d\n", instr->src[i].src_type);
         unreachable("unknown texture source");
      }
   }

   assert(params.tex != NULL);
   /* Only fetches and queries may legitimately lack a sampler. */
   assert(instr->op == nir_texop_txf ||
          instr->op == nir_texop_txf_ms ||
          nir_tex_instr_is_query(instr) ||
          params.sampler != NULL);

   /* Pad unused coord/offset slots with undef so the intrinsic calls below
    * always receive a full argument list.
    */
   PAD_SRC(ctx, params.coord, coord_components, float_undef);
   PAD_SRC(ctx, params.offset, offset_components, int_undef);
   if (!params.min_lod) params.min_lod = float_undef;

   /* Dispatch on the texture op. */
   const struct dxil_value *sample = NULL;
   switch (instr->op) {
   case nir_texop_txb:
      if (params.cmp != NULL && ctx->mod.minor_version >= 8)
         sample = emit_sample_cmp_bias(ctx, &params);
      else
         sample = emit_sample_bias(ctx, &params);
      break;

   case nir_texop_tex:
      if (params.cmp != NULL) {
         sample = emit_sample_cmp(ctx, &params);
         break;
      } else if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) {
         sample = emit_sample(ctx, &params);
         break;
      }
      /* Implicit derivatives are only available in pixel shaders; fall back
       * to an explicit LOD of 0 elsewhere.
       */
      params.lod_or_sample = dxil_module_get_float_const(&ctx->mod, 0);
      lod_is_zero = true;
      FALLTHROUGH;
   case nir_texop_txl:
      if (lod_is_zero && params.cmp != NULL && ctx->mod.minor_version < 7) {
         /* Prior to SM 6.7, if the level is constant 0.0, ignore the LOD argument,
          * so level-less DXIL instructions are used. This is needed to avoid emitting
          * dx.op.sampleCmpLevel, which would not be available.
          */
         sample = emit_sample_cmp_level_zero(ctx, &params);
      } else {
         if (params.cmp != NULL)
            sample = emit_sample_cmp_level(ctx, &params);
         else
            sample = emit_sample_level(ctx, &params);
      }
      break;

   case nir_texop_txd:
      PAD_SRC(ctx, params.dx, dx_components, float_undef);
      PAD_SRC(ctx, params.dy, dy_components,float_undef);
      if (params.cmp != NULL && ctx->mod.minor_version >= 8)
         sample = emit_sample_cmp_grad(ctx, &params);
      else
         sample = emit_sample_grad(ctx, &params);
      break;

   case nir_texop_txf:
   case nir_texop_txf_ms:
      if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
         params.coord[1] = int_undef;
         sample = emit_bufferload_call(ctx, params.tex, params.coord, params.overload);
      } else {
         PAD_SRC(ctx, params.coord, coord_components, int_undef);
         sample = emit_texel_fetch(ctx, &params);
      }
      break;

   case nir_texop_txs:
      sample = emit_texture_size(ctx, &params);
      break;

   case nir_texop_tg4:
      sample = emit_texture_gather(ctx, &params, instr->component);
      break;

   case nir_texop_lod:
      /* Component 0: clamped LOD, component 1: unclamped LOD. */
      sample = emit_texture_lod(ctx, &params, true);
      store_def(ctx, &instr->def, 0, sample);
      sample = emit_texture_lod(ctx, &params, false);
      store_def(ctx, &instr->def, 1, sample);
      return true;

   case nir_texop_query_levels: {
      /* getDimensions returns the mip count in element 3. */
      params.lod_or_sample = dxil_module_get_int_const(&ctx->mod, 0, 32);
      sample = emit_texture_size(ctx, &params);
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3);
      store_def(ctx, &instr->def, 0, retval);
      return true;
   }

   case nir_texop_texture_samples: {
      /* getDimensions returns the sample count in element 3. */
      params.lod_or_sample = int_undef;
      sample = emit_texture_size(ctx, &params);
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3);
      store_def(ctx, &instr->def, 0, retval);
      return true;
   }

   default:
      fprintf(stderr, "texture op: %d\n", instr->op);
      unreachable("unknown texture op");
   }

   if (!sample)
      return false;

   /* Scatter the struct-of-components result into the NIR def. */
   for (unsigned i = 0; i < instr->def.num_components; ++i) {
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, i);
      store_def(ctx, &instr->def, i, retval);
   }

   return true;
}
5715 
5716 static bool
emit_undefined(struct ntd_context * ctx,nir_undef_instr * undef)5717 emit_undefined(struct ntd_context *ctx, nir_undef_instr *undef)
5718 {
5719    for (unsigned i = 0; i < undef->def.num_components; ++i)
5720       store_ssa_def(ctx, &undef->def, i, dxil_module_get_int32_const(&ctx->mod, 0));
5721    return true;
5722 }
5723 
emit_instr(struct ntd_context * ctx,struct nir_instr * instr)5724 static bool emit_instr(struct ntd_context *ctx, struct nir_instr* instr)
5725 {
5726    switch (instr->type) {
5727    case nir_instr_type_alu:
5728       return emit_alu(ctx, nir_instr_as_alu(instr));
5729    case nir_instr_type_intrinsic:
5730       return emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
5731    case nir_instr_type_load_const:
5732       return emit_load_const(ctx, nir_instr_as_load_const(instr));
5733    case nir_instr_type_deref:
5734       return emit_deref(ctx, nir_instr_as_deref(instr));
5735    case nir_instr_type_jump:
5736       return emit_jump(ctx, nir_instr_as_jump(instr));
5737    case nir_instr_type_phi:
5738       return emit_phi(ctx, nir_instr_as_phi(instr));
5739    case nir_instr_type_tex:
5740       return emit_tex(ctx, nir_instr_as_tex(instr));
5741    case nir_instr_type_undef:
5742       return emit_undefined(ctx, nir_instr_as_undef(instr));
5743    default:
5744       log_nir_instr_unsupported(ctx->logger, "Unimplemented instruction type",
5745                                 instr);
5746       return false;
5747    }
5748 }
5749 
5750 
5751 static bool
emit_block(struct ntd_context * ctx,struct nir_block * block)5752 emit_block(struct ntd_context *ctx, struct nir_block *block)
5753 {
5754    assert(block->index < ctx->mod.cur_emitting_func->num_basic_block_ids);
5755    ctx->mod.cur_emitting_func->basic_block_ids[block->index] = ctx->mod.cur_emitting_func->curr_block;
5756 
5757    nir_foreach_instr(instr, block) {
5758       TRACE_CONVERSION(instr);
5759 
5760       if (!emit_instr(ctx, instr))  {
5761          return false;
5762       }
5763    }
5764    return true;
5765 }
5766 
5767 static bool
5768 emit_cf_list(struct ntd_context *ctx, struct exec_list *list);
5769 
5770 static bool
emit_if(struct ntd_context * ctx,struct nir_if * if_stmt)5771 emit_if(struct ntd_context *ctx, struct nir_if *if_stmt)
5772 {
5773    assert(nir_src_num_components(if_stmt->condition) == 1);
5774    const struct dxil_value *cond = get_src(ctx, &if_stmt->condition, 0,
5775                                            nir_type_bool);
5776    if (!cond)
5777       return false;
5778 
5779    /* prepare blocks */
5780    nir_block *then_block = nir_if_first_then_block(if_stmt);
5781    assert(nir_if_last_then_block(if_stmt)->successors[0]);
5782    assert(!nir_if_last_then_block(if_stmt)->successors[1]);
5783    int then_succ = nir_if_last_then_block(if_stmt)->successors[0]->index;
5784 
5785    nir_block *else_block = NULL;
5786    int else_succ = -1;
5787    if (!exec_list_is_empty(&if_stmt->else_list)) {
5788       else_block = nir_if_first_else_block(if_stmt);
5789       assert(nir_if_last_else_block(if_stmt)->successors[0]);
5790       assert(!nir_if_last_else_block(if_stmt)->successors[1]);
5791       else_succ = nir_if_last_else_block(if_stmt)->successors[0]->index;
5792    }
5793 
5794    if (!emit_cond_branch(ctx, cond, then_block->index,
5795                          else_block ? else_block->index : then_succ))
5796       return false;
5797 
5798    /* handle then-block */
5799    if (!emit_cf_list(ctx, &if_stmt->then_list) ||
5800        (!nir_block_ends_in_jump(nir_if_last_then_block(if_stmt)) &&
5801         !emit_branch(ctx, then_succ)))
5802       return false;
5803 
5804    if (else_block) {
5805       /* handle else-block */
5806       if (!emit_cf_list(ctx, &if_stmt->else_list) ||
5807           (!nir_block_ends_in_jump(nir_if_last_else_block(if_stmt)) &&
5808            !emit_branch(ctx, else_succ)))
5809          return false;
5810    }
5811 
5812    return true;
5813 }
5814 
5815 static bool
emit_loop(struct ntd_context * ctx,nir_loop * loop)5816 emit_loop(struct ntd_context *ctx, nir_loop *loop)
5817 {
5818    assert(!nir_loop_has_continue_construct(loop));
5819    nir_block *first_block = nir_loop_first_block(loop);
5820    nir_block *last_block = nir_loop_last_block(loop);
5821 
5822    assert(last_block->successors[0]);
5823    assert(!last_block->successors[1]);
5824 
5825    if (!emit_branch(ctx, first_block->index))
5826       return false;
5827 
5828    if (!emit_cf_list(ctx, &loop->body))
5829       return false;
5830 
5831    /* If the loop's last block doesn't explicitly jump somewhere, then there's
5832     * an implicit continue that should take it back to the first loop block
5833     */
5834    nir_instr *last_instr = nir_block_last_instr(last_block);
5835    if ((!last_instr || last_instr->type != nir_instr_type_jump) &&
5836        !emit_branch(ctx, first_block->index))
5837       return false;
5838 
5839    return true;
5840 }
5841 
5842 static bool
emit_cf_list(struct ntd_context * ctx,struct exec_list * list)5843 emit_cf_list(struct ntd_context *ctx, struct exec_list *list)
5844 {
5845    foreach_list_typed(nir_cf_node, node, node, list) {
5846       switch (node->type) {
5847       case nir_cf_node_block:
5848          if (!emit_block(ctx, nir_cf_node_as_block(node)))
5849             return false;
5850          break;
5851 
5852       case nir_cf_node_if:
5853          if (!emit_if(ctx, nir_cf_node_as_if(node)))
5854             return false;
5855          break;
5856 
5857       case nir_cf_node_loop:
5858          if (!emit_loop(ctx, nir_cf_node_as_loop(node)))
5859             return false;
5860          break;
5861 
5862       default:
5863          unreachable("unsupported cf-list node");
5864          break;
5865       }
5866    }
5867    return true;
5868 }
5869 
5870 static void
insert_sorted_by_binding(struct exec_list * var_list,nir_variable * new_var)5871 insert_sorted_by_binding(struct exec_list *var_list, nir_variable *new_var)
5872 {
5873    nir_foreach_variable_in_list(var, var_list) {
5874       if (var->data.binding > new_var->data.binding) {
5875          exec_node_insert_node_before(&var->node, &new_var->node);
5876          return;
5877       }
5878    }
5879    exec_list_push_tail(var_list, &new_var->node);
5880 }
5881 
5882 
5883 static void
sort_uniforms_by_binding_and_remove_structs(nir_shader * s)5884 sort_uniforms_by_binding_and_remove_structs(nir_shader *s)
5885 {
5886    struct exec_list new_list;
5887    exec_list_make_empty(&new_list);
5888 
5889    nir_foreach_variable_with_modes_safe(var, s, nir_var_uniform) {
5890       exec_node_remove(&var->node);
5891       const struct glsl_type *type = glsl_without_array(var->type);
5892       if (!glsl_type_is_struct(type))
5893          insert_sorted_by_binding(&new_list, var);
5894    }
5895    exec_list_append(&s->variables, &new_list);
5896 }
5897 
5898 static bool
emit_cbvs(struct ntd_context * ctx)5899 emit_cbvs(struct ntd_context *ctx)
5900 {
5901    if (ctx->opts->environment != DXIL_ENVIRONMENT_GL) {
5902       nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ubo) {
5903          if (!emit_ubo_var(ctx, var))
5904             return false;
5905       }
5906    } else {
5907       if (ctx->shader->info.num_ubos) {
5908          const unsigned ubo_size = 16384 /*4096 vec4's*/;
5909          uint array_base = ctx->shader->info.first_ubo_is_default_ubo ? 1 : 0;
5910          bool has_ubo0 = ctx->shader->num_uniforms > 0 && ctx->shader->info.first_ubo_is_default_ubo;
5911          bool has_state_vars = ctx->opts->last_ubo_is_not_arrayed;
5912          unsigned ubo1_array_size = ctx->shader->info.num_ubos - array_base -
5913             (has_state_vars ? 1 : 0);
5914 
5915          if (has_ubo0 &&
5916              !emit_cbv(ctx, 0, 0, ubo_size, 1, "__ubo_uniforms"))
5917             return false;
5918          if (ubo1_array_size &&
5919              !emit_cbv(ctx, array_base, 0, ubo_size, ubo1_array_size, "__ubos"))
5920             return false;
5921          if (has_state_vars &&
5922              !emit_cbv(ctx, ctx->shader->info.num_ubos - 1, 0, ubo_size, 1, "__ubo_state_vars"))
5923             return false;
5924       }
5925    }
5926 
5927    return true;
5928 }
5929 
5930 static bool
emit_scratch(struct ntd_context * ctx,nir_function_impl * impl)5931 emit_scratch(struct ntd_context *ctx, nir_function_impl *impl)
5932 {
5933    uint32_t index = 0;
5934    nir_foreach_function_temp_variable(var, impl)
5935       var->data.driver_location = index++;
5936 
5937    if (ctx->scratchvars)
5938       ralloc_free((void *)ctx->scratchvars);
5939 
5940    ctx->scratchvars = ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, index);
5941 
5942    nir_foreach_function_temp_variable(var, impl) {
5943       const struct dxil_type *type = get_type_for_glsl_type(&ctx->mod, var->type);
5944       const struct dxil_value *length = dxil_module_get_int32_const(&ctx->mod, 1);
5945       const struct dxil_value *ptr = dxil_emit_alloca(&ctx->mod, type, length, 16);
5946       if (!ptr)
5947          return false;
5948 
5949       ctx->scratchvars[var->data.driver_location] = ptr;
5950    }
5951 
5952    return true;
5953 }
5954 
static bool
emit_function(struct ntd_context *ctx, nir_function *func, nir_function_impl *impl)
{
   /* Translate one NIR function into a DXIL function definition: declare
    * the function, set up per-function SSA bookkeeping, emit scratch
    * allocas and resource handles, walk the control-flow list, then patch
    * phi sources once all blocks exist. */
   assert(func->num_params == 0);
   nir_metadata_require(impl, nir_metadata_block_index);

   /* Optional fp32 denorm-mode function attribute, derived from the
    * float_controls execution modes (flush-to-zero vs. preserve). */
   const char *attr_keys[2] = { NULL };
   const char *attr_values[2] = { NULL };
   if (ctx->shader->info.float_controls_execution_mode &
       (FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32 | FLOAT_CONTROLS_DENORM_PRESERVE_FP32))
      attr_keys[0] = "fp32-denorm-mode";
   if (ctx->shader->info.float_controls_execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32)
      attr_values[0] = "ftz";
   else if (ctx->shader->info.float_controls_execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP32)
      attr_values[0] = "preserve";

   /* DXIL entry points take no arguments and return void. */
   const struct dxil_type *void_type = dxil_module_get_void_type(&ctx->mod);
   const struct dxil_type *func_type = dxil_module_add_function_type(&ctx->mod, void_type, NULL, 0);
   struct dxil_func_def *func_def = dxil_add_function_def(&ctx->mod, func->name, func_type, impl->num_blocks, attr_keys, attr_values);
   if (!func_def)
      return false;

   /* Remember the entry point / tess-ctrl patch-constant function so the
    * metadata emitter can reference them later. */
   if (func->is_entrypoint)
      ctx->main_func_def = func_def;
   else if (func == ctx->tess_ctrl_patch_constant_func)
      ctx->tess_ctrl_patch_constant_func_def = func_def;

   /* Per-SSA-def value table plus bitsets tracking which defs are used as
    * floats and/or ints (filled by nir_gather_types below). */
   ctx->defs = rzalloc_array(ctx->ralloc_ctx, struct dxil_def, impl->ssa_alloc);
   ctx->float_types = rzalloc_array(ctx->ralloc_ctx, BITSET_WORD, BITSET_WORDS(impl->ssa_alloc));
   ctx->int_types = rzalloc_array(ctx->ralloc_ctx, BITSET_WORD, BITSET_WORDS(impl->ssa_alloc));
   if (!ctx->defs || !ctx->float_types || !ctx->int_types)
      return false;
   ctx->num_defs = impl->ssa_alloc;

   /* Maps nir_phi_instr -> struct phi_block; fixed up after emission. */
   ctx->phis = _mesa_pointer_hash_table_create(ctx->ralloc_ctx);
   if (!ctx->phis)
      return false;

   nir_gather_types(impl, ctx->float_types, ctx->int_types);

   if (!emit_scratch(ctx, impl))
      return false;

   if (!emit_static_indexing_handles(ctx))
      return false;

   if (!emit_cf_list(ctx, &impl->body))
      return false;

   /* Phi incoming values can only be resolved once every predecessor
    * block has been emitted. */
   hash_table_foreach(ctx->phis, entry) {
      if (!fixup_phi(ctx, (nir_phi_instr *)entry->key,
                     (struct phi_block *)entry->data))
         return false;
   }

   if (!dxil_emit_ret_void(&ctx->mod))
      return false;

   /* Per-function state; early-return paths leave these to be reclaimed
    * with ctx->ralloc_ctx. */
   ralloc_free(ctx->defs);
   ctx->defs = NULL;
   _mesa_hash_table_destroy(ctx->phis, NULL);
   return true;
}
6018 
static bool
emit_module(struct ntd_context *ctx, const struct nir_to_dxil_options *opts)
{
   /* Emit all module-level content: resources (in validator-mandated
    * order), shared/constant globals, signatures, function bodies, and
    * finally metadata.  Returns false on any emission failure or if a
    * required shader-model feature is unavailable. */

   /* The validator forces us to emit resources in a specific order:
    * CBVs, Samplers, SRVs, UAVs. While we are at it also remove
    * stale struct uniforms, they are lowered but might not have been removed */
   sort_uniforms_by_binding_and_remove_structs(ctx->shader);

   /* CBVs */
   if (!emit_cbvs(ctx))
      return false;

   /* Samplers */
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
      unsigned count = glsl_type_get_sampler_count(var->type);
      assert(count == 0 || glsl_type_is_bare_sampler(glsl_without_array(var->type)));
      if (count > 0 && !emit_sampler(ctx, var, count))
         return false;
   }

   /* SRVs */
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
      unsigned count = glsl_type_get_texture_count(var->type);
      assert(count == 0 || glsl_type_is_texture(glsl_without_array(var->type)));
      if (count > 0 && !emit_srv(ctx, var, count))
         return false;
   }

   /* Handle read-only SSBOs as SRVs */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
         if ((var->data.access & ACCESS_NON_WRITEABLE) != 0) {
            unsigned count = 1;
            if (glsl_type_is_array(var->type))
               count = glsl_get_length(var->type);
            if (!emit_srv(ctx, var, count))
               return false;
         }
      }
   }

   if (!emit_shared_vars(ctx))
      return false;
   if (!emit_global_consts(ctx))
      return false;

   /* UAVs */
   if (ctx->shader->info.stage == MESA_SHADER_KERNEL) {
      /* CL kernels: globals become UAVs. */
      if (!emit_globals(ctx, opts->num_kernel_globals))
         return false;

   } else if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      /* Handle read/write SSBOs as UAVs */
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
         if ((var->data.access & ACCESS_NON_WRITEABLE) == 0) {
            unsigned count = 1;
            if (glsl_type_is_array(var->type))
               count = glsl_get_length(var->type);
            if (!emit_uav(ctx, var->data.binding, var->data.descriptor_set,
                        count, DXIL_COMP_TYPE_INVALID, 1,
                        DXIL_RESOURCE_KIND_RAW_BUFFER, var->data.access, var->name))
               return false;

         }
      }
   } else {
      /* GL: SSBOs are raw-buffer UAVs bound by index. */
      for (unsigned i = 0; i < ctx->shader->info.num_ssbos; ++i) {
         char name[64];
         snprintf(name, sizeof(name), "__ssbo%d", i);
         if (!emit_uav(ctx, i, 0, 1, DXIL_COMP_TYPE_INVALID, 1,
                       DXIL_RESOURCE_KIND_RAW_BUFFER, 0, name))
            return false;
      }
      /* To work around a WARP bug, bind these descriptors a second time in descriptor
       * space 2. Space 0 will be used for static indexing, while space 2 will be used
       * for dynamic indexing. Space 0 will be individual SSBOs in the DXIL shader, while
       * space 2 will be a single array.
       */
      if (ctx->shader->info.num_ssbos &&
          !emit_uav(ctx, 0, 2, ctx->shader->info.num_ssbos, DXIL_COMP_TYPE_INVALID, 1,
                    DXIL_RESOURCE_KIND_RAW_BUFFER, 0, "__ssbo_dynamic"))
         return false;
   }

   nir_foreach_image_variable(var, ctx->shader) {
      if (!emit_uav_var(ctx, var, glsl_type_get_image_count(var->type)))
         return false;
   }

   /* Detect sample-frequency execution: explicit sample shading, a read of
    * SV_SampleIndex, or (for fragment shaders) any sample-qualified input. */
   ctx->mod.info.has_per_sample_input =
      BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
      ctx->shader->info.fs.uses_sample_shading ||
      ctx->shader->info.fs.uses_sample_qualifier;
   if (!ctx->mod.info.has_per_sample_input && ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in | nir_var_system_value) {
         if (var->data.sample) {
            ctx->mod.info.has_per_sample_input = true;
            break;
         }
      }
   }

   /* From the Vulkan spec 1.3.238, section 15.8:
    * When Sample Shading is enabled, the x and y components of FragCoord reflect the location
    * of one of the samples corresponding to the shader invocation.
    *
    * In other words, if the fragment shader is executing per-sample, then the position variable
    * should always be per-sample,
    *
    * Also:
    * The Centroid interpolation decoration is ignored, but allowed, on FragCoord.
    */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      nir_variable *pos_var = nir_find_variable_with_location(ctx->shader, nir_var_shader_in, VARYING_SLOT_POS);
      if (pos_var) {
         if (ctx->mod.info.has_per_sample_input)
            pos_var->data.sample = true;
         pos_var->data.centroid = false;
      }
   }

   unsigned input_clip_size = ctx->mod.shader_kind == DXIL_PIXEL_SHADER ?
      ctx->shader->info.clip_distance_array_size : ctx->opts->input_clip_size;
   preprocess_signatures(&ctx->mod, ctx->shader, input_clip_size);

   nir_foreach_function_with_impl(func, impl, ctx->shader) {
      if (!emit_function(ctx, func, impl))
         return false;
   }

   /* Per-stage feature flags required by the DXIL runtime data. */
   if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_out) {
         if (var->data.location == FRAG_RESULT_STENCIL) {
            ctx->mod.feats.stencil_ref = true;
         }
      }
   } else if (ctx->shader->info.stage == MESA_SHADER_VERTEX ||
              ctx->shader->info.stage == MESA_SHADER_TESS_EVAL) {
      if (ctx->shader->info.outputs_written &
          (VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER))
         ctx->mod.feats.array_layer_from_vs_or_ds = true;
   } else if (ctx->shader->info.stage == MESA_SHADER_GEOMETRY ||
              ctx->shader->info.stage == MESA_SHADER_TESS_CTRL) {
      if (ctx->shader->info.inputs_read &
          (VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER))
         ctx->mod.feats.array_layer_from_vs_or_ds = true;
   }

   /* 16-bit native types require shader model 6.2. */
   if (ctx->mod.feats.native_low_precision && ctx->mod.minor_version < 2) {
      ctx->logger->log(ctx->logger->priv,
                       "Shader uses 16bit, which requires shader model 6.2, but 6.2 is unsupported\n");
      return false;
   }

   return emit_metadata(ctx) &&
          dxil_emit_module(&ctx->mod);
}
6176 
6177 static unsigned int
get_dxil_shader_kind(struct nir_shader * s)6178 get_dxil_shader_kind(struct nir_shader *s)
6179 {
6180    switch (s->info.stage) {
6181    case MESA_SHADER_VERTEX:
6182       return DXIL_VERTEX_SHADER;
6183    case MESA_SHADER_TESS_CTRL:
6184       return DXIL_HULL_SHADER;
6185    case MESA_SHADER_TESS_EVAL:
6186       return DXIL_DOMAIN_SHADER;
6187    case MESA_SHADER_GEOMETRY:
6188       return DXIL_GEOMETRY_SHADER;
6189    case MESA_SHADER_FRAGMENT:
6190       return DXIL_PIXEL_SHADER;
6191    case MESA_SHADER_KERNEL:
6192    case MESA_SHADER_COMPUTE:
6193       return DXIL_COMPUTE_SHADER;
6194    default:
6195       unreachable("unknown shader stage in nir_to_dxil");
6196       return DXIL_COMPUTE_SHADER;
6197    }
6198 }
6199 
6200 static unsigned
lower_bit_size_callback(const nir_instr * instr,void * data)6201 lower_bit_size_callback(const nir_instr* instr, void *data)
6202 {
6203    if (instr->type != nir_instr_type_alu)
6204       return 0;
6205    nir_alu_instr *alu = nir_instr_as_alu(instr);
6206 
6207    if (nir_op_infos[alu->op].is_conversion)
6208       return 0;
6209 
6210    if (nir_op_is_vec_or_mov(alu->op))
6211       return 0;
6212 
6213    unsigned num_inputs = nir_op_infos[alu->op].num_inputs;
6214    const struct nir_to_dxil_options *opts = (const struct nir_to_dxil_options*)data;
6215    unsigned min_bit_size = opts->lower_int16 ? 32 : 16;
6216 
6217    unsigned ret = 0;
6218    for (unsigned i = 0; i < num_inputs; i++) {
6219       unsigned bit_size = nir_src_bit_size(alu->src[i].src);
6220       if (bit_size != 1 && bit_size < min_bit_size)
6221          ret = min_bit_size;
6222    }
6223 
6224    return ret;
6225 }
6226 
6227 static bool
vectorize_filter(unsigned align_mul,unsigned align_offset,unsigned bit_size,unsigned num_components,int64_t hole_size,nir_intrinsic_instr * low,nir_intrinsic_instr * high,void * data)6228 vectorize_filter(
6229    unsigned align_mul,
6230    unsigned align_offset,
6231    unsigned bit_size,
6232    unsigned num_components,
6233    int64_t hole_size,
6234    nir_intrinsic_instr *low, nir_intrinsic_instr *high,
6235    void *data)
6236 {
6237    return hole_size <= 0 && util_is_power_of_two_nonzero(num_components);
6238 }
6239 
/* Callback data for lower_mem_access_bit_sizes_cb: bundles the NIR compiler
 * options with the DXIL-specific options (consulted for lower_int16). */
struct lower_mem_bit_sizes_data {
   const nir_shader_compiler_options *nir_options;
   const struct nir_to_dxil_options *dxil_options;
};
6244 
static nir_mem_access_size_align
lower_mem_access_bit_sizes_cb(nir_intrinsic_op intrin,
                              uint8_t bytes,
                              uint8_t bit_size_in,
                              uint32_t align_mul,
                              uint32_t align_offset,
                              bool offset_is_const,
                              enum gl_access_qualifier access,
                              const void *cb_data)
{
   /* nir_lower_mem_access_bit_sizes callback: pick a bit size, component
    * count, and alignment for a UBO/SSBO access that DXIL can express.
    * Supported bit sizes are clamped to [min_bit_size, 32], where
    * min_bit_size is 32 when 16-bit lowering is requested, else 16. */
   const struct lower_mem_bit_sizes_data *data = cb_data;
   unsigned max_bit_size = 32;
   unsigned min_bit_size = data->dxil_options->lower_int16 ? 32 : 16;
   unsigned closest_bit_size = MAX2(min_bit_size, MIN2(max_bit_size, bit_size_in));
   if (intrin == nir_intrinsic_load_ubo) {
      /* UBO loads can be done at whatever (supported) bit size, but require 16 byte
       * alignment and can load up to 16 bytes per instruction. However this pass requires
       * loading 16 bytes of data to get 16-byte alignment. We're going to run lower_ubo_vec4
       * which can deal with unaligned vec4s, so for this pass let's just deal with bit size
       * and total size restrictions. */
      return (nir_mem_access_size_align) {
         .align = closest_bit_size / 8,
         .bit_size = closest_bit_size,
         .num_components = DIV_ROUND_UP(MIN2(bytes, 16) * 8, closest_bit_size),
         .shift = nir_mem_access_shift_method_scalar,
      };
   }

   assert(intrin == nir_intrinsic_load_ssbo || intrin == nir_intrinsic_store_ssbo);
   uint32_t align = nir_combined_align(align_mul, align_offset);
   if (align < min_bit_size / 8) {
      /* Unaligned load/store, use the minimum bit size, up to 4 components */
      unsigned ideal_num_components = intrin == nir_intrinsic_load_ssbo ?
         DIV_ROUND_UP(bytes * 8, min_bit_size) :
         (32 / min_bit_size);
      return (nir_mem_access_size_align) {
         .align = min_bit_size / 8,
         .bit_size = min_bit_size,
         .num_components = MIN2(4, ideal_num_components),
         .shift = nir_mem_access_shift_method_scalar,
      };
   }

   /* Increase/decrease bit size to try to get closer to the requested byte size/align */
   unsigned bit_size = closest_bit_size;
   unsigned target = MIN2(bytes, align);
   /* Shrink while a single element overshoots the target... */
   while (target < bit_size / 8 && bit_size > min_bit_size)
      bit_size /= 2;
   /* ...grow while even a full 4-component vector undershoots it. */
   while (target > bit_size / 8 * 4 && bit_size < max_bit_size)
      bit_size *= 2;

   /* This is the best we can do */
   unsigned num_components = intrin == nir_intrinsic_load_ssbo ?
      DIV_ROUND_UP(bytes * 8, bit_size) :
      MAX2(1, (bytes * 8 / bit_size));
   return (nir_mem_access_size_align) {
      .align = bit_size / 8,
      .bit_size = bit_size,
      .num_components = MIN2(4, num_components),
      .shift = nir_mem_access_shift_method_scalar,
   };
}
6307 
static void
optimize_nir(struct nir_shader *s, const struct nir_to_dxil_options *opts)
{
   /* Run the NIR optimization loop to a fixed point, then late algebraic
    * optimizations, then replace remaining undefs with zero.  Pass order
    * matters; do not reorder without revisiting inter-pass dependencies. */
   bool progress;
   do {
      progress = false;
      NIR_PASS_V(s, nir_lower_vars_to_ssa);
      NIR_PASS(progress, s, nir_lower_indirect_derefs, nir_var_function_temp, 4);
      NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, nir_opt_copy_prop_vars);
      /* Widen sub-minimum ALU bit sizes (see lower_bit_size_callback). */
      NIR_PASS(progress, s, nir_lower_bit_size, lower_bit_size_callback, (void*)opts);
      NIR_PASS(progress, s, dxil_nir_lower_8bit_conv);
      if (opts->lower_int16)
         NIR_PASS(progress, s, dxil_nir_lower_16bit_conv);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_if,
               nir_opt_if_optimize_phi_true_false | nir_opt_if_avoid_64bit_phis);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, dxil_nir_algebraic);
      if (s->options->lower_int64_options)
         NIR_PASS(progress, s, nir_lower_int64);
      NIR_PASS(progress, s, nir_lower_alu);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_undef);
      NIR_PASS(progress, s, nir_opt_deref);
      NIR_PASS(progress, s, dxil_nir_lower_upcast_phis, opts->lower_int16 ? 32 : 16);
      NIR_PASS(progress, s, nir_lower_64bit_phis);
      NIR_PASS(progress, s, nir_lower_phis_to_scalar, true);
      NIR_PASS(progress, s, nir_opt_loop_unroll);
      NIR_PASS(progress, s, nir_lower_pack);
      NIR_PASS(progress, s, dxil_nir_remove_oob_array_accesses);
      NIR_PASS_V(s, nir_lower_system_values);
   } while (progress);

   do {
      progress = false;
      NIR_PASS(progress, s, nir_opt_algebraic_late);
   } while (progress);

   /* DXIL has no undef; pin any that survived to zero. */
   NIR_PASS_V(s, nir_lower_undef_to_zero);
}
6354 
static
void dxil_fill_validation_state(struct ntd_context *ctx,
                                struct dxil_validation_state *state)
{
   /* Populate the pipeline state validation (PSV) data consumed by the
    * DXIL validator/runtime: resource table, wave size expectations, and
    * per-stage signature/topology fields. */

   /* Validator 1.6+ uses the larger v1 resource record layout. */
   unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
      sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
   state->num_resources = ctx->resources.size / resource_element_size;
   state->resources.v0 = (struct dxil_resource_v0*)ctx->resources.data;
   if (ctx->shader->info.subgroup_size >= SUBGROUP_SIZE_REQUIRE_4) {
      /* An exact subgroup size was requested; pin both bounds to it. */
      state->state.psv1.psv0.max_expected_wave_lane_count = ctx->shader->info.subgroup_size;
      state->state.psv1.psv0.min_expected_wave_lane_count = ctx->shader->info.subgroup_size;
   } else {
      state->state.psv1.psv0.max_expected_wave_lane_count = UINT_MAX;
   }
   state->state.psv1.shader_stage = (uint8_t)ctx->mod.shader_kind;
   state->state.psv1.uses_view_id = (uint8_t)ctx->mod.feats.view_id;
   state->state.psv1.sig_input_elements = (uint8_t)ctx->mod.num_sig_inputs;
   state->state.psv1.sig_output_elements = (uint8_t)ctx->mod.num_sig_outputs;
   state->state.psv1.sig_patch_const_or_prim_elements = (uint8_t)ctx->mod.num_sig_patch_consts;

   /* Stage-specific PSV fields. */
   switch (ctx->mod.shader_kind) {
   case DXIL_VERTEX_SHADER:
      state->state.psv1.psv0.vs.output_position_present = ctx->mod.info.has_out_position;
      break;
   case DXIL_PIXEL_SHADER:
      /* TODO: handle depth outputs */
      state->state.psv1.psv0.ps.depth_output = ctx->mod.info.has_out_depth;
      state->state.psv1.psv0.ps.sample_frequency =
         ctx->mod.info.has_per_sample_input;
      break;
   case DXIL_COMPUTE_SHADER:
      state->state.num_threads_x = MAX2(ctx->shader->info.workgroup_size[0], 1);
      state->state.num_threads_y = MAX2(ctx->shader->info.workgroup_size[1], 1);
      state->state.num_threads_z = MAX2(ctx->shader->info.workgroup_size[2], 1);
      break;
   case DXIL_GEOMETRY_SHADER:
      state->state.psv1.max_vertex_count = ctx->shader->info.gs.vertices_out;
      state->state.psv1.psv0.gs.input_primitive = dxil_get_input_primitive(ctx->shader->info.gs.input_primitive);
      state->state.psv1.psv0.gs.output_toplology = dxil_get_primitive_topology(ctx->shader->info.gs.output_primitive);
      state->state.psv1.psv0.gs.output_stream_mask = MAX2(ctx->shader->info.gs.active_stream_mask, 1);
      state->state.psv1.psv0.gs.output_position_present = ctx->mod.info.has_out_position;
      break;
   case DXIL_HULL_SHADER:
      state->state.psv1.psv0.hs.input_control_point_count = ctx->tess_input_control_point_count;
      state->state.psv1.psv0.hs.output_control_point_count = ctx->shader->info.tess.tcs_vertices_out;
      state->state.psv1.psv0.hs.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode);
      state->state.psv1.psv0.hs.tessellator_output_primitive = get_tessellator_output_primitive(&ctx->shader->info);
      state->state.psv1.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts;
      break;
   case DXIL_DOMAIN_SHADER:
      state->state.psv1.psv0.ds.input_control_point_count = ctx->shader->info.tess.tcs_vertices_out;
      state->state.psv1.psv0.ds.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode);
      state->state.psv1.psv0.ds.output_position_present = ctx->mod.info.has_out_position;
      state->state.psv1.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts;
      break;
   default:
      assert(0 && "Shader type not (yet) supported");
   }
}
6414 
6415 static nir_variable *
add_sysvalue(struct ntd_context * ctx,uint8_t value,char * name,int driver_location)6416 add_sysvalue(struct ntd_context *ctx,
6417               uint8_t value, char *name,
6418               int driver_location)
6419 {
6420 
6421    nir_variable *var = rzalloc(ctx->shader, nir_variable);
6422    if (!var)
6423       return NULL;
6424    var->data.driver_location = driver_location;
6425    var->data.location = value;
6426    var->type = glsl_uint_type();
6427    var->name = name;
6428    var->data.mode = nir_var_system_value;
6429    var->data.interpolation = INTERP_MODE_FLAT;
6430    return var;
6431 }
6432 
6433 static bool
append_input_or_sysvalue(struct ntd_context * ctx,int input_loc,int sv_slot,char * name,int driver_location)6434 append_input_or_sysvalue(struct ntd_context *ctx,
6435                          int input_loc,  int sv_slot,
6436                          char *name, int driver_location)
6437 {
6438    if (input_loc >= 0) {
6439       /* Check inputs whether a variable is available the corresponds
6440        * to the sysvalue */
6441       nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
6442          if (var->data.location == input_loc) {
6443             ctx->system_value[sv_slot] = var;
6444             return true;
6445          }
6446       }
6447    }
6448 
6449    ctx->system_value[sv_slot] = add_sysvalue(ctx, sv_slot, name, driver_location);
6450    if (!ctx->system_value[sv_slot])
6451       return false;
6452 
6453    nir_shader_add_variable(ctx->shader, ctx->system_value[sv_slot]);
6454    return true;
6455 }
6456 
/* System values that may need to be materialized as DXIL inputs.
 * slot: varying slot of an existing input that can supply the value
 *       (-1 if there is no corresponding input varying).
 * only_in_shader: restricts the mapping to one stage; MESA_SHADER_NONE
 *       means it applies to any stage. */
struct sysvalue_name {
   gl_system_value value;
   int slot;
   char *name;
   gl_shader_stage only_in_shader;
} possible_sysvalues[] = {
   {SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, -1, "SV_VertexID", MESA_SHADER_NONE},
   {SYSTEM_VALUE_INSTANCE_ID, -1, "SV_InstanceID", MESA_SHADER_NONE},
   {SYSTEM_VALUE_FRONT_FACE, VARYING_SLOT_FACE, "SV_IsFrontFace", MESA_SHADER_NONE},
   {SYSTEM_VALUE_PRIMITIVE_ID, VARYING_SLOT_PRIMITIVE_ID, "SV_PrimitiveID", MESA_SHADER_GEOMETRY},
   {SYSTEM_VALUE_SAMPLE_ID, -1, "SV_SampleIndex", MESA_SHADER_NONE},
};
6469 
static bool
allocate_sysvalues(struct ntd_context *ctx)
{
   /* For every system value the shader reads (from possible_sysvalues),
    * bind it to an existing input or append a new variable for it, giving
    * new variables driver_locations past all existing inputs.  Also decide
    * whether a fragment shader must be marked as reading SV_SampleIndex to
    * satisfy the DXIL validator's sample-frequency rules. */
   unsigned driver_location = 0;
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in)
      driver_location = MAX2(driver_location, var->data.driver_location + 1);
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_system_value)
      driver_location = MAX2(driver_location, var->data.driver_location + 1);

   if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT &&
       !BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID)) {
      bool need_sample_id = ctx->shader->info.fs.uses_sample_shading;

      /* "var->data.sample = true" sometimes just mean, "I want per-sample
       * shading", which explains why we can end up with vars having flat
       * interpolation with the per-sample bit set. If there's only such
       * type of variables, we need to tell DXIL that we read SV_SampleIndex
       * to make DXIL validation happy.
       */
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
         bool var_can_be_sample_rate = !var->data.centroid && var->data.interpolation != INTERP_MODE_FLAT;
         /* If there's an input that will actually force sample-rate shading, then we don't
          * need SV_SampleIndex. */
         if (var->data.sample && var_can_be_sample_rate) {
            need_sample_id = false;
            break;
         }
         /* If there's an input that wants to be sample-rate, but can't be, then we might
          * need SV_SampleIndex. */
         if (var->data.sample && !var_can_be_sample_rate)
            need_sample_id = true;
      }

      if (need_sample_id)
         BITSET_SET(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
   }

   for (unsigned i = 0; i < ARRAY_SIZE(possible_sysvalues); ++i) {
      struct sysvalue_name *info = &possible_sysvalues[i];
      /* Skip stage-restricted entries that don't apply to this shader. */
      if (info->only_in_shader != MESA_SHADER_NONE &&
          info->only_in_shader != ctx->shader->info.stage)
         continue;
      if (BITSET_TEST(ctx->shader->info.system_values_read, info->value)) {
         if (!append_input_or_sysvalue(ctx, info->slot,
                                       info->value, info->name,
                                       driver_location++))
            return false;
      }
   }
   return true;
}
6521 
/* nir_lower_io size callback: number of vec4 attribute slots a variable of
 * this type occupies.  The bindless flag is irrelevant here. */
static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
   (void)bindless;
   return glsl_count_attribute_slots(type, false);
}
6527 
/* Range of DXIL validator versions this backend can produce blobs for. */
static const unsigned dxil_validator_min_capable_version = DXIL_VALIDATOR_1_4;
static const unsigned dxil_validator_max_capable_version = DXIL_VALIDATOR_1_8;
/* Range of shader models this backend can target. */
static const unsigned dxil_min_shader_model = SHADER_MODEL_6_0;
static const unsigned dxil_max_shader_model = SHADER_MODEL_6_8;
6532 
6533 bool
nir_to_dxil(struct nir_shader * s,const struct nir_to_dxil_options * opts,const struct dxil_logger * logger,struct blob * blob)6534 nir_to_dxil(struct nir_shader *s, const struct nir_to_dxil_options *opts,
6535             const struct dxil_logger *logger, struct blob *blob)
6536 {
6537    assert(opts);
6538    bool retval = true;
6539    debug_dxil = (int)debug_get_option_debug_dxil();
6540    blob_init(blob);
6541 
6542    if (opts->shader_model_max < dxil_min_shader_model) {
6543       debug_printf("D3D12: cannot support emitting shader models lower than %d.%d\n",
6544                    dxil_min_shader_model >> 16,
6545                    dxil_min_shader_model & 0xffff);
6546       return false;
6547    }
6548 
6549    if (opts->shader_model_max > dxil_max_shader_model) {
6550       debug_printf("D3D12: cannot support emitting higher than shader model %d.%d\n",
6551                    dxil_max_shader_model >> 16,
6552                    dxil_max_shader_model & 0xffff);
6553       return false;
6554    }
6555 
6556    if (opts->validator_version_max != NO_DXIL_VALIDATION &&
6557        opts->validator_version_max < dxil_validator_min_capable_version) {
6558       debug_printf("D3D12: Invalid validator version %d.%d, must be 1.4 or greater\n",
6559          opts->validator_version_max >> 16,
6560          opts->validator_version_max & 0xffff);
6561       return false;
6562    }
6563 
6564    /* If no validation, write a blob as if it was going to be validated by the newest understood validator.
6565     * Same if the validator is newer than we know how to write for.
6566     */
6567    uint32_t validator_version =
6568       opts->validator_version_max == NO_DXIL_VALIDATION ||
6569       opts->validator_version_max > dxil_validator_max_capable_version ?
6570       dxil_validator_max_capable_version : opts->validator_version_max;
6571 
6572    struct ntd_context *ctx = calloc(1, sizeof(*ctx));
6573    if (!ctx)
6574       return false;
6575 
6576    ctx->opts = opts;
6577    ctx->shader = s;
6578    ctx->logger = logger ? logger : &default_logger;
6579 
6580    ctx->ralloc_ctx = ralloc_context(NULL);
6581    if (!ctx->ralloc_ctx) {
6582       retval = false;
6583       goto out;
6584    }
6585 
6586    util_dynarray_init(&ctx->srv_metadata_nodes, ctx->ralloc_ctx);
6587    util_dynarray_init(&ctx->uav_metadata_nodes, ctx->ralloc_ctx);
6588    util_dynarray_init(&ctx->cbv_metadata_nodes, ctx->ralloc_ctx);
6589    util_dynarray_init(&ctx->sampler_metadata_nodes, ctx->ralloc_ctx);
6590    util_dynarray_init(&ctx->resources, ctx->ralloc_ctx);
6591    dxil_module_init(&ctx->mod, ctx->ralloc_ctx);
6592    ctx->mod.shader_kind = get_dxil_shader_kind(s);
6593    ctx->mod.major_version = 6;
6594    /* Use the highest shader model that's supported and can be validated */
6595    ctx->mod.minor_version =
6596       MIN2(opts->shader_model_max & 0xffff, validator_version & 0xffff);
6597    ctx->mod.major_validator = validator_version >> 16;
6598    ctx->mod.minor_validator = validator_version & 0xffff;
6599 
6600    if (s->info.stage <= MESA_SHADER_FRAGMENT) {
6601       uint64_t in_mask =
6602          s->info.stage == MESA_SHADER_VERTEX ?
6603          0 : (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER);
6604       uint64_t out_mask =
6605          s->info.stage == MESA_SHADER_FRAGMENT ?
6606          ((1ull << FRAG_RESULT_STENCIL) | (1ull << FRAG_RESULT_SAMPLE_MASK)) :
6607          (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER);
6608 
6609       NIR_PASS_V(s, dxil_nir_fix_io_uint_type, in_mask, out_mask);
6610    }
6611 
6612    NIR_PASS_V(s, dxil_nir_lower_fquantize2f16);
6613    NIR_PASS_V(s, nir_lower_frexp);
6614    NIR_PASS_V(s, nir_lower_flrp, 16 | 32 | 64, true);
6615    NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4, nir_lower_io_lower_64bit_to_32);
6616    NIR_PASS_V(s, dxil_nir_ensure_position_writes);
6617    NIR_PASS_V(s, dxil_nir_lower_system_values);
6618    NIR_PASS_V(s, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_system_value | nir_var_shader_out, NULL, NULL);
6619 
6620    /* Do a round of optimization to try to vectorize loads/stores. Otherwise the addresses used for loads
6621     * might be too opaque for the pass to see that they're next to each other. */
6622    optimize_nir(s, opts);
6623 
6624    /* Vectorize UBO/SSBO accesses aggressively. This can help increase alignment to enable us to do better
6625     * chunking of loads and stores after lowering bit sizes. Ignore load/store size limitations here, we'll
6626     * address them with lower_mem_access_bit_sizes */
6627    nir_load_store_vectorize_options vectorize_opts = {
6628       .callback = vectorize_filter,
6629       .modes = nir_var_mem_ubo | nir_var_mem_ssbo,
6630    };
6631    NIR_PASS_V(s, nir_opt_load_store_vectorize, &vectorize_opts);
6632 
6633    /* Now that they're bloated to the max, address bit size restrictions and overall size limitations for
6634     * a single load/store op. */
6635    struct lower_mem_bit_sizes_data mem_size_data = { s->options, opts };
6636    nir_lower_mem_access_bit_sizes_options mem_size_options = {
6637       .modes = nir_var_mem_ubo | nir_var_mem_ssbo,
6638       .callback = lower_mem_access_bit_sizes_cb,
6639       .may_lower_unaligned_stores_to_atomics = true,
6640       .cb_data = &mem_size_data
6641    };
6642    NIR_PASS_V(s, nir_lower_mem_access_bit_sizes, &mem_size_options);
6643 
6644    /* Lastly, convert byte-address UBO loads to vec-addressed. This pass can also deal with selecting sub-
6645     * components from the load and dealing with vec-straddling loads. */
6646    NIR_PASS_V(s, nir_lower_ubo_vec4);
6647 
6648    if (opts->shader_model_max < SHADER_MODEL_6_6) {
6649       /* In a later pass, load_helper_invocation will be lowered to sample mask based fallback,
6650        * so both load- and is- will be emulated eventually.
6651        */
6652       NIR_PASS_V(s, nir_lower_is_helper_invocation);
6653    }
6654 
6655    if (ctx->mod.shader_kind == DXIL_HULL_SHADER)
6656       NIR_PASS_V(s, dxil_nir_split_tess_ctrl, &ctx->tess_ctrl_patch_constant_func);
6657 
6658    if (ctx->mod.shader_kind == DXIL_HULL_SHADER ||
6659        ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) {
6660       /* Make sure any derefs are gone after lower_io before updating tess level vars */
6661       NIR_PASS_V(s, nir_opt_dce);
6662       NIR_PASS_V(s, dxil_nir_fixup_tess_level_for_domain);
6663    }
6664 
6665    optimize_nir(s, opts);
6666 
6667    NIR_PASS_V(s, nir_remove_dead_variables,
6668               nir_var_function_temp | nir_var_mem_constant | nir_var_mem_shared, NULL);
6669 
6670    if (!allocate_sysvalues(ctx))
6671       return false;
6672 
6673    NIR_PASS_V(s, dxil_nir_lower_sysval_to_load_input, ctx->system_value);
6674    NIR_PASS_V(s, nir_opt_dce);
6675 
6676    /* This needs to be after any copy prop is done to prevent these movs from being erased */
6677    NIR_PASS_V(s, dxil_nir_move_consts);
6678    NIR_PASS_V(s, nir_opt_dce);
6679 
6680    NIR_PASS_V(s, dxil_nir_guess_image_formats);
6681 
6682    if (debug_dxil & DXIL_DEBUG_VERBOSE)
6683       nir_print_shader(s, stderr);
6684 
6685    if (!emit_module(ctx, opts)) {
6686       debug_printf("D3D12: dxil_container_add_module failed\n");
6687       retval = false;
6688       goto out;
6689    }
6690 
6691    if (debug_dxil & DXIL_DEBUG_DUMP_MODULE) {
6692       struct dxil_dumper *dumper = dxil_dump_create();
6693       dxil_dump_module(dumper, &ctx->mod);
6694       fprintf(stderr, "\n");
6695       dxil_dump_buf_to_file(dumper, stderr);
6696       fprintf(stderr, "\n\n");
6697       dxil_dump_free(dumper);
6698    }
6699 
6700    struct dxil_container container;
6701    dxil_container_init(&container);
6702    /* Native low precision disables min-precision */
6703    if (ctx->mod.feats.native_low_precision)
6704       ctx->mod.feats.min_precision = false;
6705    if (!dxil_container_add_features(&container, &ctx->mod.feats)) {
6706       debug_printf("D3D12: dxil_container_add_features failed\n");
6707       retval = false;
6708       goto out;
6709    }
6710 
6711    if (!dxil_container_add_io_signature(&container,
6712                                         DXIL_ISG1,
6713                                         ctx->mod.num_sig_inputs,
6714                                         ctx->mod.inputs,
6715                                         ctx->mod.minor_validator >= 7)) {
6716       debug_printf("D3D12: failed to write input signature\n");
6717       retval = false;
6718       goto out;
6719    }
6720 
6721    if (!dxil_container_add_io_signature(&container,
6722                                         DXIL_OSG1,
6723                                         ctx->mod.num_sig_outputs,
6724                                         ctx->mod.outputs,
6725                                         ctx->mod.minor_validator >= 7)) {
6726       debug_printf("D3D12: failed to write output signature\n");
6727       retval = false;
6728       goto out;
6729    }
6730 
6731    if ((ctx->mod.shader_kind == DXIL_HULL_SHADER ||
6732         ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) &&
6733        !dxil_container_add_io_signature(&container,
6734                                         DXIL_PSG1,
6735                                         ctx->mod.num_sig_patch_consts,
6736                                         ctx->mod.patch_consts,
6737                                         ctx->mod.minor_validator >= 7)) {
6738       debug_printf("D3D12: failed to write patch constant signature\n");
6739       retval = false;
6740       goto out;
6741    }
6742 
6743    struct dxil_validation_state validation_state;
6744    memset(&validation_state, 0, sizeof(validation_state));
6745    dxil_fill_validation_state(ctx, &validation_state);
6746 
6747    if (!dxil_container_add_state_validation(&container,&ctx->mod,
6748                                             &validation_state)) {
6749       debug_printf("D3D12: failed to write state-validation\n");
6750       retval = false;
6751       goto out;
6752    }
6753 
6754    if (!dxil_container_add_module(&container, &ctx->mod)) {
6755       debug_printf("D3D12: failed to write module\n");
6756       retval = false;
6757       goto out;
6758    }
6759 
6760    if (!dxil_container_write(&container, blob)) {
6761       debug_printf("D3D12: dxil_container_write failed\n");
6762       retval = false;
6763       goto out;
6764    }
6765    dxil_container_finish(&container);
6766 
6767    if (debug_dxil & DXIL_DEBUG_DUMP_BLOB) {
6768       static int shader_id = 0;
6769       char buffer[64];
6770       snprintf(buffer, sizeof(buffer), "shader_%s_%d.blob",
6771                get_shader_kind_str(ctx->mod.shader_kind), shader_id++);
6772       debug_printf("Try to write blob to %s\n", buffer);
6773       FILE *f = fopen(buffer, "wb");
6774       if (f) {
6775          fwrite(blob->data, 1, blob->size, f);
6776          fclose(f);
6777       }
6778    }
6779 
6780 out:
6781    dxil_module_release(&ctx->mod);
6782    ralloc_free(ctx->ralloc_ctx);
6783    free(ctx);
6784    return retval;
6785 }
6786