/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_to_dxil.h"

#include "dxil_container.h"
#include "dxil_dump.h"
#include "dxil_enums.h"
#include "dxil_function.h"
#include "dxil_module.h"
#include "dxil_nir.h"
#include "dxil_signature.h"

#include "nir/nir_builder.h"
#include "util/u_debug.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"

#include "git_sha1.h"

#include "vulkan/vulkan_core.h"

#include <stdint.h>

int debug_dxil = 0;

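/* Debug flags, parsed once from the DXIL_DEBUG environment variable by
 * DEBUG_GET_ONCE_FLAGS_OPTION below, e.g. DXIL_DEBUG=verbose,trace.
 */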
static const struct debug_named_value
dxil_debug_options[] = {
   { "verbose", DXIL_DEBUG_VERBOSE, NULL },
   { "dump_blob", DXIL_DEBUG_DUMP_BLOB, "Write shader blobs" },
   { "trace", DXIL_DEBUG_TRACE, "Trace instruction conversion" },
   { "dump_module", DXIL_DEBUG_DUMP_MODULE, "Dump module tree to stderr" },
   DEBUG_NAMED_VALUE_END
};

DEBUG_GET_ONCE_FLAGS_OPTION(debug_dxil, "DXIL_DEBUG", dxil_debug_options, 0)

#define NIR_INSTR_UNSUPPORTED(instr) \
   do { \
      if (debug_dxil & DXIL_DEBUG_VERBOSE) { \
         fprintf(stderr, "Unsupported instruction: "); \
         nir_print_instr(instr, stderr); \
         fprintf(stderr, "\n"); \
      } \
   } while (0)

#define TRACE_CONVERSION(instr) \
   do { \
      if (debug_dxil & DXIL_DEBUG_TRACE) { \
         fprintf(stderr, "Convert '"); \
         nir_print_instr(instr, stderr); \
         fprintf(stderr, "'\n"); \
      } \
   } while (0)

static const nir_shader_compiler_options
nir_options = {
   .lower_ineg = true,
   .lower_fneg = true,
   .lower_ffma16 = true,
   .lower_ffma32 = true,
   .lower_isign = true,
   .lower_fsign = true,
   .lower_iabs = true,
   .lower_fmod = true,
   .lower_fpow = true,
   .lower_scmp = true,
   .lower_ldexp = true,
   .lower_flrp16 = true,
   .lower_flrp32 = true,
   .lower_flrp64 = true,
   .lower_bitfield_extract = true,
   .lower_find_msb_to_reverse = true,
   .lower_extract_word = true,
   .lower_extract_byte = true,
   .lower_insert_word = true,
   .lower_insert_byte = true,
   .lower_all_io_to_elements = true,
   .lower_all_io_to_temps = true,
   .lower_hadd = true,
   .lower_uadd_sat = true,
   .lower_usub_sat = true,
   .lower_iadd_sat = true,
   .lower_uadd_carry = true,
   .lower_mul_high = true,
   .lower_rotate = true,
   .lower_pack_64_2x32_split = true,
   .lower_pack_32_2x16_split = true,
   .lower_unpack_64_2x32_split = true,
   .lower_unpack_32_2x16_split = true,
   .lower_unpack_half_2x16 = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_interpolate_at = true,
   .has_fsub = true,
   .has_isub = true,
   .use_scoped_barrier = true,
   .vertex_id_zero_based = true,
   .lower_base_vertex = true,
   .lower_helper_invocation = true,
   .has_cs_global_id = true,
   .has_txs = true,
   .lower_mul_2x32_64 = true,
   .lower_doubles_options =
      nir_lower_drcp |
      nir_lower_dsqrt |
      nir_lower_drsq |
      nir_lower_dfract |
      nir_lower_dtrunc |
      nir_lower_dfloor |
      nir_lower_dceil |
      nir_lower_dround_even,
   .max_unroll_iterations = 32, /* arbitrary */
   .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
};

const nir_shader_compiler_options *
dxil_get_nir_compiler_options(void)
{
   return &nir_options;
}

static bool
emit_llvm_ident(struct dxil_module *m)
{
   const struct dxil_mdnode *compiler = dxil_get_metadata_string(m, "Mesa version " PACKAGE_VERSION MESA_GIT_SHA1);
   if (!compiler)
      return false;

   const struct dxil_mdnode *llvm_ident = dxil_get_metadata_node(m, &compiler, 1);
   return llvm_ident &&
          dxil_add_metadata_named_node(m, "llvm.ident", &llvm_ident, 1);
}

static bool
emit_named_version(struct dxil_module *m, const char *name,
                   int major, int minor)
{
   const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, major);
   const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, minor);
   const struct dxil_mdnode *version_nodes[] = { major_node, minor_node };
   const struct dxil_mdnode *version = dxil_get_metadata_node(m, version_nodes,
                                                     ARRAY_SIZE(version_nodes));
   return dxil_add_metadata_named_node(m, name, &version, 1);
}

static const char *
get_shader_kind_str(enum dxil_shader_kind kind)
{
   switch (kind) {
   case DXIL_PIXEL_SHADER:
      return "ps";
   case DXIL_VERTEX_SHADER:
      return "vs";
   case DXIL_GEOMETRY_SHADER:
      return "gs";
   case DXIL_HULL_SHADER:
      return "hs";
   case DXIL_DOMAIN_SHADER:
      return "ds";
   case DXIL_COMPUTE_SHADER:
      return "cs";
   default:
      unreachable("invalid shader kind");
   }
}

static bool
emit_dx_shader_model(struct dxil_module *m)
{
   const struct dxil_mdnode *type_node = dxil_get_metadata_string(m, get_shader_kind_str(m->shader_kind));
   const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, m->major_version);
   const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, m->minor_version);
   const struct dxil_mdnode *shader_model[] = { type_node, major_node,
                                                minor_node };
   const struct dxil_mdnode *dx_shader_model = dxil_get_metadata_node(m, shader_model, ARRAY_SIZE(shader_model));

   return dxil_add_metadata_named_node(m, "dx.shaderModel",
                                       &dx_shader_model, 1);
}

enum {
   DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG = 0,
   DXIL_STRUCTURED_BUFFER_ELEMENT_STRIDE_TAG = 1
};

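/* DXIL intrinsic opcodes, passed as the first i32 argument of every
 * dx.op.* call. The values must match the opcode table in the DXIL
 * specification; only the subset this backend emits is listed here.
 */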
enum dxil_intr {
   DXIL_INTR_LOAD_INPUT = 4,
   DXIL_INTR_STORE_OUTPUT = 5,
   DXIL_INTR_FABS = 6,
   DXIL_INTR_SATURATE = 7,

   DXIL_INTR_ISFINITE = 10,
   DXIL_INTR_ISNORMAL = 11,

   DXIL_INTR_FCOS = 12,
   DXIL_INTR_FSIN = 13,

   DXIL_INTR_FEXP2 = 21,
   DXIL_INTR_FRC = 22,
   DXIL_INTR_FLOG2 = 23,

   DXIL_INTR_SQRT = 24,
   DXIL_INTR_RSQRT = 25,
   DXIL_INTR_ROUND_NE = 26,
   DXIL_INTR_ROUND_NI = 27,
   DXIL_INTR_ROUND_PI = 28,
   DXIL_INTR_ROUND_Z = 29,

   DXIL_INTR_BFREV = 30,
   DXIL_INTR_COUNTBITS = 31,
   DXIL_INTR_FIRSTBIT_LO = 32,
   DXIL_INTR_FIRSTBIT_HI = 33,
   DXIL_INTR_FIRSTBIT_SHI = 34,

   DXIL_INTR_FMAX = 35,
   DXIL_INTR_FMIN = 36,
   DXIL_INTR_IMAX = 37,
   DXIL_INTR_IMIN = 38,
   DXIL_INTR_UMAX = 39,
   DXIL_INTR_UMIN = 40,

   DXIL_INTR_FMA = 47,

   DXIL_INTR_IBFE = 51,
   DXIL_INTR_UBFE = 52,
   DXIL_INTR_BFI = 53,

   DXIL_INTR_CREATE_HANDLE = 57,
   DXIL_INTR_CBUFFER_LOAD_LEGACY = 59,

   DXIL_INTR_SAMPLE = 60,
   DXIL_INTR_SAMPLE_BIAS = 61,
   DXIL_INTR_SAMPLE_LEVEL = 62,
   DXIL_INTR_SAMPLE_GRAD = 63,
   DXIL_INTR_SAMPLE_CMP = 64,
   DXIL_INTR_SAMPLE_CMP_LVL_ZERO = 65,

   DXIL_INTR_TEXTURE_LOAD = 66,
   DXIL_INTR_TEXTURE_STORE = 67,

   DXIL_INTR_BUFFER_LOAD = 68,
   DXIL_INTR_BUFFER_STORE = 69,

   DXIL_INTR_TEXTURE_SIZE = 72,
   DXIL_INTR_TEXTURE_GATHER = 73,
   DXIL_INTR_TEXTURE_GATHER_CMP = 74,

   DXIL_INTR_TEXTURE2DMS_GET_SAMPLE_POSITION = 75,
   DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION = 76,
   DXIL_INTR_RENDER_TARGET_GET_SAMPLE_COUNT = 77,

   DXIL_INTR_ATOMIC_BINOP = 78,
   DXIL_INTR_ATOMIC_CMPXCHG = 79,
   DXIL_INTR_BARRIER = 80,
   DXIL_INTR_TEXTURE_LOD = 81,

   DXIL_INTR_DISCARD = 82,
   DXIL_INTR_DDX_COARSE = 83,
   DXIL_INTR_DDY_COARSE = 84,
   DXIL_INTR_DDX_FINE = 85,
   DXIL_INTR_DDY_FINE = 86,

   DXIL_INTR_EVAL_SNAPPED = 87,
   DXIL_INTR_EVAL_SAMPLE_INDEX = 88,
   DXIL_INTR_EVAL_CENTROID = 89,

   DXIL_INTR_SAMPLE_INDEX = 90,
   DXIL_INTR_COVERAGE = 91,

   DXIL_INTR_THREAD_ID = 93,
   DXIL_INTR_GROUP_ID = 94,
   DXIL_INTR_THREAD_ID_IN_GROUP = 95,
   DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP = 96,

   DXIL_INTR_EMIT_STREAM = 97,
   DXIL_INTR_CUT_STREAM = 98,

   DXIL_INTR_GS_INSTANCE_ID = 100,

   DXIL_INTR_MAKE_DOUBLE = 101,
   DXIL_INTR_SPLIT_DOUBLE = 102,

   DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT = 103,
   DXIL_INTR_LOAD_PATCH_CONSTANT = 104,
   DXIL_INTR_DOMAIN_LOCATION = 105,
   DXIL_INTR_STORE_PATCH_CONSTANT = 106,
   DXIL_INTR_OUTPUT_CONTROL_POINT_ID = 107,
   DXIL_INTR_PRIMITIVE_ID = 108,

   DXIL_INTR_LEGACY_F32TOF16 = 130,
   DXIL_INTR_LEGACY_F16TOF32 = 131,

   DXIL_INTR_ATTRIBUTE_AT_VERTEX = 137,
};

enum dxil_atomic_op {
   DXIL_ATOMIC_ADD = 0,
   DXIL_ATOMIC_AND = 1,
   DXIL_ATOMIC_OR = 2,
   DXIL_ATOMIC_XOR = 3,
   DXIL_ATOMIC_IMIN = 4,
   DXIL_ATOMIC_IMAX = 5,
   DXIL_ATOMIC_UMIN = 6,
   DXIL_ATOMIC_UMAX = 7,
   DXIL_ATOMIC_EXCHANGE = 8,
};

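/* Describes one contiguous register range of a resource array: the
 * metadata record ID, the base binding (lower bound), the element count,
 * and the register space.
 */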
typedef struct {
   unsigned id;
   unsigned binding;
   unsigned size;
   unsigned space;
} resource_array_layout;

static void
fill_resource_metadata(struct dxil_module *m, const struct dxil_mdnode **fields,
                       const struct dxil_type *struct_type,
                       const char *name, const resource_array_layout *layout)
{
   const struct dxil_type *pointer_type = dxil_module_get_pointer_type(m, struct_type);
   const struct dxil_value *pointer_undef = dxil_module_get_undef(m, pointer_type);

   fields[0] = dxil_get_metadata_int32(m, layout->id); // resource ID
   fields[1] = dxil_get_metadata_value(m, pointer_type, pointer_undef); // global constant symbol
   fields[2] = dxil_get_metadata_string(m, name ? name : ""); // name
   fields[3] = dxil_get_metadata_int32(m, layout->space); // space ID
   fields[4] = dxil_get_metadata_int32(m, layout->binding); // lower bound
   fields[5] = dxil_get_metadata_int32(m, layout->size); // range size
}

static const struct dxil_mdnode *
emit_srv_metadata(struct dxil_module *m, const struct dxil_type *elem_type,
                  const char *name, const resource_array_layout *layout,
                  enum dxil_component_type comp_type,
                  enum dxil_resource_kind res_kind)
{
   const struct dxil_mdnode *fields[9];

   const struct dxil_mdnode *metadata_tag_nodes[2];

   fill_resource_metadata(m, fields, elem_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
   fields[7] = dxil_get_metadata_int1(m, 0); // sample count
   if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
       res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
      metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
      metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
      fields[8] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
   } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      fields[8] = NULL;
   else
      unreachable("Structured buffers not supported yet");

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_uav_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                  const char *name, const resource_array_layout *layout,
                  enum dxil_component_type comp_type,
                  enum dxil_resource_kind res_kind)
{
   const struct dxil_mdnode *fields[11];

   const struct dxil_mdnode *metadata_tag_nodes[2];

   fill_resource_metadata(m, fields, struct_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
   fields[7] = dxil_get_metadata_int1(m, false); // globally-coherent
   fields[8] = dxil_get_metadata_int1(m, false); // has counter
   fields[9] = dxil_get_metadata_int1(m, false); // is ROV
   if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
       res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
      metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
      metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
      fields[10] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
   } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      fields[10] = NULL;
   else
      unreachable("Structured buffers not supported yet");

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_cbv_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                  const char *name, const resource_array_layout *layout,
                  unsigned size)
{
   const struct dxil_mdnode *fields[8];

   fill_resource_metadata(m, fields, struct_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, size); // constant buffer size
   fields[7] = NULL; // metadata

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_sampler_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                      nir_variable *var, const resource_array_layout *layout)
{
   const struct dxil_mdnode *fields[8];
   const struct glsl_type *type = glsl_without_array(var->type);

   fill_resource_metadata(m, fields, struct_type, var->name, layout);
   enum dxil_sampler_kind sampler_kind = glsl_sampler_type_is_shadow(type) ?
          DXIL_SAMPLER_KIND_COMPARISON : DXIL_SAMPLER_KIND_DEFAULT;
   fields[6] = dxil_get_metadata_int32(m, sampler_kind); // sampler kind
   fields[7] = NULL; // metadata

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}


#define MAX_SRVS 128
#define MAX_UAVS 64
#define MAX_CBVS 64 // ??
#define MAX_SAMPLERS 64 // ??

struct dxil_def {
   const struct dxil_value *chans[NIR_MAX_VEC_COMPONENTS];
};

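/* Per-shader conversion state, threaded through all of the emit_* helpers
 * below: the DXIL module being built, resource metadata and handles, SSA
 * def storage, and shader-stage specifics.
 */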
struct ntd_context {
   void *ralloc_ctx;
   const struct nir_to_dxil_options *opts;
   struct nir_shader *shader;

   struct dxil_module mod;

   struct util_dynarray srv_metadata_nodes;
   const struct dxil_value *srv_handles[MAX_SRVS];

   struct util_dynarray uav_metadata_nodes;
   const struct dxil_value *ssbo_handles[MAX_UAVS];
   const struct dxil_value *image_handles[MAX_UAVS];
   uint32_t num_uavs;

   struct util_dynarray cbv_metadata_nodes;
   const struct dxil_value *cbv_handles[MAX_CBVS];

   struct util_dynarray sampler_metadata_nodes;
   const struct dxil_value *sampler_handles[MAX_SAMPLERS];

   struct util_dynarray resources;

   const struct dxil_mdnode *shader_property_nodes[6];
   size_t num_shader_property_nodes;

   struct dxil_def *defs;
   unsigned num_defs;
   struct hash_table *phis;

   const struct dxil_value *sharedvars;
   const struct dxil_value *scratchvars;
   struct hash_table *consts;

   nir_variable *ps_front_face;
   nir_variable *system_value[SYSTEM_VALUE_MAX];

   nir_function *tess_ctrl_patch_constant_func;
   unsigned tess_input_control_point_count;

   struct dxil_func_def *main_func_def;
   struct dxil_func_def *tess_ctrl_patch_constant_func_def;
   unsigned unnamed_ubo_count;
};

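/* The dx.op.* helpers below share DXIL's calling convention: each call
 * takes the i32 intrinsic opcode as its first argument, followed by the
 * operands. Most ALU-style operations hide behind a few overloaded
 * functions (dx.op.unary, dx.op.binary, dx.op.tertiary, dx.op.quaternary);
 * unary_func_name() picks the special-cased names used by the
 * bit-manipulation and float-classification opcodes.
 */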
static const char *
unary_func_name(enum dxil_intr intr)
{
   switch (intr) {
   case DXIL_INTR_COUNTBITS:
   case DXIL_INTR_FIRSTBIT_HI:
   case DXIL_INTR_FIRSTBIT_SHI:
   case DXIL_INTR_FIRSTBIT_LO:
      return "dx.op.unaryBits";
   case DXIL_INTR_ISFINITE:
   case DXIL_INTR_ISNORMAL:
      return "dx.op.isSpecialFloat";
   default:
      return "dx.op.unary";
   }
}

static const struct dxil_value *
emit_unary_call(struct ntd_context *ctx, enum overload_type overload,
                enum dxil_intr intr,
                const struct dxil_value *op0)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod,
                                                    unary_func_name(intr),
                                                    overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_binary_call(struct ntd_context *ctx, enum overload_type overload,
                 enum dxil_intr intr,
                 const struct dxil_value *op0, const struct dxil_value *op1)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.binary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0,
      op1
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_tertiary_call(struct ntd_context *ctx, enum overload_type overload,
                   enum dxil_intr intr,
                   const struct dxil_value *op0,
                   const struct dxil_value *op1,
                   const struct dxil_value *op2)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.tertiary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0,
      op1,
      op2
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_quaternary_call(struct ntd_context *ctx, enum overload_type overload,
                     enum dxil_intr intr,
                     const struct dxil_value *op0,
                     const struct dxil_value *op1,
                     const struct dxil_value *op2,
                     const struct dxil_value *op3)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.quaternary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0,
      op1,
      op2,
      op3
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_threadid_call(struct ntd_context *ctx, const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadId", DXIL_I32);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
       DXIL_INTR_THREAD_ID);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_threadidingroup_call(struct ntd_context *ctx,
                          const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadIdInGroup", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
       DXIL_INTR_THREAD_ID_IN_GROUP);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_flattenedthreadidingroup_call(struct ntd_context *ctx)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.flattenedThreadIdInGroup", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_groupid_call(struct ntd_context *ctx, const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.groupId", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
       DXIL_INTR_GROUP_ID);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_bufferload_call(struct ntd_context *ctx,
                     const struct dxil_value *handle,
                     const struct dxil_value *coord[2],
                     enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferLoad", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_BUFFER_LOAD);
   const struct dxil_value *args[] = { opcode, handle, coord[0], coord[1] };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_bufferstore_call(struct ntd_context *ctx,
                      const struct dxil_value *handle,
                      const struct dxil_value *coord[2],
                      const struct dxil_value *value[4],
                      const struct dxil_value *write_mask,
                      enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferStore", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_BUFFER_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1],
      value[0], value[1], value[2], value[3],
      write_mask
   };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_textureload_call(struct ntd_context *ctx,
                      const struct dxil_value *handle,
                      const struct dxil_value *coord[3],
                      enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", overload);
   if (!func)
      return NULL;
   const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
   const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_TEXTURE_LOAD);
   const struct dxil_value *args[] = { opcode, handle,
      /*lod_or_sample*/ int_undef,
      coord[0], coord[1], coord[2],
      /* offsets */ int_undef, int_undef, int_undef };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_texturestore_call(struct ntd_context *ctx,
                       const struct dxil_value *handle,
                       const struct dxil_value *coord[3],
                       const struct dxil_value *value[4],
                       const struct dxil_value *write_mask,
                       enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureStore", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_TEXTURE_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1], coord[2],
      value[0], value[1], value[2], value[3],
      write_mask
   };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_atomic_binop(struct ntd_context *ctx,
                  const struct dxil_value *handle,
                  enum dxil_atomic_op atomic_op,
                  const struct dxil_value *coord[3],
                  const struct dxil_value *value)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.atomicBinOp", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_BINOP);
   const struct dxil_value *atomic_op_value =
      dxil_module_get_int32_const(&ctx->mod, atomic_op);
   const struct dxil_value *args[] = {
      opcode, handle, atomic_op_value,
      coord[0], coord[1], coord[2], value
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_atomic_cmpxchg(struct ntd_context *ctx,
                    const struct dxil_value *handle,
                    const struct dxil_value *coord[3],
                    const struct dxil_value *cmpval,
                    const struct dxil_value *newval)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.atomicCompareExchange", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_CMPXCHG);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1], coord[2], cmpval, newval
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

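/* Resource access in DXIL goes through handles rather than raw bindings.
 * This emits, roughly:
 *
 *    %h = call %dx.types.Handle @dx.op.createHandle(
 *             i32 57, i8 class, i32 range_id, i32 index, i1 non_uniform)
 *
 * where range_id selects the resource record within its class and index is
 * the register actually being accessed within that record's range.
 */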
static const struct dxil_value *
emit_createhandle_call(struct ntd_context *ctx,
                       enum dxil_resource_class resource_class,
                       unsigned resource_range_id,
                       const struct dxil_value *resource_range_index,
                       bool non_uniform_resource_index)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE);
   const struct dxil_value *resource_class_value = dxil_module_get_int8_const(&ctx->mod, resource_class);
   const struct dxil_value *resource_range_id_value = dxil_module_get_int32_const(&ctx->mod, resource_range_id);
   const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
   if (!opcode || !resource_class_value || !resource_range_id_value ||
       !non_uniform_resource_index_value)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      resource_class_value,
      resource_range_id_value,
      resource_range_index,
      non_uniform_resource_index_value
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.createHandle", DXIL_NONE);

   if (!func)
      return NULL;

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_createhandle_call_const_index(struct ntd_context *ctx,
                                   enum dxil_resource_class resource_class,
                                   unsigned resource_range_id,
                                   unsigned resource_range_index,
                                   bool non_uniform_resource_index)
{
   const struct dxil_value *resource_range_index_value = dxil_module_get_int32_const(&ctx->mod, resource_range_index);
   if (!resource_range_index_value)
      return NULL;

   return emit_createhandle_call(ctx, resource_class, resource_range_id,
                                 resource_range_index_value,
                                 non_uniform_resource_index);
}

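/* Append a resource record to ctx->resources. Validator 1.6+ consumes the
 * larger v1 layout (which adds the resource kind and flags); older
 * validators use v0. Also tracks the running UAV count so the 64-UAV
 * feature flag can be set when more than 8 UAVs are declared.
 */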
static void
add_resource(struct ntd_context *ctx, enum dxil_resource_type type,
             enum dxil_resource_kind kind,
             const resource_array_layout *layout)
{
   struct dxil_resource_v0 *resource_v0 = NULL;
   struct dxil_resource_v1 *resource_v1 = NULL;
   if (ctx->mod.minor_validator >= 6) {
      resource_v1 = util_dynarray_grow(&ctx->resources, struct dxil_resource_v1, 1);
      resource_v0 = &resource_v1->v0;
   } else {
      resource_v0 = util_dynarray_grow(&ctx->resources, struct dxil_resource_v0, 1);
   }
   resource_v0->resource_type = type;
   resource_v0->space = layout->space;
   resource_v0->lower_bound = layout->binding;
   if (layout->size == 0 || (uint64_t)layout->size + layout->binding >= UINT_MAX)
      resource_v0->upper_bound = UINT_MAX;
   else
      resource_v0->upper_bound = layout->binding + layout->size - 1;
   if (type == DXIL_RES_UAV_TYPED ||
       type == DXIL_RES_UAV_RAW ||
       type == DXIL_RES_UAV_STRUCTURED) {
      uint32_t new_uav_count = ctx->num_uavs + layout->size;
      if (layout->size == 0 || new_uav_count < ctx->num_uavs)
         ctx->num_uavs = UINT_MAX;
      else
         ctx->num_uavs = new_uav_count;
      if (ctx->mod.minor_validator >= 6 && ctx->num_uavs > 8)
         ctx->mod.feats.use_64uavs = 1;
   }

   if (resource_v1) {
      resource_v1->resource_kind = kind;
      /* No flags supported yet */
      resource_v1->resource_flags = 0;
   }
}

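/* Map a (class, space, binding) triple back to the index of the matching
 * record within its resource class. Records are stored in CBV, sampler,
 * SRV, UAV order, so each class starts at a fixed offset into
 * ctx->resources.
 */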
static unsigned
get_resource_id(struct ntd_context *ctx, enum dxil_resource_class class,
                unsigned space, unsigned binding)
{
   unsigned offset = 0;
   unsigned count = 0;

   unsigned num_srvs = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_uavs = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_cbvs = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_samplers = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);

   switch (class) {
   case DXIL_RESOURCE_CLASS_UAV:
      offset = num_srvs + num_samplers + num_cbvs;
      count = num_uavs;
      break;
   case DXIL_RESOURCE_CLASS_SRV:
      offset = num_samplers + num_cbvs;
      count = num_srvs;
      break;
   case DXIL_RESOURCE_CLASS_SAMPLER:
      offset = num_cbvs;
      count = num_samplers;
      break;
   case DXIL_RESOURCE_CLASS_CBV:
      offset = 0;
      count = num_cbvs;
      break;
   }

   unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
      sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
   assert(offset + count <= ctx->resources.size / resource_element_size);
   for (unsigned i = offset; i < offset + count; ++i) {
      const struct dxil_resource_v0 *resource = (const struct dxil_resource_v0 *)((const char *)ctx->resources.data + resource_element_size * i);
      if (resource->space == space &&
          resource->lower_bound <= binding &&
          resource->upper_bound >= binding) {
         return i - offset;
      }
   }

   unreachable("Resource access for undeclared range");
   return 0;
}

static bool
emit_srv(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   unsigned id = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
   unsigned binding = var->data.binding;
   resource_array_layout layout = {id, binding, count, var->data.descriptor_set};

   enum dxil_component_type comp_type;
   enum dxil_resource_kind res_kind;
   enum dxil_resource_type res_type;
   if (var->data.mode == nir_var_mem_ssbo) {
      comp_type = DXIL_COMP_TYPE_INVALID;
      res_kind = DXIL_RESOURCE_KIND_RAW_BUFFER;
      res_type = DXIL_RES_SRV_RAW;
   } else {
      comp_type = dxil_get_comp_type(var->type);
      res_kind = dxil_get_resource_kind(var->type);
      res_type = DXIL_RES_SRV_TYPED;
   }
   const struct dxil_type *res_type_as_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, false /* readwrite */);

   if (glsl_type_is_array(var->type))
      res_type_as_type = dxil_module_get_array_type(&ctx->mod, res_type_as_type, count);

   const struct dxil_mdnode *srv_meta = emit_srv_metadata(&ctx->mod, res_type_as_type, var->name,
                                                          &layout, comp_type, res_kind);

   if (!srv_meta)
      return false;

   util_dynarray_append(&ctx->srv_metadata_nodes, const struct dxil_mdnode *, srv_meta);
   add_resource(ctx, res_type, res_kind, &layout);
   if (res_type == DXIL_RES_SRV_RAW)
      ctx->mod.raw_and_structured_buffers = true;

   return true;
}

static bool
emit_globals(struct ntd_context *ctx, unsigned size)
{
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo)
      size++;

   if (!size)
      return true;

   const struct dxil_type *struct_type = dxil_module_get_res_type(&ctx->mod,
      DXIL_RESOURCE_KIND_RAW_BUFFER, DXIL_COMP_TYPE_INVALID, true /* readwrite */);
   if (!struct_type)
      return false;

   const struct dxil_type *array_type =
      dxil_module_get_array_type(&ctx->mod, struct_type, size);
   if (!array_type)
      return false;

   resource_array_layout layout = {0, 0, size, 0};
   const struct dxil_mdnode *uav_meta =
      emit_uav_metadata(&ctx->mod, array_type, "globals", &layout,
                        DXIL_COMP_TYPE_INVALID,
                        DXIL_RESOURCE_KIND_RAW_BUFFER);
   if (!uav_meta)
      return false;

   util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta);
   if (ctx->mod.minor_validator < 6 &&
       util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8)
      ctx->mod.feats.use_64uavs = 1;
   /* Handles to UAVs used for kernel globals are created on-demand */
   add_resource(ctx, DXIL_RES_UAV_RAW, DXIL_RESOURCE_KIND_RAW_BUFFER, &layout);
   ctx->mod.raw_and_structured_buffers = true;
   return true;
}

static bool
emit_uav(struct ntd_context *ctx, unsigned binding, unsigned space, unsigned count,
         enum dxil_component_type comp_type, enum dxil_resource_kind res_kind, const char *name)
{
   unsigned id = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
   resource_array_layout layout = { id, binding, count, space };

   const struct dxil_type *res_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, true /* readwrite */);
   res_type = dxil_module_get_array_type(&ctx->mod, res_type, count);
   const struct dxil_mdnode *uav_meta = emit_uav_metadata(&ctx->mod, res_type, name,
                                                          &layout, comp_type, res_kind);

   if (!uav_meta)
      return false;

   util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta);
   if (ctx->mod.minor_validator < 6 &&
       util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8)
      ctx->mod.feats.use_64uavs = 1;

   add_resource(ctx, res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER ? DXIL_RES_UAV_RAW : DXIL_RES_UAV_TYPED, res_kind, &layout);
   if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      ctx->mod.raw_and_structured_buffers = true;
   if (ctx->mod.shader_kind != DXIL_PIXEL_SHADER &&
       ctx->mod.shader_kind != DXIL_COMPUTE_SHADER)
      ctx->mod.feats.uavs_at_every_stage = true;

   return true;
}

static bool
emit_uav_var(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   unsigned binding, space;
   if (ctx->opts->environment == DXIL_ENVIRONMENT_GL) {
      /* For GL, the image intrinsics are already lowered, using driver_location
       * as the 0-based image index. Use space 1 so that we can keep using these
       * NIR constants without having to remap them, and so they don't overlap
       * SSBOs, which are also 0-based UAV bindings.
       */
      binding = var->data.driver_location;
      space = 1;
   } else {
      binding = var->data.binding;
      space = var->data.descriptor_set;
   }
   enum dxil_component_type comp_type = dxil_get_comp_type(var->type);
   enum dxil_resource_kind res_kind = dxil_get_resource_kind(var->type);
   const char *name = var->name;

   return emit_uav(ctx, binding, space, count, comp_type, res_kind, name);
}

static void
var_fill_const_array_with_vector_or_scalar(struct ntd_context *ctx,
                                           const struct nir_constant *c,
                                           const struct glsl_type *type,
                                           void *const_vals,
                                           unsigned int offset)
{
   assert(glsl_type_is_vector_or_scalar(type));
   unsigned int components = glsl_get_vector_elements(type);
   unsigned bit_size = glsl_get_bit_size(type);
   unsigned int increment = bit_size / 8;

   for (unsigned int comp = 0; comp < components; comp++) {
      uint8_t *dst = (uint8_t *)const_vals + offset;

      switch (bit_size) {
      case 64:
         memcpy(dst, &c->values[comp].u64, sizeof(c->values[0].u64));
         break;
      case 32:
         memcpy(dst, &c->values[comp].u32, sizeof(c->values[0].u32));
         break;
      case 16:
         memcpy(dst, &c->values[comp].u16, sizeof(c->values[0].u16));
         break;
      case 8:
         assert(glsl_base_type_is_integer(glsl_get_base_type(type)));
         memcpy(dst, &c->values[comp].u8, sizeof(c->values[0].u8));
         break;
      default:
         unreachable("unexpected bit-size");
      }

      offset += increment;
   }
}

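/* Recursively serialize a nir_constant into a flat byte buffer, following
 * the GLSL type's CL size and explicit field offsets.
 */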
static void
var_fill_const_array(struct ntd_context *ctx, const struct nir_constant *c,
                     const struct glsl_type *type, void *const_vals,
                     unsigned int offset)
{
   assert(!glsl_type_is_interface(type));

   if (glsl_type_is_vector_or_scalar(type)) {
      var_fill_const_array_with_vector_or_scalar(ctx, c, type,
                                                 const_vals,
                                                 offset);
   } else if (glsl_type_is_array(type)) {
      assert(!glsl_type_is_unsized_array(type));
      const struct glsl_type *without = glsl_without_array(type);
      unsigned stride = glsl_get_explicit_stride(without);

      for (unsigned elt = 0; elt < glsl_get_length(type); elt++) {
         var_fill_const_array(ctx, c->elements[elt], without,
                              const_vals, offset + (elt * stride));
         offset += glsl_get_cl_size(without);
      }
   } else if (glsl_type_is_struct(type)) {
      for (unsigned int elt = 0; elt < glsl_get_length(type); elt++) {
         const struct glsl_type *elt_type = glsl_get_struct_field(type, elt);
         unsigned field_offset = glsl_get_struct_field_offset(type, elt);

         var_fill_const_array(ctx, c->elements[elt],
                              elt_type, const_vals,
                              offset + field_offset);
      }
   } else
      unreachable("unknown GLSL type in var_fill_const_array");
}

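/* Turn shader_temp variables with constant initializers into global i32
 * arrays in the default address space, remembering each resulting value in
 * ctx->consts so later dereferences can find it.
 */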
static bool
emit_global_consts(struct ntd_context *ctx)
{
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_temp) {
      assert(var->constant_initializer);

      unsigned int num_members = DIV_ROUND_UP(glsl_get_cl_size(var->type), 4);
      uint32_t *const_ints = ralloc_array(ctx->ralloc_ctx, uint32_t, num_members);
      var_fill_const_array(ctx, var->constant_initializer, var->type,
                           const_ints, 0);
      const struct dxil_value **const_vals =
         ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, num_members);
      if (!const_vals)
         return false;
      for (unsigned i = 0; i < num_members; i++)
         const_vals[i] = dxil_module_get_int32_const(&ctx->mod, const_ints[i]);

      const struct dxil_type *elt_type = dxil_module_get_int_type(&ctx->mod, 32);
      if (!elt_type)
         return false;
      const struct dxil_type *type =
         dxil_module_get_array_type(&ctx->mod, elt_type, num_members);
      if (!type)
         return false;
      const struct dxil_value *agg_vals =
         dxil_module_get_array_const(&ctx->mod, type, const_vals);
      if (!agg_vals)
         return false;

      const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name, type,
                                                              DXIL_AS_DEFAULT, 4,
                                                              agg_vals);
      if (!gvar)
         return false;

      if (!_mesa_hash_table_insert(ctx->consts, var, (void *)gvar))
         return false;
   }

   return true;
}

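/* Declare a constant buffer. DXIL models a cbuffer as a struct wrapping a
 * float array; size is given in 32-bit words here, so the constant buffer
 * size reported in the metadata is 4 * size bytes.
 */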
static bool
emit_cbv(struct ntd_context *ctx, unsigned binding, unsigned space,
         unsigned size, unsigned count, char *name)
{
   assert(count != 0);

   unsigned idx = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);

   const struct dxil_type *float32 = dxil_module_get_float_type(&ctx->mod, 32);
   const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, float32, size);
   const struct dxil_type *buffer_type = dxil_module_get_struct_type(&ctx->mod, name,
                                                                     &array_type, 1);
   // All ubo[1]s should have been lowered to ubo with static indexing
   const struct dxil_type *final_type = count != 1 ? dxil_module_get_array_type(&ctx->mod, buffer_type, count) : buffer_type;
   resource_array_layout layout = {idx, binding, count, space};
   const struct dxil_mdnode *cbv_meta = emit_cbv_metadata(&ctx->mod, final_type,
                                                          name, &layout, 4 * size);

   if (!cbv_meta)
      return false;

   util_dynarray_append(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *, cbv_meta);
   add_resource(ctx, DXIL_RES_CBV, DXIL_RESOURCE_KIND_CBUFFER, &layout);

   return true;
}

static bool
emit_ubo_var(struct ntd_context *ctx, nir_variable *var)
{
   unsigned count = 1;
   if (glsl_type_is_array(var->type))
      count = glsl_get_length(var->type);

   char *name = var->name;
   char temp_name[30];
   if (name && strlen(name) == 0) {
      snprintf(temp_name, sizeof(temp_name), "__unnamed_ubo_%d",
               ctx->unnamed_ubo_count++);
      name = temp_name;
   }

   const struct glsl_type *type = glsl_without_array(var->type);
   assert(glsl_type_is_struct(type) || glsl_type_is_interface(type));
   unsigned dwords = ALIGN_POT(glsl_get_explicit_size(type, false), 16) / 4;

   return emit_cbv(ctx, var->data.binding, var->data.descriptor_set,
                   dwords, count, name);
}

static bool
emit_sampler(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   unsigned id = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);
   unsigned binding = var->data.binding;
   resource_array_layout layout = {id, binding, count, var->data.descriptor_set};
   const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
   const struct dxil_type *sampler_type = dxil_module_get_struct_type(&ctx->mod, "struct.SamplerState", &int32_type, 1);

   if (glsl_type_is_array(var->type))
      sampler_type = dxil_module_get_array_type(&ctx->mod, sampler_type, count);

   const struct dxil_mdnode *sampler_meta = emit_sampler_metadata(&ctx->mod, sampler_type, var, &layout);

   if (!sampler_meta)
      return false;

   util_dynarray_append(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *, sampler_meta);
   add_resource(ctx, DXIL_RES_SAMPLER, DXIL_RESOURCE_KIND_SAMPLER, &layout);

   return true;
}

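/* In environments with static bindings (GL and CL), resource handles can
 * be created once up front and reused by index; Vulkan instead keeps
 * dynamic handle creation in the NIR instruction stream.
 */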
static bool
emit_static_indexing_handles(struct ntd_context *ctx)
{
   /* Vulkan always uses dynamic handles, from instructions in the NIR */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN)
      return true;

   unsigned last_res_class = -1;
   unsigned id = 0;

   unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
      sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
   for (struct dxil_resource_v0 *res = (struct dxil_resource_v0 *)ctx->resources.data;
        res < (struct dxil_resource_v0 *)((char *)ctx->resources.data + ctx->resources.size);
        res = (struct dxil_resource_v0 *)((char *)res + resource_element_size)) {
      enum dxil_resource_class res_class;
      const struct dxil_value **handle_array;
      switch (res->resource_type) {
      case DXIL_RES_SRV_TYPED:
      case DXIL_RES_SRV_RAW:
      case DXIL_RES_SRV_STRUCTURED:
         res_class = DXIL_RESOURCE_CLASS_SRV;
         handle_array = ctx->srv_handles;
         break;
      case DXIL_RES_CBV:
         res_class = DXIL_RESOURCE_CLASS_CBV;
         handle_array = ctx->cbv_handles;
         break;
      case DXIL_RES_SAMPLER:
         res_class = DXIL_RESOURCE_CLASS_SAMPLER;
         handle_array = ctx->sampler_handles;
         break;
      case DXIL_RES_UAV_RAW:
         res_class = DXIL_RESOURCE_CLASS_UAV;
         handle_array = ctx->ssbo_handles;
         break;
      case DXIL_RES_UAV_TYPED:
      case DXIL_RES_UAV_STRUCTURED:
      case DXIL_RES_UAV_STRUCTURED_WITH_COUNTER:
         res_class = DXIL_RESOURCE_CLASS_UAV;
         handle_array = ctx->image_handles;
         break;
      default:
         unreachable("Unexpected resource type");
      }

      if (last_res_class != res_class)
         id = 0;
      else
         id++;
      last_res_class = res_class;

      if (res->space > 1)
         continue;
      assert(res->space == 0 ||
         (res->space == 1 &&
            res->resource_type != DXIL_RES_UAV_RAW &&
            ctx->opts->environment == DXIL_ENVIRONMENT_GL));

      /* CL uses dynamic handles for the "globals" UAV array, but uses static
       * handles for UBOs, textures, and samplers.
       */
      if (ctx->opts->environment == DXIL_ENVIRONMENT_CL &&
          res->resource_type == DXIL_RES_UAV_RAW)
         continue;

      for (unsigned i = res->lower_bound; i <= res->upper_bound; ++i) {
         handle_array[i] = emit_createhandle_call_const_index(ctx, res_class, id, i, false);
         if (!handle_array[i])
            return false;
      }
   }
   return true;
}

static const struct dxil_mdnode *
emit_gs_state(struct ntd_context *ctx)
{
   const struct dxil_mdnode *gs_state_nodes[5];
   const nir_shader *s = ctx->shader;

   gs_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, dxil_get_input_primitive(s->info.gs.input_primitive));
   gs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.vertices_out);
   gs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.gs.active_stream_mask, 1));
   gs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, dxil_get_primitive_topology(s->info.gs.output_primitive));
   gs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.invocations);

   for (unsigned i = 0; i < ARRAY_SIZE(gs_state_nodes); ++i) {
      if (!gs_state_nodes[i])
         return NULL;
   }

   return dxil_get_metadata_node(&ctx->mod, gs_state_nodes, ARRAY_SIZE(gs_state_nodes));
}

1369 static enum dxil_tessellator_domain
1370 get_tessellator_domain(enum tess_primitive_mode primitive_mode)
1371 {
1372    switch (primitive_mode) {
1373    case TESS_PRIMITIVE_QUADS: return DXIL_TESSELLATOR_DOMAIN_QUAD;
1374    case TESS_PRIMITIVE_TRIANGLES: return DXIL_TESSELLATOR_DOMAIN_TRI;
1375    case TESS_PRIMITIVE_ISOLINES: return DXIL_TESSELLATOR_DOMAIN_ISOLINE;
1376    default:
1377       unreachable("Invalid tessellator primitive mode");
1378    }
1379 }
1380 
1381 static enum dxil_tessellator_partitioning
1382 get_tessellator_partitioning(enum gl_tess_spacing spacing)
1383 {
1384    switch (spacing) {
1385    default:
1386    case TESS_SPACING_EQUAL:
1387       return DXIL_TESSELLATOR_PARTITIONING_INTEGER;
1388    case TESS_SPACING_FRACTIONAL_EVEN:
1389       return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN;
1390    case TESS_SPACING_FRACTIONAL_ODD:
1391       return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD;
1392    }
1393 }
1394 
1395 static enum dxil_tessellator_output_primitive
1396 get_tessellator_output_primitive(const struct shader_info *info)
1397 {
1398    if (info->tess.point_mode)
1399       return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_POINT;
1400    if (info->tess._primitive_mode == TESS_PRIMITIVE_ISOLINES)
1401       return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_LINE;
1402    /* Note: GL tessellation domain is inverted from D3D, which means triangle
1403     * winding needs to be inverted.
1404     */
1405    if (info->tess.ccw)
1406       return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CW;
1407    return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CCW;
1408 }
1409 
1410 static const struct dxil_mdnode *
1411 emit_hs_state(struct ntd_context *ctx)
1412 {
1413    const struct dxil_mdnode *hs_state_nodes[7];
1414 
1415    hs_state_nodes[0] = dxil_get_metadata_func(&ctx->mod, ctx->tess_ctrl_patch_constant_func_def->func);
1416    hs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->tess_input_control_point_count);
1417    hs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out);
1418    hs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode));
1419    hs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_partitioning(ctx->shader->info.tess.spacing));
1420    hs_state_nodes[5] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_output_primitive(&ctx->shader->info));
1421    hs_state_nodes[6] = dxil_get_metadata_float32(&ctx->mod, 64.0f);
1422 
1423    return dxil_get_metadata_node(&ctx->mod, hs_state_nodes, ARRAY_SIZE(hs_state_nodes));
1424 }
1425 
1426 static const struct dxil_mdnode *
1427 emit_ds_state(struct ntd_context *ctx)
1428 {
1429    const struct dxil_mdnode *ds_state_nodes[2];
1430 
1431    ds_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode));
1432    ds_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out);
1433 
1434    return dxil_get_metadata_node(&ctx->mod, ds_state_nodes, ARRAY_SIZE(ds_state_nodes));
1435 }
1436 
1437 static const struct dxil_mdnode *
1438 emit_threads(struct ntd_context *ctx)
1439 {
1440    const nir_shader *s = ctx->shader;
1441    const struct dxil_mdnode *threads_x = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[0], 1));
1442    const struct dxil_mdnode *threads_y = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[1], 1));
1443    const struct dxil_mdnode *threads_z = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[2], 1));
1444    if (!threads_x || !threads_y || !threads_z)
1445       return NULL;
1446 
1447    const struct dxil_mdnode *threads_nodes[] = { threads_x, threads_y, threads_z };
1448    return dxil_get_metadata_node(&ctx->mod, threads_nodes, ARRAY_SIZE(threads_nodes));
1449 }
1450 
1451 static int64_t
1452 get_module_flags(struct ntd_context *ctx)
1453 {
1454    /* See the DXIL documentation for the definition of these flags:
1455     *
1456     * https://github.com/Microsoft/DirectXShaderCompiler/blob/master/docs/DXIL.rst#shader-flags
1457     */
1458 
1459    uint64_t flags = 0;
1460    if (ctx->mod.feats.doubles)
1461       flags |= (1 << 2);
1462    if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT &&
1463        ctx->shader->info.fs.early_fragment_tests)
1464       flags |= (1 << 3);
1465    if (ctx->mod.raw_and_structured_buffers)
1466       flags |= (1 << 4);
1467    if (ctx->mod.feats.min_precision)
1468       flags |= (1 << 5);
1469    if (ctx->mod.feats.dx11_1_double_extensions)
1470       flags |= (1 << 6);
1471    if (ctx->mod.feats.array_layer_from_vs_or_ds)
1472       flags |= (1 << 9);
1473    if (ctx->mod.feats.inner_coverage)
1474       flags |= (1 << 10);
1475    if (ctx->mod.feats.typed_uav_load_additional_formats)
1476       flags |= (1 << 13);
1477    if (ctx->mod.feats.use_64uavs)
1478       flags |= (1 << 15);
1479    if (ctx->mod.feats.uavs_at_every_stage)
1480       flags |= (1 << 16);
1481    if (ctx->mod.feats.cs_4x_raw_sb)
1482       flags |= (1 << 17);
1483    if (ctx->mod.feats.wave_ops)
1484       flags |= (1 << 19);
1485    if (ctx->mod.feats.int64_ops)
1486       flags |= (1 << 20);
1487    if (ctx->mod.feats.barycentrics)
1488       flags |= (1 << 22);
1489    if (ctx->mod.feats.stencil_ref)
1490       flags |= (1 << 11);
1491    if (ctx->mod.feats.native_low_precision)
1492       flags |= (1 << 23) | (1 << 5);
1493 
1494    if (ctx->opts->disable_math_refactoring)
1495       flags |= (1 << 1);
1496 
1497    return flags;
1498 }
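/* Worked example of the flag word above: a shader that uses doubles and
 * 64-bit integer ops ends up with
 *
 *    flags = (1 << 2) | (1 << 20) = 0x100004
 *
 * i.e. the "Doubles" and "Int64Ops" bits from the DXIL shader-flags table,
 * with everything else clear.
 */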
1499 
1500 static const struct dxil_mdnode *
1501 emit_entrypoint(struct ntd_context *ctx,
1502                 const struct dxil_func *func, const char *name,
1503                 const struct dxil_mdnode *signatures,
1504                 const struct dxil_mdnode *resources,
1505                 const struct dxil_mdnode *shader_props)
1506 {
1507    char truncated_name[254] = { 0 };
1508    strncpy(truncated_name, name, ARRAY_SIZE(truncated_name) - 1);
1509 
1510    const struct dxil_mdnode *func_md = dxil_get_metadata_func(&ctx->mod, func);
1511    const struct dxil_mdnode *name_md = dxil_get_metadata_string(&ctx->mod, truncated_name);
1512    const struct dxil_mdnode *nodes[] = {
1513       func_md,
1514       name_md,
1515       signatures,
1516       resources,
1517       shader_props
1518    };
1519    return dxil_get_metadata_node(&ctx->mod, nodes,
1520                                  ARRAY_SIZE(nodes));
1521 }
1522 
1523 static const struct dxil_mdnode *
1524 emit_resources(struct ntd_context *ctx)
1525 {
1526    bool emit_resources = false;
1527    const struct dxil_mdnode *resources_nodes[] = {
1528       NULL, NULL, NULL, NULL
1529    };
1530 
1531 #define ARRAY_AND_SIZE(arr) arr.data, util_dynarray_num_elements(&arr, const struct dxil_mdnode *)
1532 
1533    if (ctx->srv_metadata_nodes.size) {
1534       resources_nodes[0] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->srv_metadata_nodes));
1535       emit_resources = true;
1536    }
1537 
1538    if (ctx->uav_metadata_nodes.size) {
1539       resources_nodes[1] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->uav_metadata_nodes));
1540       emit_resources = true;
1541    }
1542 
1543    if (ctx->cbv_metadata_nodes.size) {
1544       resources_nodes[2] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->cbv_metadata_nodes));
1545       emit_resources = true;
1546    }
1547 
1548    if (ctx->sampler_metadata_nodes.size) {
1549       resources_nodes[3] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->sampler_metadata_nodes));
1550       emit_resources = true;
1551    }
1552 
1553 #undef ARRAY_AND_SIZE
1554 
1555    return emit_resources ?
1556       dxil_get_metadata_node(&ctx->mod, resources_nodes, ARRAY_SIZE(resources_nodes)): NULL;
1557 }
1558 
1559 static bool
1560 emit_tag(struct ntd_context *ctx, enum dxil_shader_tag tag,
1561          const struct dxil_mdnode *value_node)
1562 {
1563    const struct dxil_mdnode *tag_node = dxil_get_metadata_int32(&ctx->mod, tag);
1564    if (!tag_node || !value_node)
1565       return false;
1566    assert(ctx->num_shader_property_nodes <= ARRAY_SIZE(ctx->shader_property_nodes) - 2);
1567    ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = tag_node;
1568    ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = value_node;
1569 
1570    return true;
1571 }
1572 
1573 static bool
1574 emit_metadata(struct ntd_context *ctx)
1575 {
1576    /* DXIL versions are 1.x for shader model 6.x */
1577    assert(ctx->mod.major_version == 6);
1578    unsigned dxilMajor = 1;
1579    unsigned dxilMinor = ctx->mod.minor_version;
1580    unsigned valMajor = ctx->mod.major_validator;
1581    unsigned valMinor = ctx->mod.minor_validator;
1582    if (!emit_llvm_ident(&ctx->mod) ||
1583        !emit_named_version(&ctx->mod, "dx.version", dxilMajor, dxilMinor) ||
1584        !emit_named_version(&ctx->mod, "dx.valver", valMajor, valMinor) ||
1585        !emit_dx_shader_model(&ctx->mod))
1586       return false;
1587 
1588    const struct dxil_func_def *main_func_def = ctx->main_func_def;
1589    if (!main_func_def)
1590       return false;
1591    const struct dxil_func *main_func = main_func_def->func;
1592 
1593    const struct dxil_mdnode *resources_node = emit_resources(ctx);
1594 
1595    const struct dxil_mdnode *main_entrypoint = dxil_get_metadata_func(&ctx->mod, main_func);
1596    const struct dxil_mdnode *node27 = dxil_get_metadata_node(&ctx->mod, NULL, 0);
1597 
1598    const struct dxil_mdnode *node4 = dxil_get_metadata_int32(&ctx->mod, 0);
1599    const struct dxil_mdnode *nodes_4_27_27[] = {
1600       node4, node27, node27
1601    };
1602    const struct dxil_mdnode *node28 = dxil_get_metadata_node(&ctx->mod, nodes_4_27_27,
1603                                                       ARRAY_SIZE(nodes_4_27_27));
1604 
1605    const struct dxil_mdnode *node29 = dxil_get_metadata_node(&ctx->mod, &node28, 1);
1606 
1607    const struct dxil_mdnode *node3 = dxil_get_metadata_int32(&ctx->mod, 1);
1608    const struct dxil_mdnode *main_type_annotation_nodes[] = {
1609       node3, main_entrypoint, node29
1610    };
1611    const struct dxil_mdnode *main_type_annotation = dxil_get_metadata_node(&ctx->mod, main_type_annotation_nodes,
1612                                                                            ARRAY_SIZE(main_type_annotation_nodes));
1613 
1614    if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) {
1615       if (!emit_tag(ctx, DXIL_SHADER_TAG_GS_STATE, emit_gs_state(ctx)))
1616          return false;
1617    } else if (ctx->mod.shader_kind == DXIL_HULL_SHADER) {
1618       ctx->tess_input_control_point_count = 32;
1619       nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
1620          if (nir_is_arrayed_io(var, MESA_SHADER_TESS_CTRL)) {
1621             ctx->tess_input_control_point_count = glsl_array_size(var->type);
1622             break;
1623          }
1624       }
1625 
1626       if (!emit_tag(ctx, DXIL_SHADER_TAG_HS_STATE, emit_hs_state(ctx)))
1627          return false;
1628    } else if (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) {
1629       if (!emit_tag(ctx, DXIL_SHADER_TAG_DS_STATE, emit_ds_state(ctx)))
1630          return false;
1631    } else if (ctx->mod.shader_kind == DXIL_COMPUTE_SHADER) {
1632       if (!emit_tag(ctx, DXIL_SHADER_TAG_NUM_THREADS, emit_threads(ctx)))
1633          return false;
1634    }
1635 
1636    uint64_t flags = get_module_flags(ctx);
1637    if (flags != 0) {
1638       if (!emit_tag(ctx, DXIL_SHADER_TAG_FLAGS, dxil_get_metadata_int64(&ctx->mod, flags)))
1639          return false;
1640    }
1641    const struct dxil_mdnode *shader_properties = NULL;
1642    if (ctx->num_shader_property_nodes > 0) {
1643       shader_properties = dxil_get_metadata_node(&ctx->mod, ctx->shader_property_nodes,
1644                                                  ctx->num_shader_property_nodes);
1645       if (!shader_properties)
1646          return false;
1647    }
1648 
1649    nir_function_impl *entry_func_impl = nir_shader_get_entrypoint(ctx->shader);
1650    const struct dxil_mdnode *dx_entry_point = emit_entrypoint(ctx, main_func,
1651        entry_func_impl->function->name, get_signatures(&ctx->mod), resources_node, shader_properties);
1652    if (!dx_entry_point)
1653       return false;
1654 
1655    if (resources_node) {
1656       const struct dxil_mdnode *dx_resources = resources_node;
1657       dxil_add_metadata_named_node(&ctx->mod, "dx.resources",
1658                                        &dx_resources, 1);
1659    }
1660 
1661    const struct dxil_mdnode *dx_type_annotations[] = { main_type_annotation };
1662    return dxil_add_metadata_named_node(&ctx->mod, "dx.typeAnnotations",
1663                                        dx_type_annotations,
1664                                        ARRAY_SIZE(dx_type_annotations)) &&
1665           dxil_add_metadata_named_node(&ctx->mod, "dx.entryPoints",
1666                                        &dx_entry_point, 1);
1667 }
1668 
1669 static const struct dxil_value *
1670 bitcast_to_int(struct ntd_context *ctx, unsigned bit_size,
1671                const struct dxil_value *value)
1672 {
1673    const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod, bit_size);
1674    if (!type)
1675       return NULL;
1676 
1677    return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
1678 }
1679 
1680 static const struct dxil_value *
1681 bitcast_to_float(struct ntd_context *ctx, unsigned bit_size,
1682                  const struct dxil_value *value)
1683 {
1684    const struct dxil_type *type = dxil_module_get_float_type(&ctx->mod, bit_size);
1685    if (!type)
1686       return NULL;
1687 
1688    return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
1689 }
1690 
1691 static void
1692 store_ssa_def(struct ntd_context *ctx, nir_ssa_def *ssa, unsigned chan,
1693               const struct dxil_value *value)
1694 {
1695    assert(ssa->index < ctx->num_defs);
1696    assert(chan < ssa->num_components);
1697    /* We pre-defined the dest value because of a phi node, so bitcast while storing if the
1698     * base type differs */
1699    if (ctx->defs[ssa->index].chans[chan]) {
1700       const struct dxil_type *expect_type = dxil_value_get_type(ctx->defs[ssa->index].chans[chan]);
1701       const struct dxil_type *value_type = dxil_value_get_type(value);
1702       if (dxil_type_to_nir_type(expect_type) != dxil_type_to_nir_type(value_type))
1703          value = dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, expect_type, value);
1704    }
1705    ctx->defs[ssa->index].chans[chan] = value;
1706 }
1707 
1708 static void
1709 store_dest_value(struct ntd_context *ctx, nir_dest *dest, unsigned chan,
1710                  const struct dxil_value *value)
1711 {
1712    assert(dest->is_ssa);
1713    assert(value);
1714    store_ssa_def(ctx, &dest->ssa, chan, value);
1715 }
1716 
1717 static void
1718 store_dest(struct ntd_context *ctx, nir_dest *dest, unsigned chan,
1719            const struct dxil_value *value, nir_alu_type type)
1720 {
1721    switch (nir_alu_type_get_base_type(type)) {
1722    case nir_type_float:
1723       if (nir_dest_bit_size(*dest) == 64)
1724          ctx->mod.feats.doubles = true;
1725       store_dest_value(ctx, dest, chan, value);
1726       break;
1727    case nir_type_uint:
1728    case nir_type_int:
1729       if (nir_dest_bit_size(*dest) == 16)
1730          ctx->mod.feats.native_low_precision = true;
1731       if (nir_dest_bit_size(*dest) == 64)
1732          ctx->mod.feats.int64_ops = true;
1733       FALLTHROUGH;
1734    case nir_type_bool:
1735       store_dest_value(ctx, dest, chan, value);
1736       break;
1737    default:
1738       unreachable("unexpected nir_alu_type");
1739    }
1740 }
1741 
1742 static void
1743 store_alu_dest(struct ntd_context *ctx, nir_alu_instr *alu, unsigned chan,
1744                const struct dxil_value *value)
1745 {
1746    assert(!alu->dest.saturate);
1747    store_dest(ctx, &alu->dest.dest, chan, value,
1748               nir_op_infos[alu->op].output_type);
1749 }
1750 
1751 static const struct dxil_value *
1752 get_src_ssa(struct ntd_context *ctx, const nir_ssa_def *ssa, unsigned chan)
1753 {
1754    assert(ssa->index < ctx->num_defs);
1755    assert(chan < ssa->num_components);
1756    assert(ctx->defs[ssa->index].chans[chan]);
1757    return ctx->defs[ssa->index].chans[chan];
1758 }
1759 
1760 static const struct dxil_value *
1761 get_src(struct ntd_context *ctx, nir_src *src, unsigned chan,
1762         nir_alu_type type)
1763 {
1764    assert(src->is_ssa);
1765    const struct dxil_value *value = get_src_ssa(ctx, src->ssa, chan);
1766 
1767    const int bit_size = nir_src_bit_size(*src);
1768 
1769    switch (nir_alu_type_get_base_type(type)) {
1770    case nir_type_int:
1771    case nir_type_uint: {
1772       assert(bit_size != 64 || ctx->mod.feats.int64_ops);
1773       const struct dxil_type *expect_type = dxil_module_get_int_type(&ctx->mod, bit_size);
1774       /* nothing to do */
1775       if (dxil_value_type_equal_to(value, expect_type))
1776          return value;
1777       assert(dxil_value_type_bitsize_equal_to(value, bit_size));
1778       return bitcast_to_int(ctx, bit_size, value);
1779       }
1780 
1781    case nir_type_float:
1782       assert(nir_src_bit_size(*src) >= 16);
1783       assert(nir_src_bit_size(*src) != 64 || ctx->mod.feats.doubles);
1784       if (dxil_value_type_equal_to(value, dxil_module_get_float_type(&ctx->mod, bit_size)))
1785          return value;
1786       assert(dxil_value_type_bitsize_equal_to(value, bit_size));
1787       return bitcast_to_float(ctx, bit_size, value);
1788 
1789    case nir_type_bool:
1790       if (!dxil_value_type_bitsize_equal_to(value, 1)) {
1791          return dxil_emit_cast(&ctx->mod, DXIL_CAST_TRUNC,
1792                                dxil_module_get_int_type(&ctx->mod, 1), value);
1793       }
1794       return value;
1795 
1796    default:
1797       unreachable("unexpected nir_alu_type");
1798    }
1799 }
1800 
1801 static const struct dxil_type *
1802 get_alu_src_type(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src)
1803 {
1804    assert(!alu->src[src].abs);
1805    assert(!alu->src[src].negate);
1806    nir_ssa_def *ssa_src = alu->src[src].src.ssa;
1807    unsigned chan = alu->src[src].swizzle[0];
1808    const struct dxil_value *value = get_src_ssa(ctx, ssa_src, chan);
1809    return dxil_value_get_type(value);
1810 }
1811 
1812 static const struct dxil_value *
1813 get_alu_src(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src)
1814 {
1815    assert(!alu->src[src].abs);
1816    assert(!alu->src[src].negate);
1817 
1818    unsigned chan = alu->src[src].swizzle[0];
1819    return get_src(ctx, &alu->src[src].src, chan,
1820                   nir_op_infos[alu->op].input_types[src]);
1821 }
1822 
1823 static bool
1824 emit_binop(struct ntd_context *ctx, nir_alu_instr *alu,
1825            enum dxil_bin_opcode opcode,
1826            const struct dxil_value *op0, const struct dxil_value *op1)
1827 {
1828    bool is_float_op = nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) == nir_type_float;
1829 
1830    enum dxil_opt_flags flags = 0;
1831    if (is_float_op && !alu->exact)
1832       flags |= DXIL_UNSAFE_ALGEBRA;
1833 
1834    const struct dxil_value *v = dxil_emit_binop(&ctx->mod, opcode, op0, op1, flags);
1835    if (!v)
1836       return false;
1837    store_alu_dest(ctx, alu, 0, v);
1838    return true;
1839 }
1840 
1841 static bool
1842 emit_shift(struct ntd_context *ctx, nir_alu_instr *alu,
1843            enum dxil_bin_opcode opcode,
1844            const struct dxil_value *op0, const struct dxil_value *op1)
1845 {
1846    unsigned op0_bit_size = nir_src_bit_size(alu->src[0].src);
1847    unsigned op1_bit_size = nir_src_bit_size(alu->src[1].src);
1848    if (op0_bit_size != op1_bit_size) {
1849       const struct dxil_type *type =
1850          dxil_module_get_int_type(&ctx->mod, op0_bit_size);
1851       enum dxil_cast_opcode cast_op =
1852          op1_bit_size < op0_bit_size ? DXIL_CAST_ZEXT : DXIL_CAST_TRUNC;
1853       op1 = dxil_emit_cast(&ctx->mod, cast_op, type, op1);
1854    }
1855 
1856    const struct dxil_value *v =
1857       dxil_emit_binop(&ctx->mod, opcode, op0, op1, 0);
1858    if (!v)
1859       return false;
1860    store_alu_dest(ctx, alu, 0, v);
1861    return true;
1862 }
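/* Sketch of the width fixup above: DXIL, like LLVM IR, requires both shift
 * operands to share one type, while NIR allows e.g. a 64-bit value shifted
 * by a 32-bit amount. For "ishl %a(i64), %s(i32)" the amount is
 * zero-extended to i64 first; in the opposite case (amount wider than the
 * value) it is truncated instead.
 */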
1863 
1864 static bool
1865 emit_cmp(struct ntd_context *ctx, nir_alu_instr *alu,
1866          enum dxil_cmp_pred pred,
1867          const struct dxil_value *op0, const struct dxil_value *op1)
1868 {
1869    const struct dxil_value *v = dxil_emit_cmp(&ctx->mod, pred, op0, op1);
1870    if (!v)
1871       return false;
1872    store_alu_dest(ctx, alu, 0, v);
1873    return true;
1874 }
1875 
1876 static enum dxil_cast_opcode
1877 get_cast_op(nir_alu_instr *alu)
1878 {
1879    unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
1880    unsigned src_bits = nir_src_bit_size(alu->src[0].src);
1881 
1882    switch (alu->op) {
1883    /* bool -> int */
1884    case nir_op_b2i16:
1885    case nir_op_b2i32:
1886    case nir_op_b2i64:
1887       return DXIL_CAST_ZEXT;
1888 
1889    /* float -> float */
1890    case nir_op_f2f16_rtz:
1891    case nir_op_f2f32:
1892    case nir_op_f2f64:
1893       assert(dst_bits != src_bits);
1894       if (dst_bits < src_bits)
1895          return DXIL_CAST_FPTRUNC;
1896       else
1897          return DXIL_CAST_FPEXT;
1898 
1899    /* int -> int */
1900    case nir_op_i2i16:
1901    case nir_op_i2i32:
1902    case nir_op_i2i64:
1903       assert(dst_bits != src_bits);
1904       if (dst_bits < src_bits)
1905          return DXIL_CAST_TRUNC;
1906       else
1907          return DXIL_CAST_SEXT;
1908 
1909    /* uint -> uint */
1910    case nir_op_u2u16:
1911    case nir_op_u2u32:
1912    case nir_op_u2u64:
1913       assert(dst_bits != src_bits);
1914       if (dst_bits < src_bits)
1915          return DXIL_CAST_TRUNC;
1916       else
1917          return DXIL_CAST_ZEXT;
1918 
1919    /* float -> int */
1920    case nir_op_f2i16:
1921    case nir_op_f2i32:
1922    case nir_op_f2i64:
1923       return DXIL_CAST_FPTOSI;
1924 
1925    /* float -> uint */
1926    case nir_op_f2u16:
1927    case nir_op_f2u32:
1928    case nir_op_f2u64:
1929       return DXIL_CAST_FPTOUI;
1930 
1931    /* int -> float */
1932    case nir_op_i2f16:
1933    case nir_op_i2f32:
1934    case nir_op_i2f64:
1935       return DXIL_CAST_SITOFP;
1936 
1937    /* uint -> float */
1938    case nir_op_u2f16:
1939    case nir_op_u2f32:
1940    case nir_op_u2f64:
1941       return DXIL_CAST_UITOFP;
1942 
1943    default:
1944       unreachable("unexpected cast op");
1945    }
1946 }
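/* Example mappings from the cases above: nir_op_i2i16 from a 32-bit source
 * narrows, so it maps to DXIL_CAST_TRUNC; nir_op_i2i64 from the same source
 * widens with sign extension (DXIL_CAST_SEXT), while nir_op_u2u64 widens
 * with zero extension (DXIL_CAST_ZEXT).
 */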
1947 
1948 static const struct dxil_type *
1949 get_cast_dest_type(struct ntd_context *ctx, nir_alu_instr *alu)
1950 {
1951    unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
1952    switch (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type)) {
1953    case nir_type_bool:
1954       assert(dst_bits == 1);
1955       FALLTHROUGH;
1956    case nir_type_int:
1957    case nir_type_uint:
1958       return dxil_module_get_int_type(&ctx->mod, dst_bits);
1959 
1960    case nir_type_float:
1961       return dxil_module_get_float_type(&ctx->mod, dst_bits);
1962 
1963    default:
1964       unreachable("unknown nir_alu_type");
1965    }
1966 }
1967 
1968 static bool
1969 is_double(nir_alu_type alu_type, unsigned bit_size)
1970 {
1971    return nir_alu_type_get_base_type(alu_type) == nir_type_float &&
1972           bit_size == 64;
1973 }
1974 
1975 static bool
1976 emit_cast(struct ntd_context *ctx, nir_alu_instr *alu,
1977           const struct dxil_value *value)
1978 {
1979    enum dxil_cast_opcode opcode = get_cast_op(alu);
1980    const struct dxil_type *type = get_cast_dest_type(ctx, alu);
1981    if (!type)
1982       return false;
1983 
1984    const nir_op_info *info = &nir_op_infos[alu->op];
1985    switch (opcode) {
1986    case DXIL_CAST_UITOFP:
1987    case DXIL_CAST_SITOFP:
1988       if (is_double(info->output_type, nir_dest_bit_size(alu->dest.dest)))
1989          ctx->mod.feats.dx11_1_double_extensions = true;
1990       break;
1991    case DXIL_CAST_FPTOUI:
1992    case DXIL_CAST_FPTOSI:
1993       if (is_double(info->input_types[0], nir_src_bit_size(alu->src[0].src)))
1994          ctx->mod.feats.dx11_1_double_extensions = true;
1995       break;
1996    default:
1997       break;
1998    }
1999 
2000    const struct dxil_value *v = dxil_emit_cast(&ctx->mod, opcode, type,
2001                                                value);
2002    if (!v)
2003       return false;
2004    store_alu_dest(ctx, alu, 0, v);
2005    return true;
2006 }
2007 
2008 static enum overload_type
2009 get_overload(nir_alu_type alu_type, unsigned bit_size)
2010 {
2011    switch (nir_alu_type_get_base_type(alu_type)) {
2012    case nir_type_int:
2013    case nir_type_uint:
2014       switch (bit_size) {
2015       case 16: return DXIL_I16;
2016       case 32: return DXIL_I32;
2017       case 64: return DXIL_I64;
2018       default:
2019          unreachable("unexpected bit_size");
2020       }
2021    case nir_type_float:
2022       switch (bit_size) {
2023       case 16: return DXIL_F16;
2024       case 32: return DXIL_F32;
2025       case 64: return DXIL_F64;
2026       default:
2027          unreachable("unexpected bit_size");
2028       }
2029    default:
2030       unreachable("unexpected output type");
2031    }
2032 }
2033 
2034 static bool
2035 emit_unary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
2036                  enum dxil_intr intr, const struct dxil_value *op)
2037 {
2038    const nir_op_info *info = &nir_op_infos[alu->op];
2039    unsigned src_bits = nir_src_bit_size(alu->src[0].src);
2040    enum overload_type overload = get_overload(info->input_types[0], src_bits);
2041 
2042    const struct dxil_value *v = emit_unary_call(ctx, overload, intr, op);
2043    if (!v)
2044       return false;
2045    store_alu_dest(ctx, alu, 0, v);
2046    return true;
2047 }
2048 
2049 static bool
2050 emit_binary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
2051                   enum dxil_intr intr,
2052                   const struct dxil_value *op0, const struct dxil_value *op1)
2053 {
2054    const nir_op_info *info = &nir_op_infos[alu->op];
2055    assert(info->output_type == info->input_types[0]);
2056    assert(info->output_type == info->input_types[1]);
2057    unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
2058    assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
2059    assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
2060    enum overload_type overload = get_overload(info->output_type, dst_bits);
2061 
2062    const struct dxil_value *v = emit_binary_call(ctx, overload, intr,
2063                                                  op0, op1);
2064    if (!v)
2065       return false;
2066    store_alu_dest(ctx, alu, 0, v);
2067    return true;
2068 }
2069 
2070 static bool
2071 emit_tertiary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
2072                     enum dxil_intr intr,
2073                     const struct dxil_value *op0,
2074                     const struct dxil_value *op1,
2075                     const struct dxil_value *op2)
2076 {
2077    const nir_op_info *info = &nir_op_infos[alu->op];
2078    unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
2079    assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
2080    assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
2081    assert(nir_src_bit_size(alu->src[2].src) == dst_bits);
2082 
2083    assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[0], dst_bits));
2084    assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[1], dst_bits));
2085    assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[2], dst_bits));
2086 
2087    enum overload_type overload = get_overload(info->output_type, dst_bits);
2088 
2089    const struct dxil_value *v = emit_tertiary_call(ctx, overload, intr,
2090                                                    op0, op1, op2);
2091    if (!v)
2092       return false;
2093    store_alu_dest(ctx, alu, 0, v);
2094    return true;
2095 }
2096 
2097 static bool
2098 emit_bitfield_insert(struct ntd_context *ctx, nir_alu_instr *alu,
2099                      const struct dxil_value *base,
2100                      const struct dxil_value *insert,
2101                      const struct dxil_value *offset,
2102                      const struct dxil_value *width)
2103 {
2104    /* DXIL is width, offset, insert, base, NIR is base, insert, offset, width */
2105    const struct dxil_value *v = emit_quaternary_call(ctx, DXIL_I32, DXIL_INTR_BFI,
2106                                                      width, offset, insert, base);
2107    if (!v)
2108       return false;
2109 
2110    /* DXIL uses the 5 LSBs from width/offset. Special-case width >= 32, which copies insert. */
2111    const struct dxil_value *compare_width = dxil_emit_cmp(&ctx->mod, DXIL_ICMP_SGE,
2112       width, dxil_module_get_int32_const(&ctx->mod, 32));
2113    v = dxil_emit_select(&ctx->mod, compare_width, insert, v);
2114    store_alu_dest(ctx, alu, 0, v);
2115    return true;
2116 }
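/* Plain-C sketch (hypothetical helper) of what the BFI-plus-select sequence
 * above computes for 32-bit operands:
 *
 *    static uint32_t bfi_ref(uint32_t base, uint32_t insert,
 *                            uint32_t offset, uint32_t width)
 *    {
 *       if ((int32_t)width >= 32)          // the select emitted above
 *          return insert;
 *       uint32_t w = width & 31, o = offset & 31;
 *       uint32_t mask = ((1u << w) - 1u) << o;
 *       return (base & ~mask) | ((insert << o) & mask);
 *    }
 */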
2117 
2118 static bool emit_select(struct ntd_context *ctx, nir_alu_instr *alu,
2119                         const struct dxil_value *sel,
2120                         const struct dxil_value *val_true,
2121                         const struct dxil_value *val_false)
2122 {
2123    assert(sel);
2124    assert(val_true);
2125    assert(val_false);
2126 
2127    const struct dxil_value *v = dxil_emit_select(&ctx->mod, sel, val_true, val_false);
2128    if (!v)
2129       return false;
2130 
2131    store_alu_dest(ctx, alu, 0, v);
2132    return true;
2133 }
2134 
2135 static bool
2136 emit_b2f16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
2137 {
2138    assert(val);
2139 
2140    struct dxil_module *m = &ctx->mod;
2141 
2142    const struct dxil_value *c1 = dxil_module_get_float16_const(m, 0x3C00); /* 1.0 in fp16 */
2143    const struct dxil_value *c0 = dxil_module_get_float16_const(m, 0); /* 0.0 in fp16 */
2144 
2145    if (!c0 || !c1)
2146       return false;
2147 
2148    return emit_select(ctx, alu, val, c1, c0);
2149 }
2150 
2151 static bool
2152 emit_b2f32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
2153 {
2154    assert(val);
2155 
2156    struct dxil_module *m = &ctx->mod;
2157 
2158    const struct dxil_value *c1 = dxil_module_get_float_const(m, 1.0f);
2159    const struct dxil_value *c0 = dxil_module_get_float_const(m, 0.0f);
2160 
2161    if (!c0 || !c1)
2162       return false;
2163 
2164    return emit_select(ctx, alu, val, c1, c0);
2165 }
2166 
2167 static bool
2168 emit_b2f64(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
2169 {
2170    assert(val);
2171 
2172    struct dxil_module *m = &ctx->mod;
2173 
2174    const struct dxil_value *c1 = dxil_module_get_double_const(m, 1.0);
2175    const struct dxil_value *c0 = dxil_module_get_double_const(m, 0.0);
2176 
2177    if (!c0 || !c1)
2178       return false;
2179 
2180    ctx->mod.feats.doubles = 1;
2181    return emit_select(ctx, alu, val, c1, c0);
2182 }
2183 
2184 static bool
2185 emit_f2b32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
2186 {
2187    assert(val);
2188 
2189    const struct dxil_value *zero = dxil_module_get_float_const(&ctx->mod, 0.0f);
2190    return emit_cmp(ctx, alu, DXIL_FCMP_UNE, val, zero);
2191 }
2192 
2193 static bool
2194 emit_f16tof32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val, bool shift)
2195 {
2196    if (shift) {
2197       val = dxil_emit_binop(&ctx->mod, DXIL_BINOP_LSHR, val,
2198          dxil_module_get_int32_const(&ctx->mod, 16), 0);
2199       if (!val)
2200          return false;
2201    }
2202 
2203    const struct dxil_func *func = dxil_get_function(&ctx->mod,
2204                                                     "dx.op.legacyF16ToF32",
2205                                                     DXIL_NONE);
2206    if (!func)
2207       return false;
2208 
2209    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F16TOF32);
2210    if (!opcode)
2211       return false;
2212 
2213    const struct dxil_value *args[] = {
2214      opcode,
2215      val
2216    };
2217 
2218    const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2219    if (!v)
2220       return false;
2221    store_alu_dest(ctx, alu, 0, v);
2222    return true;
2223 }
2224 
2225 static bool
2226 emit_f32tof16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val0, const struct dxil_value *val1)
2227 {
2228    const struct dxil_func *func = dxil_get_function(&ctx->mod,
2229                                                     "dx.op.legacyF32ToF16",
2230                                                     DXIL_NONE);
2231    if (!func)
2232       return false;
2233 
2234    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F32TOF16);
2235    if (!opcode)
2236       return false;
2237 
2238    const struct dxil_value *args[] = {
2239      opcode,
2240      val0
2241    };
2242 
2243    const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2244    if (!v)
2245       return false;
2246 
2247    if (!nir_src_is_const(alu->src[1].src) || nir_src_as_int(alu->src[1].src) != 0) {
2248       args[1] = val1;
2249       const struct dxil_value *v_high = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2250       if (!v_high)
2251          return false;
2252 
2253       v_high = dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL, v_high,
2254          dxil_module_get_int32_const(&ctx->mod, 16), 0);
2255       if (!v_high)
2256          return false;
2257 
2258       v = dxil_emit_binop(&ctx->mod, DXIL_BINOP_OR, v, v_high, 0);
2259       if (!v)
2260          return false;
2261    }
2262 
2263    store_alu_dest(ctx, alu, 0, v);
2264    return true;
2265 }
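/* The pack above is the usual two-halves-per-dword layout; in plain C
 * terms, once lo16/hi16 hold the converted 16-bit encodings:
 *
 *    uint32_t packed = (hi16 << 16) | lo16;
 *
 * The second conversion and the shl/or are skipped entirely when src[1] is
 * a constant zero, leaving just the low half.
 */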
2266 
2267 static bool
2268 emit_vec(struct ntd_context *ctx, nir_alu_instr *alu, unsigned num_inputs)
2269 {
2270    const struct dxil_type *type = get_alu_src_type(ctx, alu, 0);
2271    nir_alu_type t = dxil_type_to_nir_type(type);
2272 
2273    for (unsigned i = 0; i < num_inputs; i++) {
2274       const struct dxil_value *src =
2275          get_src(ctx, &alu->src[i].src, alu->src[i].swizzle[0], t);
2276       if (!src)
2277          return false;
2278 
2279       store_alu_dest(ctx, alu, i, src);
2280    }
2281    return true;
2282 }
2283 
2284 static bool
2285 emit_make_double(struct ntd_context *ctx, nir_alu_instr *alu)
2286 {
2287    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.makeDouble", DXIL_F64);
2288    if (!func)
2289       return false;
2290 
2291    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_MAKE_DOUBLE);
2292    if (!opcode)
2293       return false;
2294 
2295    const struct dxil_value *args[3] = {
2296       opcode,
2297       get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_uint32),
2298       get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[1], nir_type_uint32),
2299    };
2300    if (!args[1] || !args[2])
2301       return false;
2302 
2303    const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2304    if (!v)
2305       return false;
2306    store_dest(ctx, &alu->dest.dest, 0, v, nir_type_float64);
2307    return true;
2308 }
2309 
2310 static bool
2311 emit_split_double(struct ntd_context *ctx, nir_alu_instr *alu)
2312 {
2313    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.splitDouble", DXIL_F64);
2314    if (!func)
2315       return false;
2316 
2317    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SPLIT_DOUBLE);
2318    if (!opcode)
2319       return false;
2320 
2321    const struct dxil_value *args[] = {
2322       opcode,
2323       get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_float64)
2324    };
2325    if (!args[1])
2326       return false;
2327 
2328    const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2329    if (!v)
2330       return false;
2331 
2332    const struct dxil_value *hi = dxil_emit_extractval(&ctx->mod, v, 0);
2333    const struct dxil_value *lo = dxil_emit_extractval(&ctx->mod, v, 1);
2334    if (!hi || !lo)
2335       return false;
2336 
2337    store_dest_value(ctx, &alu->dest.dest, 0, hi);
2338    store_dest_value(ctx, &alu->dest.dest, 1, lo);
2339    return true;
2340 }
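/* Bit-level view of the makeDouble/splitDouble pair, as a sketch (the
 * dword order follows the DXIL ops and is not restated here):
 *
 *    uint64_t bits = ((uint64_t)hi << 32) | lo;
 *    double d;
 *    memcpy(&d, &bits, sizeof(d));      // makeDouble direction
 *
 * splitDouble inverts this, yielding the two 32-bit halves of the double's
 * encoding, which is why the result is stored as two components above.
 */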
2341 
2342 static bool
2343 emit_alu(struct ntd_context *ctx, nir_alu_instr *alu)
2344 {
2345    /* handle vec-instructions first; they are the only ones that produce
2346     * vector results.
2347     */
2348    switch (alu->op) {
2349    case nir_op_vec2:
2350    case nir_op_vec3:
2351    case nir_op_vec4:
2352    case nir_op_vec8:
2353    case nir_op_vec16:
2354       return emit_vec(ctx, alu, nir_op_infos[alu->op].num_inputs);
2355    case nir_op_mov: {
2356          assert(nir_dest_num_components(alu->dest.dest) == 1);
2357          store_ssa_def(ctx, &alu->dest.dest.ssa, 0, get_src_ssa(ctx,
2358                         alu->src->src.ssa, alu->src->swizzle[0]));
2359          return true;
2360       }
2361    case nir_op_pack_double_2x32_dxil:
2362       return emit_make_double(ctx, alu);
2363    case nir_op_unpack_double_2x32_dxil:
2364       return emit_split_double(ctx, alu);
2365    default:
2366       /* silence warnings */
2367       ;
2368    }
2369 
2370    /* other ops should be scalar */
2371    assert(alu->dest.write_mask == 1);
2372    const struct dxil_value *src[4];
2373    assert(nir_op_infos[alu->op].num_inputs <= 4);
2374    for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
2375       src[i] = get_alu_src(ctx, alu, i);
2376       if (!src[i])
2377          return false;
2378    }
2379 
2380    switch (alu->op) {
2381    case nir_op_iadd:
2382    case nir_op_fadd: return emit_binop(ctx, alu, DXIL_BINOP_ADD, src[0], src[1]);
2383 
2384    case nir_op_isub:
2385    case nir_op_fsub: return emit_binop(ctx, alu, DXIL_BINOP_SUB, src[0], src[1]);
2386 
2387    case nir_op_imul:
2388    case nir_op_fmul: return emit_binop(ctx, alu, DXIL_BINOP_MUL, src[0], src[1]);
2389 
2390    case nir_op_fdiv:
2391       if (alu->dest.dest.ssa.bit_size == 64)
2392          ctx->mod.feats.dx11_1_double_extensions = 1;
2393       FALLTHROUGH;
2394    case nir_op_idiv:
2395       return emit_binop(ctx, alu, DXIL_BINOP_SDIV, src[0], src[1]);
2396 
2397    case nir_op_udiv: return emit_binop(ctx, alu, DXIL_BINOP_UDIV, src[0], src[1]);
2398    case nir_op_irem: return emit_binop(ctx, alu, DXIL_BINOP_SREM, src[0], src[1]);
2399    case nir_op_imod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
2400    case nir_op_umod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
2401    case nir_op_ishl: return emit_shift(ctx, alu, DXIL_BINOP_SHL, src[0], src[1]);
2402    case nir_op_ishr: return emit_shift(ctx, alu, DXIL_BINOP_ASHR, src[0], src[1]);
2403    case nir_op_ushr: return emit_shift(ctx, alu, DXIL_BINOP_LSHR, src[0], src[1]);
2404    case nir_op_iand: return emit_binop(ctx, alu, DXIL_BINOP_AND, src[0], src[1]);
2405    case nir_op_ior:  return emit_binop(ctx, alu, DXIL_BINOP_OR, src[0], src[1]);
2406    case nir_op_ixor: return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], src[1]);
2407    case nir_op_inot: {
2408       unsigned bit_size = alu->dest.dest.ssa.bit_size;
2409       intmax_t val = bit_size == 1 ? 1 : -1;
2410       const struct dxil_value *negative_one = dxil_module_get_int_const(&ctx->mod, val, bit_size);
2411       return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], negative_one);
2412    }
2413    case nir_op_ieq:  return emit_cmp(ctx, alu, DXIL_ICMP_EQ, src[0], src[1]);
2414    case nir_op_ine:  return emit_cmp(ctx, alu, DXIL_ICMP_NE, src[0], src[1]);
2415    case nir_op_ige:  return emit_cmp(ctx, alu, DXIL_ICMP_SGE, src[0], src[1]);
2416    case nir_op_uge:  return emit_cmp(ctx, alu, DXIL_ICMP_UGE, src[0], src[1]);
2417    case nir_op_ilt:  return emit_cmp(ctx, alu, DXIL_ICMP_SLT, src[0], src[1]);
2418    case nir_op_ult:  return emit_cmp(ctx, alu, DXIL_ICMP_ULT, src[0], src[1]);
2419    case nir_op_feq:  return emit_cmp(ctx, alu, DXIL_FCMP_OEQ, src[0], src[1]);
2420    case nir_op_fneu: return emit_cmp(ctx, alu, DXIL_FCMP_UNE, src[0], src[1]);
2421    case nir_op_flt:  return emit_cmp(ctx, alu, DXIL_FCMP_OLT, src[0], src[1]);
2422    case nir_op_fge:  return emit_cmp(ctx, alu, DXIL_FCMP_OGE, src[0], src[1]);
2423    case nir_op_bcsel: return emit_select(ctx, alu, src[0], src[1], src[2]);
2424    case nir_op_ftrunc: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_Z, src[0]);
2425    case nir_op_fabs: return emit_unary_intin(ctx, alu, DXIL_INTR_FABS, src[0]);
2426    case nir_op_fcos: return emit_unary_intin(ctx, alu, DXIL_INTR_FCOS, src[0]);
2427    case nir_op_fsin: return emit_unary_intin(ctx, alu, DXIL_INTR_FSIN, src[0]);
2428    case nir_op_fceil: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_PI, src[0]);
2429    case nir_op_fexp2: return emit_unary_intin(ctx, alu, DXIL_INTR_FEXP2, src[0]);
2430    case nir_op_flog2: return emit_unary_intin(ctx, alu, DXIL_INTR_FLOG2, src[0]);
2431    case nir_op_ffloor: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NI, src[0]);
2432    case nir_op_ffract: return emit_unary_intin(ctx, alu, DXIL_INTR_FRC, src[0]);
2433    case nir_op_fisnormal: return emit_unary_intin(ctx, alu, DXIL_INTR_ISNORMAL, src[0]);
2434    case nir_op_fisfinite: return emit_unary_intin(ctx, alu, DXIL_INTR_ISFINITE, src[0]);
2435 
2436    case nir_op_fddx:
2437    case nir_op_fddx_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_COARSE, src[0]);
2438    case nir_op_fddx_fine: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_FINE, src[0]);
2439    case nir_op_fddy:
2440    case nir_op_fddy_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDY_COARSE, src[0]);
2441    case nir_op_fddy_fine: return emit_unary_intin(ctx, alu, DXIL_INTR_DDY_FINE, src[0]);
2442 
2443    case nir_op_fround_even: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NE, src[0]);
2444    case nir_op_frcp: {
2445          const struct dxil_value *one = dxil_module_get_float_const(&ctx->mod, 1.0f);
2446          return emit_binop(ctx, alu, DXIL_BINOP_SDIV, one, src[0]);
2447       }
2448    case nir_op_fsat: return emit_unary_intin(ctx, alu, DXIL_INTR_SATURATE, src[0]);
2449    case nir_op_bit_count: return emit_unary_intin(ctx, alu, DXIL_INTR_COUNTBITS, src[0]);
2450    case nir_op_bitfield_reverse: return emit_unary_intin(ctx, alu, DXIL_INTR_BFREV, src[0]);
2451    case nir_op_ufind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_HI, src[0]);
2452    case nir_op_ifind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_SHI, src[0]);
2453    case nir_op_find_lsb: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_LO, src[0]);
2454    case nir_op_imax: return emit_binary_intin(ctx, alu, DXIL_INTR_IMAX, src[0], src[1]);
2455    case nir_op_imin: return emit_binary_intin(ctx, alu, DXIL_INTR_IMIN, src[0], src[1]);
2456    case nir_op_umax: return emit_binary_intin(ctx, alu, DXIL_INTR_UMAX, src[0], src[1]);
2457    case nir_op_umin: return emit_binary_intin(ctx, alu, DXIL_INTR_UMIN, src[0], src[1]);
2458    case nir_op_frsq: return emit_unary_intin(ctx, alu, DXIL_INTR_RSQRT, src[0]);
2459    case nir_op_fsqrt: return emit_unary_intin(ctx, alu, DXIL_INTR_SQRT, src[0]);
2460    case nir_op_fmax: return emit_binary_intin(ctx, alu, DXIL_INTR_FMAX, src[0], src[1]);
2461    case nir_op_fmin: return emit_binary_intin(ctx, alu, DXIL_INTR_FMIN, src[0], src[1]);
2462    case nir_op_ffma:
2463       if (alu->dest.dest.ssa.bit_size == 64)
2464          ctx->mod.feats.dx11_1_double_extensions = 1;
2465       return emit_tertiary_intin(ctx, alu, DXIL_INTR_FMA, src[0], src[1], src[2]);
2466 
2467    case nir_op_ibfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_IBFE, src[2], src[1], src[0]);
2468    case nir_op_ubfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_UBFE, src[2], src[1], src[0]);
2469    case nir_op_bitfield_insert: return emit_bitfield_insert(ctx, alu, src[0], src[1], src[2], src[3]);
2470 
2471    case nir_op_unpack_half_2x16_split_x: return emit_f16tof32(ctx, alu, src[0], false);
2472    case nir_op_unpack_half_2x16_split_y: return emit_f16tof32(ctx, alu, src[0], true);
2473    case nir_op_pack_half_2x16_split: return emit_f32tof16(ctx, alu, src[0], src[1]);
2474 
2475    case nir_op_b2i16:
2476    case nir_op_i2i16:
2477    case nir_op_f2i16:
2478    case nir_op_f2u16:
2479    case nir_op_u2u16:
2480    case nir_op_u2f16:
2481    case nir_op_i2f16:
2482    case nir_op_f2f16_rtz:
2483    case nir_op_b2i32:
2484    case nir_op_f2f32:
2485    case nir_op_f2i32:
2486    case nir_op_f2u32:
2487    case nir_op_i2f32:
2488    case nir_op_i2i32:
2489    case nir_op_u2f32:
2490    case nir_op_u2u32:
2491    case nir_op_b2i64:
2492    case nir_op_f2f64:
2493    case nir_op_f2i64:
2494    case nir_op_f2u64:
2495    case nir_op_i2f64:
2496    case nir_op_i2i64:
2497    case nir_op_u2f64:
2498    case nir_op_u2u64:
2499       return emit_cast(ctx, alu, src[0]);
2500 
2501    case nir_op_f2b32: return emit_f2b32(ctx, alu, src[0]);
2502    case nir_op_b2f16: return emit_b2f16(ctx, alu, src[0]);
2503    case nir_op_b2f32: return emit_b2f32(ctx, alu, src[0]);
2504    case nir_op_b2f64: return emit_b2f64(ctx, alu, src[0]);
2505    default:
2506       NIR_INSTR_UNSUPPORTED(&alu->instr);
2507       assert(!"Unimplemented ALU instruction");
2508       return false;
2509    }
2510 }
2511 
2512 static const struct dxil_value *
2513 load_ubo(struct ntd_context *ctx, const struct dxil_value *handle,
2514          const struct dxil_value *offset, enum overload_type overload)
2515 {
2516    assert(handle && offset);
2517 
2518    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CBUFFER_LOAD_LEGACY);
2519    if (!opcode)
2520       return NULL;
2521 
2522    const struct dxil_value *args[] = {
2523       opcode, handle, offset
2524    };
2525 
2526    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cbufferLoadLegacy", overload);
2527    if (!func)
2528       return NULL;
2529    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2530 }
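/* Note on the addressing convention, assuming DXIL's legacy constant-buffer
 * semantics: the offset argument is a 16-byte row index, not a byte offset,
 * so a byte address would map as
 *
 *    row       = byte_offset >> 4;          // which 16-byte row
 *    component = (byte_offset & 15) >> 2;   // which dword within the row
 *
 * with the caller extracting that component from the returned row value.
 */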
2531 
2532 static bool
2533 emit_barrier_impl(struct ntd_context *ctx, nir_variable_mode modes, nir_scope execution_scope, nir_scope mem_scope)
2534 {
2535    const struct dxil_value *opcode, *mode;
2536    const struct dxil_func *func;
2537    uint32_t flags = 0;
2538 
2539    if (execution_scope == NIR_SCOPE_WORKGROUP)
2540       flags |= DXIL_BARRIER_MODE_SYNC_THREAD_GROUP;
2541 
2542    if (modes & (nir_var_mem_ssbo | nir_var_mem_global | nir_var_image)) {
2543       if (mem_scope > NIR_SCOPE_WORKGROUP)
2544          flags |= DXIL_BARRIER_MODE_UAV_FENCE_GLOBAL;
2545       else
2546          flags |= DXIL_BARRIER_MODE_UAV_FENCE_THREAD_GROUP;
2547    }
2548 
2549    if (modes & nir_var_mem_shared)
2550       flags |= DXIL_BARRIER_MODE_GROUPSHARED_MEM_FENCE;
2551 
2552    func = dxil_get_function(&ctx->mod, "dx.op.barrier", DXIL_NONE);
2553    if (!func)
2554       return false;
2555 
2556    opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_BARRIER);
2557    if (!opcode)
2558       return false;
2559 
2560    mode = dxil_module_get_int32_const(&ctx->mod, flags);
2561    if (!mode)
2562       return false;
2563 
2564    const struct dxil_value *args[] = { opcode, mode };
2565 
2566    return dxil_emit_call_void(&ctx->mod, func,
2567                               args, ARRAY_SIZE(args));
2568 }
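/* Worked example of the mapping above: a compute-shader barrier() with
 * workgroup execution scope and shared-memory semantics becomes
 *
 *    flags = DXIL_BARRIER_MODE_SYNC_THREAD_GROUP |
 *            DXIL_BARRIER_MODE_GROUPSHARED_MEM_FENCE;
 *
 * while a device-scope SSBO/image memory barrier with no execution scope
 * maps to DXIL_BARRIER_MODE_UAV_FENCE_GLOBAL alone.
 */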
2569 
2570 static bool
2571 emit_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2572 {
2573    return emit_barrier_impl(ctx,
2574       nir_intrinsic_memory_modes(intr),
2575       nir_intrinsic_execution_scope(intr),
2576       nir_intrinsic_memory_scope(intr));
2577 }
2578 
2579 /* Memory barrier for UAVs (buffers/images) at cross-workgroup scope */
2580 static bool
2581 emit_memory_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2582 {
2583    return emit_barrier_impl(ctx,
2584       nir_var_mem_global,
2585       NIR_SCOPE_NONE,
2586       NIR_SCOPE_DEVICE);
2587 }
2588 
2589 /* Memory barrier for TGSM */
2590 static bool
2591 emit_memory_barrier_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2592 {
2593    return emit_barrier_impl(ctx,
2594       nir_var_mem_shared,
2595       NIR_SCOPE_NONE,
2596       NIR_SCOPE_WORKGROUP);
2597 }
2598 
2599 /* Memory barrier for all intra-workgroup memory accesses (UAVs and TGSM) */
2600 static bool
2601 emit_group_memory_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2602 {
2603    return emit_barrier_impl(ctx,
2604       nir_var_mem_shared | nir_var_mem_global,
2605       NIR_SCOPE_NONE,
2606       NIR_SCOPE_WORKGROUP);
2607 }
2608 
2609 static bool
2610 emit_control_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2611 {
2612    return emit_barrier_impl(ctx,
2613       nir_var_mem_shared,
2614       NIR_SCOPE_WORKGROUP,
2615       NIR_SCOPE_NONE);
2616 }
2617 
2618 static bool
2619 emit_load_global_invocation_id(struct ntd_context *ctx,
2620                                     nir_intrinsic_instr *intr)
2621 {
2622    assert(intr->dest.is_ssa);
2623    nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);
2624 
2625    for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
2626       if (comps & (1 << i)) {
2627          const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
2628          if (!idx)
2629             return false;
2630          const struct dxil_value *globalid = emit_threadid_call(ctx, idx);
2631 
2632          if (!globalid)
2633             return false;
2634 
2635          store_dest_value(ctx, &intr->dest, i, globalid);
2636       }
2637    }
2638    return true;
2639 }
2640 
2641 static bool
2642 emit_load_local_invocation_id(struct ntd_context *ctx,
2643                               nir_intrinsic_instr *intr)
2644 {
2645    assert(intr->dest.is_ssa);
2646    nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);
2647 
2648    for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
2649       if (comps & (1 << i)) {
2650          const struct dxil_value
2651             *idx = dxil_module_get_int32_const(&ctx->mod, i);
2652          if (!idx)
2653             return false;
2654          const struct dxil_value
2655             *threadidingroup = emit_threadidingroup_call(ctx, idx);
2656          if (!threadidingroup)
2657             return false;
2658          store_dest_value(ctx, &intr->dest, i, threadidingroup);
2659       }
2660    }
2661    return true;
2662 }
2663 
2664 static bool
2665 emit_load_local_invocation_index(struct ntd_context *ctx,
2666                                  nir_intrinsic_instr *intr)
2667 {
2668    assert(intr->dest.is_ssa);
2669 
2670    const struct dxil_value
2671       *flattenedthreadidingroup = emit_flattenedthreadidingroup_call(ctx);
2672    if (!flattenedthreadidingroup)
2673       return false;
2674    store_dest_value(ctx, &intr->dest, 0, flattenedthreadidingroup);
2675 
2676    return true;
2677 }
2678 
2679 static bool
2680 emit_load_local_workgroup_id(struct ntd_context *ctx,
2681                               nir_intrinsic_instr *intr)
2682 {
2683    assert(intr->dest.is_ssa);
2684    nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);
2685 
2686    for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
2687       if (comps & (1 << i)) {
2688          const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
2689          if (!idx)
2690             return false;
2691          const struct dxil_value *groupid = emit_groupid_call(ctx, idx);
2692          if (!groupid)
2693             return false;
2694          store_dest_value(ctx, &intr->dest, i, groupid);
2695       }
2696    }
2697    return true;
2698 }

static const struct dxil_value *
call_unary_external_function(struct ntd_context *ctx,
                             const char *name,
                             int32_t dxil_intr)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, name, DXIL_I32);
   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, dxil_intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {opcode};

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_load_unary_external_function(struct ntd_context *ctx,
                                  nir_intrinsic_instr *intr, const char *name,
                                  int32_t dxil_intr)
{
   const struct dxil_value *value = call_unary_external_function(ctx, name, dxil_intr);
   if (!value)
      return false;
   store_dest_value(ctx, &intr->dest, 0, value);

   return true;
}

static bool
emit_load_sample_mask_in(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *value = call_unary_external_function(ctx,
      "dx.op.coverage", DXIL_INTR_COVERAGE);

   /* Mask coverage with (1 << sample index). Note, done as an AND to handle extrapolation cases. */
   if (ctx->mod.info.has_per_sample_input) {
      value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_AND, value,
         dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL,
            dxil_module_get_int32_const(&ctx->mod, 1),
            call_unary_external_function(ctx, "dx.op.sampleIndex", DXIL_INTR_SAMPLE_INDEX), 0), 0);
   }

   store_dest_value(ctx, &intr->dest, 0, value);
   return true;
}

static bool
emit_load_tess_coord(struct ntd_context *ctx,
                     nir_intrinsic_instr *intr)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.domainLocation", DXIL_F32);
   if (!func)
      return false;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DOMAIN_LOCATION);
   if (!opcode)
      return false;

   unsigned num_coords = ctx->shader->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES ? 3 : 2;
   for (unsigned i = 0; i < num_coords; ++i) {
      unsigned component_idx = i;

      const struct dxil_value *component = dxil_module_get_int32_const(&ctx->mod, component_idx);
      if (!component)
         return false;

      const struct dxil_value *args[] = { opcode, component };

      const struct dxil_value *value =
         dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
      store_dest_value(ctx, &intr->dest, i, value);
   }

   for (unsigned i = num_coords; i < intr->dest.ssa.num_components; ++i) {
      const struct dxil_value *value = dxil_module_get_float_const(&ctx->mod, 0.0f);
      store_dest_value(ctx, &intr->dest, i, value);
   }

   return true;
}
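
/* Illustrative sketch (assumption, not in the original source): for a
 * triangle domain the loop above produces three domainLocation calls,
 *
 *    %u = call float @dx.op.domainLocation.f32(OP, 0)
 *    %v = call float @dx.op.domainLocation.f32(OP, 1)
 *    %w = call float @dx.op.domainLocation.f32(OP, 2)
 *
 * where OP stands for DXIL_INTR_DOMAIN_LOCATION (numeric value and argument
 * types left symbolic here). Quad and isoline domains emit only two calls,
 * and the second loop pads the remaining destination components with 0.0f.
 */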

static const struct dxil_value *
get_int32_undef(struct dxil_module *m)
{
   const struct dxil_type *int32_type =
      dxil_module_get_int_type(m, 32);
   if (!int32_type)
      return NULL;

   return dxil_module_get_undef(m, int32_type);
}

static const struct dxil_value *
emit_gep_for_index(struct ntd_context *ctx, const nir_variable *var,
                   const struct dxil_value *index)
{
   assert(var->data.mode == nir_var_shader_temp);

   struct hash_entry *he = _mesa_hash_table_search(ctx->consts, var);
   assert(he != NULL);
   const struct dxil_value *ptr = he->data;

   const struct dxil_value *zero = dxil_module_get_int32_const(&ctx->mod, 0);
   if (!zero)
      return NULL;

   const struct dxil_value *ops[] = { ptr, zero, index };
   return dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
}
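
/* Illustrative sketch (not part of the original source): the GEP built
 * above is the usual LLVM "index into an array object" pattern with a
 * leading zero index, e.g. for a shader_temp array of 16 i32 constants:
 *
 *    %p = getelementptr inbounds [16 x i32], [16 x i32]* %var, i32 0, i32 %idx
 *
 * The first index steps through the pointer itself, the second selects the
 * element.
 */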

static const struct dxil_value *
get_resource_handle(struct ntd_context *ctx, nir_src *src, enum dxil_resource_class class,
                    enum dxil_resource_kind kind)
{
   /* This source might be one of:
    * 1. Constant resource index - just look it up in precomputed handle arrays
    *    If it's null in that array, create a handle, and store the result
    * 2. A handle from load_vulkan_descriptor - just get the stored SSA value
    * 3. Dynamic resource index - create a handle for it here
    */
   assert(src->ssa->num_components == 1 && src->ssa->bit_size == 32);
   nir_const_value *const_block_index = nir_src_as_const_value(*src);
   const struct dxil_value **handle_entry = NULL;
   if (const_block_index) {
      assert(ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN);
      switch (kind) {
      case DXIL_RESOURCE_KIND_CBUFFER:
         handle_entry = &ctx->cbv_handles[const_block_index->u32];
         break;
      case DXIL_RESOURCE_KIND_RAW_BUFFER:
         if (class == DXIL_RESOURCE_CLASS_UAV)
            handle_entry = &ctx->ssbo_handles[const_block_index->u32];
         else
            handle_entry = &ctx->srv_handles[const_block_index->u32];
         break;
      case DXIL_RESOURCE_KIND_SAMPLER:
         handle_entry = &ctx->sampler_handles[const_block_index->u32];
         break;
      default:
         if (class == DXIL_RESOURCE_CLASS_UAV)
            handle_entry = &ctx->image_handles[const_block_index->u32];
         else
            handle_entry = &ctx->srv_handles[const_block_index->u32];
         break;
      }
   }

   if (handle_entry && *handle_entry)
      return *handle_entry;

   const struct dxil_value *value = get_src_ssa(ctx, src->ssa, 0);
   if (nir_src_as_deref(*src) ||
       ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      return value;
   }

   unsigned space = 0;
   if (ctx->opts->environment == DXIL_ENVIRONMENT_GL &&
       class == DXIL_RESOURCE_CLASS_UAV) {
      if (kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
         space = 2;
      else
         space = 1;
   }
   /* The base binding here will almost always be zero. The only cases where we end
    * up in this type of dynamic indexing are:
    * 1. GL UBOs
    * 2. GL SSBOs
    * 3. CL SSBOs
    * In all cases except GL UBOs, the resources are a single zero-based array.
    * For GL UBOs, the base is 1, because uniforms use binding 0 and cannot be
    * dynamically indexed. All other cases should either fall into static indexing
    * (first early return), deref-based dynamic handle creation (images, or Vulkan
    * textures/samplers), or load_vulkan_descriptor handle creation.
    */
   unsigned base_binding = 0;
   if (ctx->opts->environment == DXIL_ENVIRONMENT_GL &&
       class == DXIL_RESOURCE_CLASS_CBV)
      base_binding = 1;

   const struct dxil_value *handle = emit_createhandle_call(ctx, class,
      get_resource_id(ctx, class, space, base_binding), value, !const_block_index);
   if (handle_entry)
      *handle_entry = handle;

   return handle;
}
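
/* Illustrative sketch (assumption, not in the original source): the
 * dynamically-indexed fallthrough path of get_resource_handle() ends in a
 * createHandle call of roughly this shape:
 *
 *    %h = call %dx.types.Handle @dx.op.createHandle(
 *             i32 57, i8 1, i32 %rangeId, i32 %index, i1 %nonUniform)
 *
 * where i8 1 selects the UAV resource class and %nonUniform is true exactly
 * when the block index is not constant (the !const_block_index argument
 * above). Opcode 57 is taken from the DXIL spec, not from this file.
 */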

static bool
emit_load_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);

   enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV;
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
      if (var && var->data.access & ACCESS_NON_WRITEABLE)
         class = DXIL_RESOURCE_CLASS_SRV;
   }

   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);
   if (!int32_undef || !handle || !offset)
      return false;

   assert(nir_src_bit_size(intr->src[0]) == 32);
   assert(nir_intrinsic_dest_components(intr) <= 4);

   const struct dxil_value *coord[2] = {
      offset,
      int32_undef
   };

   const struct dxil_value *load = emit_bufferload_call(ctx, handle, coord, DXIL_I32);
   if (!load)
      return false;

   for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
      const struct dxil_value *val =
         dxil_emit_extractval(&ctx->mod, load, i);
      if (!val)
         return false;
      store_dest_value(ctx, &intr->dest, i, val);
   }
   return true;
}
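
/* Illustrative sketch (assumption, not in the original source): the raw
 * buffer load above yields a ResRet aggregate that is unpacked with one
 * extractvalue per destination component, e.g. for a 2-component load:
 *
 *    %r  = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(
 *              i32 68, %dx.types.Handle %h, i32 %offset, i32 undef)
 *    %c0 = extractvalue %dx.types.ResRet.i32 %r, 0
 *    %c1 = extractvalue %dx.types.ResRet.i32 %r, 1
 *
 * The second coordinate is undef for raw buffers, matching int32_undef
 * above. Opcode 68 is taken from the DXIL spec, not from this file.
 */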

static bool
emit_store_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[1], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[2], 0, nir_type_uint);
   if (!handle || !offset)
      return false;

   assert(nir_src_bit_size(intr->src[0]) == 32);
   unsigned num_components = nir_src_num_components(intr->src[0]);
   assert(num_components <= 4);
   const struct dxil_value *value[4];
   for (unsigned i = 0; i < num_components; ++i) {
      value[i] = get_src(ctx, &intr->src[0], i, nir_type_uint);
      if (!value[i])
         return false;
   }

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[2] = {
      offset,
      int32_undef
   };

   for (int i = num_components; i < 4; ++i)
      value[i] = int32_undef;

   const struct dxil_value *write_mask =
      dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
   if (!write_mask)
      return false;

   return emit_bufferstore_call(ctx, handle, coord, value, write_mask, DXIL_I32);
}

static bool
emit_store_ssbo_masked(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *value =
      get_src(ctx, &intr->src[0], 0, nir_type_uint);
   const struct dxil_value *mask =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[2], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[3], 0, nir_type_uint);
   if (!value || !mask || !handle || !offset)
      return false;

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = {
      offset, int32_undef, int32_undef
   };

   return
      emit_atomic_binop(ctx, handle, DXIL_ATOMIC_AND, coord, mask) != NULL &&
      emit_atomic_binop(ctx, handle, DXIL_ATOMIC_OR, coord, value) != NULL;
}
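
/* Worked example (not part of the original source): the two atomics above
 * implement buf = (buf & mask) | value, assuming the caller passes a mask
 * with zeros at the bits being written. E.g. storing 0xAB into byte lane 1
 * of a dword would use mask = 0xFFFF00FF and value = 0x0000AB00:
 *
 *    atomic AND: buf = buf & 0xFFFF00FF   ; clear the target byte
 *    atomic OR:  buf = buf | 0x0000AB00   ; merge in the new bits
 *
 * The pair is not jointly atomic, but writers that each touch only their
 * own masked bits still compose correctly.
 */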

static bool
emit_store_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *zero, *index;

   /* All shared mem accesses should have been lowered to scalar 32bit
    * accesses.
    */
   assert(nir_src_bit_size(intr->src[0]) == 32);
   assert(nir_src_num_components(intr->src[0]) == 1);

   zero = dxil_module_get_int32_const(&ctx->mod, 0);
   if (!zero)
      return false;

   if (intr->intrinsic == nir_intrinsic_store_shared_dxil)
      index = get_src(ctx, &intr->src[1], 0, nir_type_uint);
   else
      index = get_src(ctx, &intr->src[2], 0, nir_type_uint);
   if (!index)
      return false;

   const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
   const struct dxil_value *ptr, *value;

   ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
   if (!ptr)
      return false;

   value = get_src(ctx, &intr->src[0], 0, nir_type_uint);
   if (!value)
      return false;

   if (intr->intrinsic == nir_intrinsic_store_shared_dxil)
      return dxil_emit_store(&ctx->mod, value, ptr, 4, false);

   const struct dxil_value *mask = get_src(ctx, &intr->src[1], 0, nir_type_uint);
   if (!mask)
      return false;

   if (!dxil_emit_atomicrmw(&ctx->mod, mask, ptr, DXIL_RMWOP_AND, false,
                            DXIL_ATOMIC_ORDERING_ACQREL,
                            DXIL_SYNC_SCOPE_CROSSTHREAD))
      return false;

   if (!dxil_emit_atomicrmw(&ctx->mod, value, ptr, DXIL_RMWOP_OR, false,
                            DXIL_ATOMIC_ORDERING_ACQREL,
                            DXIL_SYNC_SCOPE_CROSSTHREAD))
      return false;

   return true;
}

static bool
emit_store_scratch(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *zero, *index;

   /* All scratch mem accesses should have been lowered to scalar 32bit
    * accesses.
    */
   assert(nir_src_bit_size(intr->src[0]) == 32);
   assert(nir_src_num_components(intr->src[0]) == 1);

   zero = dxil_module_get_int32_const(&ctx->mod, 0);
   if (!zero)
      return false;

   index = get_src(ctx, &intr->src[1], 0, nir_type_uint);
   if (!index)
      return false;

   const struct dxil_value *ops[] = { ctx->scratchvars, zero, index };
   const struct dxil_value *ptr, *value;

   ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
   if (!ptr)
      return false;

   value = get_src(ctx, &intr->src[0], 0, nir_type_uint);
   if (!value)
      return false;

   return dxil_emit_store(&ctx->mod, value, ptr, 4, false);
}

static bool
emit_load_ubo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, DXIL_RESOURCE_KIND_CBUFFER);
   if (!handle)
      return false;

   const struct dxil_value *offset;
   nir_const_value *const_offset = nir_src_as_const_value(intr->src[1]);
   if (const_offset) {
      offset = dxil_module_get_int32_const(&ctx->mod, const_offset->i32 >> 4);
   } else {
      const struct dxil_value *offset_src = get_src(ctx, &intr->src[1], 0, nir_type_uint);
      const struct dxil_value *c4 = dxil_module_get_int32_const(&ctx->mod, 4);
      if (!offset_src || !c4)
         return false;

      offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ASHR, offset_src, c4, 0);
   }

   const struct dxil_value *agg = load_ubo(ctx, handle, offset, DXIL_F32);

   if (!agg)
      return false;

   for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, agg, i);
      store_dest(ctx, &intr->dest, i, retval,
                 nir_dest_bit_size(intr->dest) > 1 ? nir_type_float : nir_type_bool);
   }
   return true;
}
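
/* Worked example (not part of the original source): cbuffer loads operate
 * on 16-byte rows, hence the ">> 4" above: a byte offset of 36 selects row
 * 36 >> 4 = 2. load_ubo() is assumed to wrap a cbufferLoadLegacy-style op,
 * whose aggregate result holds the row's four 32-bit lanes:
 *
 *    %row = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(
 *               i32 59, %dx.types.Handle %h, i32 2)
 *    %x   = extractvalue %dx.types.CBufRet.f32 %row, 0
 *
 * Opcode 59 is taken from the DXIL spec, not from this file.
 */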

static bool
emit_load_ubo_dxil(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   assert(nir_dest_num_components(intr->dest) <= 4);
   assert(nir_dest_bit_size(intr->dest) == 32);

   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, DXIL_RESOURCE_KIND_CBUFFER);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);

   if (!handle || !offset)
      return false;

   const struct dxil_value *agg = load_ubo(ctx, handle, offset, DXIL_I32);
   if (!agg)
      return false;

   for (unsigned i = 0; i < nir_dest_num_components(intr->dest); i++)
      store_dest_value(ctx, &intr->dest, i,
                       dxil_emit_extractval(&ctx->mod, agg, i));

   return true;
}

/* Need to add patch-ness as a matching parameter, since driver_location is *not* unique
 * between control points and patch variables in HS/DS
 */
static nir_variable *
find_patch_matching_variable_by_driver_location(nir_shader *s, nir_variable_mode mode, unsigned driver_location, bool patch)
{
   nir_foreach_variable_with_modes(var, s, mode) {
      if (var->data.driver_location == driver_location &&
          var->data.patch == patch)
         return var;
   }
   return NULL;
}

static bool
emit_store_output_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   assert(intr->intrinsic == nir_intrinsic_store_output ||
          ctx->mod.shader_kind == DXIL_HULL_SHADER);
   bool is_patch_constant = intr->intrinsic == nir_intrinsic_store_output &&
      ctx->mod.shader_kind == DXIL_HULL_SHADER;
   nir_alu_type out_type = nir_intrinsic_src_type(intr);
   enum overload_type overload = get_overload(out_type, intr->src[0].ssa->bit_size);
   const struct dxil_func *func = dxil_get_function(&ctx->mod, is_patch_constant ?
      "dx.op.storePatchConstant" : "dx.op.storeOutput",
      overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, is_patch_constant ?
      DXIL_INTR_STORE_PATCH_CONSTANT : DXIL_INTR_STORE_OUTPUT);
   const struct dxil_value *output_id = dxil_module_get_int32_const(&ctx->mod, nir_intrinsic_base(intr));
   unsigned row_index = intr->intrinsic == nir_intrinsic_store_output ? 1 : 2;

   /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature
    * generation, so muck with them here too.
    */
   nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
   bool is_tess_level = is_patch_constant &&
                        (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
                         semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER);

   const struct dxil_value *row = NULL;
   const struct dxil_value *col = NULL;
   if (is_tess_level)
      col = dxil_module_get_int8_const(&ctx->mod, 0);
   else
      row = get_src(ctx, &intr->src[row_index], 0, nir_type_int);

   bool success = true;
   uint32_t writemask = nir_intrinsic_write_mask(intr);

   nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_out, nir_intrinsic_base(intr), is_patch_constant);
   unsigned var_base_component = var->data.location_frac;
   unsigned base_component = nir_intrinsic_component(intr) - var_base_component;

   if (ctx->mod.minor_validator >= 5) {
      struct dxil_signature_record *sig_rec = is_patch_constant ?
         &ctx->mod.patch_consts[nir_intrinsic_base(intr)] :
         &ctx->mod.outputs[nir_intrinsic_base(intr)];
      unsigned comp_size = intr->src[0].ssa->bit_size == 64 ? 2 : 1;
      unsigned comp_mask = 0;
      if (is_tess_level)
         comp_mask = 1;
      else if (comp_size == 1)
         comp_mask = writemask << var_base_component;
      else {
         for (unsigned i = 0; i < intr->num_components; ++i)
            if ((writemask & (1 << i)))
               comp_mask |= 3 << ((i + var_base_component) * comp_size);
      }
      for (unsigned r = 0; r < sig_rec->num_elements; ++r)
         sig_rec->elements[r].never_writes_mask &= ~comp_mask;

      if (!nir_src_is_const(intr->src[row_index])) {
         struct dxil_psv_signature_element *psv_rec = is_patch_constant ?
            &ctx->mod.psv_patch_consts[nir_intrinsic_base(intr)] :
            &ctx->mod.psv_outputs[nir_intrinsic_base(intr)];
         psv_rec->dynamic_mask_and_stream |= comp_mask;
      }
   }

   for (unsigned i = 0; i < intr->num_components && success; ++i) {
      if (writemask & (1 << i)) {
         if (is_tess_level)
            row = dxil_module_get_int32_const(&ctx->mod, i + base_component);
         else
            col = dxil_module_get_int8_const(&ctx->mod, i + base_component);
         const struct dxil_value *value = get_src(ctx, &intr->src[0], i, out_type);
         if (!col || !row || !value)
            return false;

         const struct dxil_value *args[] = {
            opcode, output_id, row, col, value
         };
         success &= dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
      }
   }

   return success;
}
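
/* Illustrative sketch (assumption, not in the original source): each set
 * writemask bit above becomes one scalar store, e.g. for a vec2 written to
 * output 0 at row %r:
 *
 *    call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 %r, i8 0, float %v0)
 *    call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 %r, i8 1, float %v1)
 *
 * In the tess-level special case the roles flip: the column is pinned to 0
 * and the loop counter feeds the row, matching the row/col swap described
 * in the comment above. Opcode 5 is taken from the DXIL spec, not from
 * this file.
 */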

static bool
emit_load_input_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   bool attr_at_vertex = false;
   if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER &&
      ctx->opts->interpolate_at_vertex &&
      ctx->opts->provoking_vertex != 0 &&
      (nir_intrinsic_dest_type(intr) & nir_type_float)) {
      nir_variable *var = nir_find_variable_with_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr));

      attr_at_vertex = var && var->data.interpolation == INTERP_MODE_FLAT;
   }

   bool is_patch_constant = (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER &&
                             intr->intrinsic == nir_intrinsic_load_input) ||
                            (ctx->mod.shader_kind == DXIL_HULL_SHADER &&
                             intr->intrinsic == nir_intrinsic_load_output);
   bool is_output_control_point = intr->intrinsic == nir_intrinsic_load_per_vertex_output;

   unsigned opcode_val;
   const char *func_name;
   if (attr_at_vertex) {
      opcode_val = DXIL_INTR_ATTRIBUTE_AT_VERTEX;
      func_name = "dx.op.attributeAtVertex";
      if (ctx->mod.minor_validator >= 6)
         ctx->mod.feats.barycentrics = 1;
   } else if (is_patch_constant) {
      opcode_val = DXIL_INTR_LOAD_PATCH_CONSTANT;
      func_name = "dx.op.loadPatchConstant";
   } else if (is_output_control_point) {
      opcode_val = DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT;
      func_name = "dx.op.loadOutputControlPoint";
   } else {
      opcode_val = DXIL_INTR_LOAD_INPUT;
      func_name = "dx.op.loadInput";
   }

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, opcode_val);
   if (!opcode)
      return false;

   const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod,
      is_patch_constant || is_output_control_point ?
         nir_intrinsic_base(intr) :
         ctx->mod.input_mappings[nir_intrinsic_base(intr)]);
   if (!input_id)
      return false;

   bool is_per_vertex =
      intr->intrinsic == nir_intrinsic_load_per_vertex_input ||
      intr->intrinsic == nir_intrinsic_load_per_vertex_output;
   int row_index = is_per_vertex ? 1 : 0;
   const struct dxil_value *vertex_id = NULL;
   if (!is_patch_constant) {
      if (is_per_vertex) {
         vertex_id = get_src(ctx, &intr->src[0], 0, nir_type_int);
      } else if (attr_at_vertex) {
         vertex_id = dxil_module_get_int8_const(&ctx->mod, ctx->opts->provoking_vertex);
      } else {
         const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
         if (!int32_type)
            return false;

         vertex_id = dxil_module_get_undef(&ctx->mod, int32_type);
      }
      if (!vertex_id)
         return false;
   }

   /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature
    * generation, so muck with them here too.
    */
   nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
   bool is_tess_level = is_patch_constant &&
                        (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
                         semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER);

   const struct dxil_value *row = NULL;
   const struct dxil_value *comp = NULL;
   if (is_tess_level)
      comp = dxil_module_get_int8_const(&ctx->mod, 0);
   else
      row = get_src(ctx, &intr->src[row_index], 0, nir_type_int);

   nir_alu_type out_type = nir_intrinsic_dest_type(intr);
   enum overload_type overload = get_overload(out_type, intr->dest.ssa.bit_size);

   const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, overload);

   if (!func)
      return false;

   nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), is_patch_constant);
   unsigned var_base_component = var ? var->data.location_frac : 0;
   unsigned base_component = nir_intrinsic_component(intr) - var_base_component;

   if (ctx->mod.minor_validator >= 5 &&
       !is_output_control_point &&
       intr->intrinsic != nir_intrinsic_load_output) {
      struct dxil_signature_record *sig_rec = is_patch_constant ?
         &ctx->mod.patch_consts[nir_intrinsic_base(intr)] :
         &ctx->mod.inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
      unsigned comp_size = intr->dest.ssa.bit_size == 64 ? 2 : 1;
      unsigned comp_mask = (1 << (intr->num_components * comp_size)) - 1;
      comp_mask <<= (var_base_component * comp_size);
      if (is_tess_level)
         comp_mask = 1;
      for (unsigned r = 0; r < sig_rec->num_elements; ++r)
         sig_rec->elements[r].always_reads_mask |= (comp_mask & sig_rec->elements[r].mask);

      if (!nir_src_is_const(intr->src[row_index])) {
         struct dxil_psv_signature_element *psv_rec = is_patch_constant ?
            &ctx->mod.psv_patch_consts[nir_intrinsic_base(intr)] :
            &ctx->mod.psv_inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
         psv_rec->dynamic_mask_and_stream |= comp_mask;
      }
   }

   for (unsigned i = 0; i < intr->num_components; ++i) {
      if (is_tess_level)
         row = dxil_module_get_int32_const(&ctx->mod, i + base_component);
      else
         comp = dxil_module_get_int8_const(&ctx->mod, i + base_component);

      if (!row || !comp)
         return false;

      const struct dxil_value *args[] = {
         opcode, input_id, row, comp, vertex_id
      };

      unsigned num_args = ARRAY_SIZE(args) - (is_patch_constant ? 1 : 0);
      const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args);
      if (!retval)
         return false;
      store_dest(ctx, &intr->dest, i, retval, out_type);
   }
   return true;
}

static bool
emit_load_interpolated_input(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   nir_intrinsic_instr *barycentric = nir_src_as_intrinsic(intr->src[0]);

   const struct dxil_value *args[6] = { 0 };

   unsigned opcode_val;
   const char *func_name;
   unsigned num_args;
   switch (barycentric->intrinsic) {
   case nir_intrinsic_load_barycentric_at_offset:
      opcode_val = DXIL_INTR_EVAL_SNAPPED;
      func_name = "dx.op.evalSnapped";
      num_args = 6;
      for (unsigned i = 0; i < 2; ++i) {
         const struct dxil_value *float_offset = get_src(ctx, &barycentric->src[0], i, nir_type_float);
         /* GLSL uses float offsets in [-0.5f, 0.5f); DXIL uses integers in
          * [-8, 7], in units of 1/16 pixel. */
         const struct dxil_value *offset_16 = dxil_emit_binop(&ctx->mod,
            DXIL_BINOP_MUL, float_offset, dxil_module_get_float_const(&ctx->mod, 16.0f), 0);
         args[i + 4] = dxil_emit_cast(&ctx->mod, DXIL_CAST_FPTOSI,
            dxil_module_get_int_type(&ctx->mod, 32), offset_16);
      }
      break;
   case nir_intrinsic_load_barycentric_pixel:
      opcode_val = DXIL_INTR_EVAL_SNAPPED;
      func_name = "dx.op.evalSnapped";
      num_args = 6;
      args[4] = args[5] = dxil_module_get_int32_const(&ctx->mod, 0);
      break;
   case nir_intrinsic_load_barycentric_at_sample:
      opcode_val = DXIL_INTR_EVAL_SAMPLE_INDEX;
      func_name = "dx.op.evalSampleIndex";
      num_args = 5;
      args[4] = get_src(ctx, &barycentric->src[0], 0, nir_type_int);
      break;
   case nir_intrinsic_load_barycentric_centroid:
      opcode_val = DXIL_INTR_EVAL_CENTROID;
      func_name = "dx.op.evalCentroid";
      num_args = 4;
      break;
   default:
      unreachable("Unsupported interpolation barycentric intrinsic");
   }
   args[0] = dxil_module_get_int32_const(&ctx->mod, opcode_val);
   args[1] = dxil_module_get_int32_const(&ctx->mod, nir_intrinsic_base(intr));
   args[2] = get_src(ctx, &intr->src[1], 0, nir_type_int);

   const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, DXIL_F32);

   if (!func)
      return false;

   nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), false);
   unsigned var_base_component = var ? var->data.location_frac : 0;
   unsigned base_component = nir_intrinsic_component(intr) - var_base_component;

   if (ctx->mod.minor_validator >= 5) {
      struct dxil_signature_record *sig_rec =
         &ctx->mod.inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
      unsigned comp_size = intr->dest.ssa.bit_size == 64 ? 2 : 1;
      unsigned comp_mask = (1 << (intr->num_components * comp_size)) - 1;
      comp_mask <<= (var_base_component * comp_size);
      for (unsigned r = 0; r < sig_rec->num_elements; ++r)
         sig_rec->elements[r].always_reads_mask |= (comp_mask & sig_rec->elements[r].mask);

      if (!nir_src_is_const(intr->src[1])) {
         struct dxil_psv_signature_element *psv_rec =
            &ctx->mod.psv_inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
         psv_rec->dynamic_mask_and_stream |= comp_mask;
      }
   }

   for (unsigned i = 0; i < intr->num_components; ++i) {
      args[3] = dxil_module_get_int8_const(&ctx->mod, i + base_component);

      const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args);
      if (!retval)
         return false;
      store_dest(ctx, &intr->dest, i, retval, nir_type_float);
   }
   return true;
}
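
/* Worked example (not part of the original source): the at-offset path
 * above converts GLSL's [-0.5, 0.5) float offsets into DXIL's signed
 * 1/16-pixel integers by scaling with 16.0 and truncating. An offset of
 * 0.25 becomes 0.25 * 16.0 = 4.0, fptosi -> 4, i.e. 4/16 of a pixel;
 * -0.5 maps to -8, which is why the usable integer range is [-8, 7].
 */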

static bool
emit_load_ptr(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   struct nir_variable *var =
      nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));

   const struct dxil_value *index =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);
   if (!index)
      return false;

   const struct dxil_value *ptr = emit_gep_for_index(ctx, var, index);
   if (!ptr)
      return false;

   const struct dxil_value *retval =
      dxil_emit_load(&ctx->mod, ptr, 4, false);
   if (!retval)
      return false;

   store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
   return true;
}

static bool
emit_load_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *zero, *index;
   unsigned bit_size = nir_dest_bit_size(intr->dest);
   unsigned align = bit_size / 8;

   /* All shared mem accesses should have been lowered to scalar 32bit
    * accesses.
    */
   assert(bit_size == 32);
   assert(nir_dest_num_components(intr->dest) == 1);

   zero = dxil_module_get_int32_const(&ctx->mod, 0);
   if (!zero)
      return false;

   index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
   if (!index)
      return false;

   const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
   const struct dxil_value *ptr, *retval;

   ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
   if (!ptr)
      return false;

   retval = dxil_emit_load(&ctx->mod, ptr, align, false);
   if (!retval)
      return false;

   store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
   return true;
}

static bool
emit_load_scratch(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *zero, *index;
   unsigned bit_size = nir_dest_bit_size(intr->dest);
   unsigned align = bit_size / 8;

   /* All scratch mem accesses should have been lowered to scalar 32bit
    * accesses.
    */
   assert(bit_size == 32);
   assert(nir_dest_num_components(intr->dest) == 1);

   zero = dxil_module_get_int32_const(&ctx->mod, 0);
   if (!zero)
      return false;

   index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
   if (!index)
      return false;

   const struct dxil_value *ops[] = { ctx->scratchvars, zero, index };
   const struct dxil_value *ptr, *retval;

   ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
   if (!ptr)
      return false;

   retval = dxil_emit_load(&ctx->mod, ptr, align, false);
   if (!retval)
      return false;

   store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
   return true;
}

static bool
emit_discard_if_with_value(struct ntd_context *ctx, const struct dxil_value *value)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DISCARD);
   if (!opcode)
      return false;

   const struct dxil_value *args[] = {
     opcode,
     value
   };

   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.discard", DXIL_NONE);
   if (!func)
      return false;

   return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_discard_if(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *value = get_src(ctx, &intr->src[0], 0, nir_type_bool);
   if (!value)
      return false;

   return emit_discard_if_with_value(ctx, value);
}

static bool
emit_discard(struct ntd_context *ctx)
{
   const struct dxil_value *value = dxil_module_get_int1_const(&ctx->mod, true);
   return emit_discard_if_with_value(ctx, value);
}

static bool
emit_emit_vertex(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_EMIT_STREAM);
   const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
   if (!opcode || !stream_id)
      return false;

   const struct dxil_value *args[] = {
     opcode,
     stream_id
   };

   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.emitStream", DXIL_NONE);
   if (!func)
      return false;

   return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_end_primitive(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CUT_STREAM);
   const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
   if (!opcode || !stream_id)
      return false;

   const struct dxil_value *args[] = {
     opcode,
     stream_id
   };

   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cutStream", DXIL_NONE);
   if (!func)
      return false;

   return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_image_store(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
   if (!handle)
      return false;

   bool is_array = false;
   if (intr->intrinsic == nir_intrinsic_image_deref_store)
      is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
   else
      is_array = nir_intrinsic_image_array(intr);

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
   enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_store ?
      nir_intrinsic_image_dim(intr) :
      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type);
   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
   if (is_array)
      ++num_coords;

   assert(num_coords <= nir_src_num_components(intr->src[1]));
   for (unsigned i = 0; i < num_coords; ++i) {
      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
      if (!coord[i])
         return false;
   }

   nir_alu_type in_type = nir_intrinsic_src_type(intr);
   enum overload_type overload = get_overload(in_type, 32);

   assert(nir_src_bit_size(intr->src[3]) == 32);
   unsigned num_components = nir_src_num_components(intr->src[3]);
   assert(num_components <= 4);
   const struct dxil_value *value[4];
   for (unsigned i = 0; i < num_components; ++i) {
      value[i] = get_src(ctx, &intr->src[3], i, in_type);
      if (!value[i])
         return false;
   }

   for (int i = num_components; i < 4; ++i)
      value[i] = int32_undef;

   const struct dxil_value *write_mask =
      dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
   if (!write_mask)
      return false;

   if (image_dim == GLSL_SAMPLER_DIM_BUF) {
      coord[1] = int32_undef;
      return emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload);
   } else
      return emit_texturestore_call(ctx, handle, coord, value, write_mask, overload);
}

static bool
emit_image_load(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
   if (!handle)
      return false;

   bool is_array = false;
   if (intr->intrinsic == nir_intrinsic_image_deref_load)
      is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
   else
      is_array = nir_intrinsic_image_array(intr);

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
   enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_load ?
      nir_intrinsic_image_dim(intr) :
      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type);
   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
   if (is_array)
      ++num_coords;

   assert(num_coords <= nir_src_num_components(intr->src[1]));
   for (unsigned i = 0; i < num_coords; ++i) {
      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
      if (!coord[i])
         return false;
   }

   nir_alu_type out_type = nir_intrinsic_dest_type(intr);
   enum overload_type overload = get_overload(out_type, 32);

   const struct dxil_value *load_result;
   if (image_dim == GLSL_SAMPLER_DIM_BUF) {
      coord[1] = int32_undef;
      load_result = emit_bufferload_call(ctx, handle, coord, overload);
   } else
      load_result = emit_textureload_call(ctx, handle, coord, overload);

   if (!load_result)
      return false;

   assert(nir_dest_bit_size(intr->dest) == 32);
   unsigned num_components = nir_dest_num_components(intr->dest);
   assert(num_components <= 4);
   for (unsigned i = 0; i < num_components; ++i) {
      const struct dxil_value *component = dxil_emit_extractval(&ctx->mod, load_result, i);
      if (!component)
         return false;
      store_dest(ctx, &intr->dest, i, component, out_type);
   }
   /* FIXME: This flag should only be set when the RWTexture is bound to a
    * vector type, but since we always declare a vec4 right now it is always
    * true. Might be worth reworking dxil_module_get_res_type() to use a
    * scalar when the image only has one component.
    */
   ctx->mod.feats.typed_uav_load_additional_formats = true;

   return true;
}

static bool
emit_image_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr,
                  enum dxil_atomic_op op, nir_alu_type type)
{
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
   if (!handle)
      return false;

   bool is_array = false;
   nir_deref_instr *src_as_deref = nir_src_as_deref(intr->src[0]);
   if (src_as_deref)
      is_array = glsl_sampler_type_is_array(src_as_deref->type);
   else
      is_array = nir_intrinsic_image_array(intr);

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
   enum glsl_sampler_dim image_dim = src_as_deref ?
      glsl_get_sampler_dim(src_as_deref->type) :
      nir_intrinsic_image_dim(intr);
   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
   if (is_array)
      ++num_coords;

   assert(num_coords <= nir_src_num_components(intr->src[1]));
   for (unsigned i = 0; i < num_coords; ++i) {
      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
      if (!coord[i])
         return false;
   }

   const struct dxil_value *value = get_src(ctx, &intr->src[3], 0, type);
   if (!value)
      return false;

   const struct dxil_value *retval =
      emit_atomic_binop(ctx, handle, op, coord, value);

   if (!retval)
      return false;

   store_dest(ctx, &intr->dest, 0, retval, type);
   return true;
}

static bool
emit_image_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
   if (!handle)
      return false;

   bool is_array = false;
   if (intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap)
      is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
   else
      is_array = nir_intrinsic_image_array(intr);

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
   enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_atomic_comp_swap ?
      nir_intrinsic_image_dim(intr) :
      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type);
   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
   if (is_array)
      ++num_coords;

   assert(num_coords <= nir_src_num_components(intr->src[1]));
   for (unsigned i = 0; i < num_coords; ++i) {
      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
      if (!coord[i])
         return false;
   }

   const struct dxil_value *cmpval = get_src(ctx, &intr->src[3], 0, nir_type_uint);
   const struct dxil_value *newval = get_src(ctx, &intr->src[4], 0, nir_type_uint);
   if (!cmpval || !newval)
      return false;

   const struct dxil_value *retval =
      emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval);

   if (!retval)
      return false;

   store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
   return true;
}

struct texop_parameters {
   const struct dxil_value *tex;
   const struct dxil_value *sampler;
   const struct dxil_value *bias, *lod_or_sample, *min_lod;
   const struct dxil_value *coord[4], *offset[3], *dx[3], *dy[3];
   const struct dxil_value *cmp;
   enum overload_type overload;
};

static const struct dxil_value *
emit_texture_size(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.getDimensions", DXIL_NONE);
   if (!func)
      return NULL;

   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_SIZE),
      params->tex,
      params->lod_or_sample
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_image_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
   if (!handle)
      return false;

   const struct dxil_value *lod = get_src(ctx, &intr->src[1], 0, nir_type_uint);
   if (!lod)
      return false;

   struct texop_parameters params = {
      .tex = handle,
      .lod_or_sample = lod
   };
   const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
   if (!dimensions)
      return false;

   for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, i);
      store_dest(ctx, &intr->dest, i, retval, nir_type_uint);
   }

   return true;
}

static bool
emit_get_ssbo_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV;
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
      if (var && var->data.access & ACCESS_NON_WRITEABLE)
         class = DXIL_RESOURCE_CLASS_SRV;
   }

   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER);
   if (!handle)
      return false;

   struct texop_parameters params = {
      .tex = handle,
      .lod_or_sample = dxil_module_get_undef(
                        &ctx->mod, dxil_module_get_int_type(&ctx->mod, 32))
   };

   const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
   if (!dimensions)
      return false;

   const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, 0);
   store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);

   return true;
}

static bool
emit_ssbo_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr,
                 enum dxil_atomic_op op, nir_alu_type type)
{
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);
   const struct dxil_value *value =
      get_src(ctx, &intr->src[2], 0, type);

   if (!value || !handle || !offset)
      return false;

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = {
      offset, int32_undef, int32_undef
   };

   const struct dxil_value *retval =
      emit_atomic_binop(ctx, handle, op, coord, value);

   if (!retval)
      return false;

   store_dest(ctx, &intr->dest, 0, retval, type);
   return true;
}

static bool
emit_ssbo_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);
   const struct dxil_value *cmpval =
      get_src(ctx, &intr->src[2], 0, nir_type_int);
   const struct dxil_value *newval =
      get_src(ctx, &intr->src[3], 0, nir_type_int);

   if (!cmpval || !newval || !handle || !offset)
      return false;

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = {
      offset, int32_undef, int32_undef
   };

   const struct dxil_value *retval =
      emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval);

   if (!retval)
      return false;

   store_dest(ctx, &intr->dest, 0, retval, nir_type_int);
   return true;
}

static bool
emit_shared_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr,
                   enum dxil_rmw_op op, nir_alu_type type)
{
   const struct dxil_value *zero, *index;

   assert(nir_src_bit_size(intr->src[1]) == 32);

   zero = dxil_module_get_int32_const(&ctx->mod, 0);
   if (!zero)
      return false;

   index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
   if (!index)
      return false;

   const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
   const struct dxil_value *ptr, *value, *retval;

   ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
   if (!ptr)
      return false;

   value = get_src(ctx, &intr->src[1], 0, type);
   if (!value)
      return false;

   retval = dxil_emit_atomicrmw(&ctx->mod, value, ptr, op, false,
                                DXIL_ATOMIC_ORDERING_ACQREL,
                                DXIL_SYNC_SCOPE_CROSSTHREAD);
   if (!retval)
      return false;

   store_dest(ctx, &intr->dest, 0, retval, type);
   return true;
}

static bool
emit_shared_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *zero, *index;

   assert(nir_src_bit_size(intr->src[1]) == 32);

   zero = dxil_module_get_int32_const(&ctx->mod, 0);
   if (!zero)
      return false;

   index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
   if (!index)
      return false;

   const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
   const struct dxil_value *ptr, *cmpval, *newval, *retval;

   ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
   if (!ptr)
      return false;

   cmpval = get_src(ctx, &intr->src[1], 0, nir_type_uint);
   newval = get_src(ctx, &intr->src[2], 0, nir_type_uint);
   if (!cmpval || !newval)
      return false;

   retval = dxil_emit_cmpxchg(&ctx->mod, cmpval, newval, ptr, false,
                              DXIL_ATOMIC_ORDERING_ACQREL,
                              DXIL_SYNC_SCOPE_CROSSTHREAD);
   if (!retval)
      return false;

   store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
   return true;
}
4079 
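/* Computes the index part of a (set, binding) pair: a constant index is
 * folded directly into the base binding, a dynamic one is added at runtime.
 * The second destination component is written as well so the two-component
 * result consumed by load_vulkan_descriptor stays well-formed.
 */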
static bool
emit_vulkan_resource_index(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   unsigned int binding = nir_intrinsic_binding(intr);

   bool const_index = nir_src_is_const(intr->src[0]);
   if (const_index) {
      binding += nir_src_as_const_value(intr->src[0])->u32;
   }

   const struct dxil_value *index_value = dxil_module_get_int32_const(&ctx->mod, binding);
   if (!index_value)
      return false;

   if (!const_index) {
      const struct dxil_value *offset = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
      if (!offset)
         return false;

      index_value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, index_value, offset, 0);
      if (!index_value)
         return false;
   }

   store_dest(ctx, &intr->dest, 0, index_value, nir_type_uint32);
   store_dest(ctx, &intr->dest, 1, dxil_module_get_int32_const(&ctx->mod, 0), nir_type_uint32);
   return true;
}

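/* Turns a vulkan_resource_index result into an actual resource handle.
 * UBOs map to CBVs; SSBOs map to SRVs when marked non-writable and to UAVs
 * otherwise.
 */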
static bool
emit_load_vulkan_descriptor(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   nir_intrinsic_instr* index = nir_src_as_intrinsic(intr->src[0]);
   /* We currently do not support reindex */
   assert(index && index->intrinsic == nir_intrinsic_vulkan_resource_index);

   unsigned binding = nir_intrinsic_binding(index);
   unsigned space = nir_intrinsic_desc_set(index);

   /* The descriptor_set field for variables is only 5 bits. We shouldn't have intrinsics trying to go beyond that. */
   assert(space < 32);

   nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));

   const struct dxil_value *handle = NULL;
   enum dxil_resource_class resource_class;

   switch (nir_intrinsic_desc_type(intr)) {
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
      resource_class = DXIL_RESOURCE_CLASS_CBV;
      break;
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
      if (var->data.access & ACCESS_NON_WRITEABLE)
         resource_class = DXIL_RESOURCE_CLASS_SRV;
      else
         resource_class = DXIL_RESOURCE_CLASS_UAV;
      break;
   default:
      unreachable("unknown descriptor type");
      return false;
   }

   const struct dxil_value *index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
   if (!index_value)
      return false;

   handle = emit_createhandle_call(ctx, resource_class,
      get_resource_id(ctx, resource_class, space, binding),
      index_value, false);

   store_dest_value(ctx, &intr->dest, 0, handle);
   store_dest(ctx, &intr->dest, 1, get_src(ctx, &intr->src[0], 1, nir_type_uint32), nir_type_uint32);

   return true;
}

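/* Looks up a sample position with dx.op.renderTargetGetSamplePosition and
 * rebases it from D3D's [-0.5, 0.5] convention to GL's [0, 1].
 */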
static bool
emit_load_sample_pos_from_id(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.renderTargetGetSamplePosition", DXIL_NONE);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION);
   if (!opcode)
      return false;

   const struct dxil_value *args[] = {
      opcode,
      get_src(ctx, &intr->src[0], 0, nir_type_uint32),
   };
   if (!args[1])
      return false;

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;

   for (unsigned i = 0; i < 2; ++i) {
      /* GL coords go from 0 -> 1, D3D from -0.5 -> 0.5 */
      const struct dxil_value *coord = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
         dxil_emit_extractval(&ctx->mod, v, i),
         dxil_module_get_float_const(&ctx->mod, 0.5f), 0);
      store_dest(ctx, &intr->dest, i, coord, nir_type_float32);
   }
   return true;
}

static bool
emit_load_layer_id(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *layer_id = dxil_module_get_int32_const(&ctx->mod, 0);
   /* TODO: Properly implement this once multi-view is supported */
   store_dest_value(ctx, &intr->dest, 0, layer_id);
   return true;
}

static bool
emit_load_sample_id(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   assert(ctx->mod.info.has_per_sample_input ||
          intr->intrinsic == nir_intrinsic_load_sample_id_no_per_sample);

   if (ctx->mod.info.has_per_sample_input)
      return emit_load_unary_external_function(ctx, intr, "dx.op.sampleIndex",
                                               DXIL_INTR_SAMPLE_INDEX);

   store_dest_value(ctx, &intr->dest, 0, dxil_module_get_int32_const(&ctx->mod, 0));
   return true;
}

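/* Central dispatch for NIR intrinsics. Anything not listed here should have
 * been lowered away before reaching this pass, so an unknown intrinsic is
 * treated as a compiler bug rather than an application error.
 */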
static bool
emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_load_global_invocation_id:
   case nir_intrinsic_load_global_invocation_id_zero_base:
      return emit_load_global_invocation_id(ctx, intr);
   case nir_intrinsic_load_local_invocation_id:
      return emit_load_local_invocation_id(ctx, intr);
   case nir_intrinsic_load_local_invocation_index:
      return emit_load_local_invocation_index(ctx, intr);
   case nir_intrinsic_load_workgroup_id:
   case nir_intrinsic_load_workgroup_id_zero_base:
      return emit_load_local_workgroup_id(ctx, intr);
   case nir_intrinsic_load_ssbo:
      return emit_load_ssbo(ctx, intr);
   case nir_intrinsic_store_ssbo:
      return emit_store_ssbo(ctx, intr);
   case nir_intrinsic_store_ssbo_masked_dxil:
      return emit_store_ssbo_masked(ctx, intr);
   case nir_intrinsic_store_shared_dxil:
   case nir_intrinsic_store_shared_masked_dxil:
      return emit_store_shared(ctx, intr);
   case nir_intrinsic_store_scratch_dxil:
      return emit_store_scratch(ctx, intr);
   case nir_intrinsic_load_ptr_dxil:
      return emit_load_ptr(ctx, intr);
   case nir_intrinsic_load_ubo:
      return emit_load_ubo(ctx, intr);
   case nir_intrinsic_load_ubo_dxil:
      return emit_load_ubo_dxil(ctx, intr);
   case nir_intrinsic_load_primitive_id:
      return emit_load_unary_external_function(ctx, intr, "dx.op.primitiveID",
                                               DXIL_INTR_PRIMITIVE_ID);
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_id_no_per_sample:
      return emit_load_sample_id(ctx, intr);
   case nir_intrinsic_load_invocation_id:
      switch (ctx->mod.shader_kind) {
      case DXIL_HULL_SHADER:
         return emit_load_unary_external_function(ctx, intr, "dx.op.outputControlPointID",
                                                  DXIL_INTR_OUTPUT_CONTROL_POINT_ID);
      case DXIL_GEOMETRY_SHADER:
         return emit_load_unary_external_function(ctx, intr, "dx.op.gsInstanceID",
                                                  DXIL_INTR_GS_INSTANCE_ID);
      default:
         unreachable("Unexpected shader kind for invocation ID");
      }
   case nir_intrinsic_load_sample_mask_in:
      return emit_load_sample_mask_in(ctx, intr);
   case nir_intrinsic_load_tess_coord:
      return emit_load_tess_coord(ctx, intr);
   case nir_intrinsic_load_shared_dxil:
      return emit_load_shared(ctx, intr);
   case nir_intrinsic_load_scratch_dxil:
      return emit_load_scratch(ctx, intr);
   case nir_intrinsic_discard_if:
   case nir_intrinsic_demote_if:
      return emit_discard_if(ctx, intr);
   case nir_intrinsic_discard:
   case nir_intrinsic_demote:
      return emit_discard(ctx);
   case nir_intrinsic_emit_vertex:
      return emit_emit_vertex(ctx, intr);
   case nir_intrinsic_end_primitive:
      return emit_end_primitive(ctx, intr);
   case nir_intrinsic_scoped_barrier:
      return emit_barrier(ctx, intr);
   case nir_intrinsic_memory_barrier:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_memory_barrier_image:
   case nir_intrinsic_memory_barrier_atomic_counter:
      return emit_memory_barrier(ctx, intr);
   case nir_intrinsic_memory_barrier_shared:
      return emit_memory_barrier_shared(ctx, intr);
   case nir_intrinsic_group_memory_barrier:
      return emit_group_memory_barrier(ctx, intr);
   case nir_intrinsic_control_barrier:
      return emit_control_barrier(ctx, intr);
   case nir_intrinsic_ssbo_atomic_add:
      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_ADD, nir_type_int);
   case nir_intrinsic_ssbo_atomic_imin:
      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_IMIN, nir_type_int);
   case nir_intrinsic_ssbo_atomic_umin:
      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_UMIN, nir_type_uint);
   case nir_intrinsic_ssbo_atomic_imax:
      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_IMAX, nir_type_int);
   case nir_intrinsic_ssbo_atomic_umax:
      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_UMAX, nir_type_uint);
   case nir_intrinsic_ssbo_atomic_and:
      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_AND, nir_type_uint);
   case nir_intrinsic_ssbo_atomic_or:
      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_OR, nir_type_uint);
   case nir_intrinsic_ssbo_atomic_xor:
      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_XOR, nir_type_uint);
   case nir_intrinsic_ssbo_atomic_exchange:
      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_EXCHANGE, nir_type_int);
   case nir_intrinsic_ssbo_atomic_comp_swap:
      return emit_ssbo_atomic_comp_swap(ctx, intr);
   case nir_intrinsic_shared_atomic_add_dxil:
      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_ADD, nir_type_int);
   case nir_intrinsic_shared_atomic_imin_dxil:
      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_MIN, nir_type_int);
   case nir_intrinsic_shared_atomic_umin_dxil:
      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_UMIN, nir_type_uint);
   case nir_intrinsic_shared_atomic_imax_dxil:
      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_MAX, nir_type_int);
   case nir_intrinsic_shared_atomic_umax_dxil:
      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_UMAX, nir_type_uint);
   case nir_intrinsic_shared_atomic_and_dxil:
      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_AND, nir_type_uint);
   case nir_intrinsic_shared_atomic_or_dxil:
      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_OR, nir_type_uint);
   case nir_intrinsic_shared_atomic_xor_dxil:
      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_XOR, nir_type_uint);
   case nir_intrinsic_shared_atomic_exchange_dxil:
      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_XCHG, nir_type_int);
   case nir_intrinsic_shared_atomic_comp_swap_dxil:
      return emit_shared_atomic_comp_swap(ctx, intr);
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_atomic_add:
      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_ADD, nir_type_int);
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_atomic_imin:
      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_IMIN, nir_type_int);
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_atomic_umin:
      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_UMIN, nir_type_uint);
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_atomic_imax:
      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_IMAX, nir_type_int);
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_atomic_umax:
      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_UMAX, nir_type_uint);
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_atomic_and:
      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_AND, nir_type_uint);
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_atomic_or:
      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_OR, nir_type_uint);
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_atomic_xor:
      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_XOR, nir_type_uint);
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_atomic_exchange:
      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_EXCHANGE, nir_type_uint);
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_atomic_comp_swap:
      return emit_image_atomic_comp_swap(ctx, intr);
   case nir_intrinsic_image_store:
   case nir_intrinsic_image_deref_store:
      return emit_image_store(ctx, intr);
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_deref_load:
      return emit_image_load(ctx, intr);
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
      return emit_image_size(ctx, intr);
   case nir_intrinsic_get_ssbo_size:
      return emit_get_ssbo_size(ctx, intr);
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_output:
      return emit_load_input_via_intrinsic(ctx, intr);
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output:
      return emit_store_output_via_intrinsic(ctx, intr);

   case nir_intrinsic_load_barycentric_at_offset:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_pixel:
      /* Emit nothing, we only support these as inputs to load_interpolated_input */
      return true;
   case nir_intrinsic_load_interpolated_input:
      return emit_load_interpolated_input(ctx, intr);

   case nir_intrinsic_vulkan_resource_index:
      return emit_vulkan_resource_index(ctx, intr);
   case nir_intrinsic_load_vulkan_descriptor:
      return emit_load_vulkan_descriptor(ctx, intr);
   case nir_intrinsic_load_layer_id:
      return emit_load_layer_id(ctx, intr);

   case nir_intrinsic_load_sample_pos_from_id:
      return emit_load_sample_pos_from_id(ctx, intr);

   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_workgroup_size:
   default:
      NIR_INSTR_UNSUPPORTED(&intr->instr);
      unreachable("Unimplemented intrinsic instruction");
      return false;
   }
}

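/* Load-constants are materialized component-by-component as integer
 * constants of the matching bit size; 16- and 64-bit values also flag the
 * corresponding module feature bits so the final DXIL metadata advertises
 * them.
 */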
static bool
emit_load_const(struct ntd_context *ctx, nir_load_const_instr *load_const)
{
   for (int i = 0; i < load_const->def.num_components; ++i) {
      const struct dxil_value *value;
      switch (load_const->def.bit_size) {
      case 1:
         value = dxil_module_get_int1_const(&ctx->mod,
                                            load_const->value[i].b);
         break;
      case 16:
         ctx->mod.feats.native_low_precision = true;
         value = dxil_module_get_int16_const(&ctx->mod,
                                             load_const->value[i].u16);
         break;
      case 32:
         value = dxil_module_get_int32_const(&ctx->mod,
                                             load_const->value[i].u32);
         break;
      case 64:
         ctx->mod.feats.int64_ops = true;
         value = dxil_module_get_int64_const(&ctx->mod,
                                             load_const->value[i].u64);
         break;
      default:
         unreachable("unexpected bit_size");
      }
      if (!value)
         return false;

      store_ssa_def(ctx, &load_const->def, i, value);
   }
   return true;
}

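/* Derefs only matter here for resource access: walking a (possibly arrayed)
 * texture, sampler, or image variable down to a concrete binding and
 * emitting a createHandle for it.
 */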
static bool
emit_deref(struct ntd_context* ctx, nir_deref_instr* instr)
{
   assert(instr->deref_type == nir_deref_type_var ||
          instr->deref_type == nir_deref_type_array);

   /* In the CL environment, there's nothing to emit. Any references to
    * derefs will emit the necessary logic to handle scratch/shared GEP addressing
    */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_CL)
      return true;

   /* In the Vulkan environment, we don't have cached handles for textures or
    * samplers, so let's use the opportunity of walking through the derefs to
    * emit those.
    */
   nir_variable *var = nir_deref_instr_get_variable(instr);
   assert(var);

   if (!glsl_type_is_sampler(glsl_without_array(var->type)) &&
       !glsl_type_is_image(glsl_without_array(var->type)) &&
       !glsl_type_is_texture(glsl_without_array(var->type)))
      return true;

   const struct glsl_type *type = instr->type;
   const struct dxil_value *binding;
   unsigned binding_val = ctx->opts->environment == DXIL_ENVIRONMENT_GL ?
      var->data.driver_location : var->data.binding;

   if (instr->deref_type == nir_deref_type_var) {
      binding = dxil_module_get_int32_const(&ctx->mod, binding_val);
   } else {
      const struct dxil_value *base = get_src(ctx, &instr->parent, 0, nir_type_uint32);
      const struct dxil_value *offset = get_src(ctx, &instr->arr.index, 0, nir_type_uint32);
      if (!base || !offset)
         return false;

      if (glsl_type_is_array(instr->type)) {
         offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_MUL, offset,
            dxil_module_get_int32_const(&ctx->mod, glsl_get_aoa_size(instr->type)), 0);
         if (!offset)
            return false;
      }
      binding = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, base, offset, 0);
   }

   if (!binding)
      return false;

   /* Haven't finished chasing the deref chain yet, just store the value */
   if (glsl_type_is_array(type)) {
      store_dest(ctx, &instr->dest, 0, binding, nir_type_uint32);
      return true;
   }

   assert(glsl_type_is_sampler(type) || glsl_type_is_image(type) || glsl_type_is_texture(type));
   enum dxil_resource_class res_class;
   if (glsl_type_is_image(type)) {
      if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN &&
          (var->data.access & ACCESS_NON_WRITEABLE))
         res_class = DXIL_RESOURCE_CLASS_SRV;
      else
         res_class = DXIL_RESOURCE_CLASS_UAV;
   } else if (glsl_type_is_sampler(type)) {
      res_class = DXIL_RESOURCE_CLASS_SAMPLER;
   } else {
      res_class = DXIL_RESOURCE_CLASS_SRV;
   }

   unsigned descriptor_set = ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN ?
      var->data.descriptor_set : (glsl_type_is_image(type) ? 1 : 0);
   const struct dxil_value *handle = emit_createhandle_call(ctx, res_class,
      get_resource_id(ctx, res_class, descriptor_set, binding_val), binding, false);
   if (!handle)
      return false;

   store_dest_value(ctx, &instr->dest, 0, handle);
   return true;
}

static bool
emit_cond_branch(struct ntd_context *ctx, const struct dxil_value *cond,
                 int true_block, int false_block)
{
   assert(cond);
   assert(true_block >= 0);
   assert(false_block >= 0);
   return dxil_emit_branch(&ctx->mod, cond, true_block, false_block);
}

static bool
emit_branch(struct ntd_context *ctx, int block)
{
   assert(block >= 0);
   return dxil_emit_branch(&ctx->mod, NULL, block, -1);
}

static bool
emit_jump(struct ntd_context *ctx, nir_jump_instr *instr)
{
   switch (instr->type) {
   case nir_jump_break:
   case nir_jump_continue:
      assert(instr->instr.block->successors[0]);
      assert(!instr->instr.block->successors[1]);
      return emit_branch(ctx, instr->instr.block->successors[0]->index);

   default:
      unreachable("Unsupported jump type\n");
   }
}

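/* Phis are handled in two passes: emit_phi creates empty phi instructions
 * while their block is being emitted, and fixup_phi fills in the incoming
 * value/block pairs once every predecessor has been converted and all SSA
 * values are known.
 */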
struct phi_block {
   unsigned num_components;
   struct dxil_instr *comp[NIR_MAX_VEC_COMPONENTS];
};

static bool
emit_phi(struct ntd_context *ctx, nir_phi_instr *instr)
{
   unsigned bit_size = nir_dest_bit_size(instr->dest);
   const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod,
                                                           bit_size);

   struct phi_block *vphi = ralloc(ctx->phis, struct phi_block);
   vphi->num_components = nir_dest_num_components(instr->dest);

   for (unsigned i = 0; i < vphi->num_components; ++i) {
      struct dxil_instr *phi = vphi->comp[i] = dxil_emit_phi(&ctx->mod, type);
      if (!phi)
         return false;
      store_dest_value(ctx, &instr->dest, i, dxil_instr_get_return_value(phi));
   }
   _mesa_hash_table_insert(ctx->phis, instr, vphi);
   return true;
}

static bool
fixup_phi(struct ntd_context *ctx, nir_phi_instr *instr,
          struct phi_block *vphi)
{
   const struct dxil_value *values[16];
   unsigned blocks[16];
   for (unsigned i = 0; i < vphi->num_components; ++i) {
      size_t num_incoming = 0;
      nir_foreach_phi_src(src, instr) {
         assert(src->src.is_ssa);
         const struct dxil_value *val = get_src_ssa(ctx, src->src.ssa, i);
         values[num_incoming] = val;
         blocks[num_incoming] = src->pred->index;
         ++num_incoming;
         if (num_incoming == ARRAY_SIZE(values)) {
            if (!dxil_phi_add_incoming(vphi->comp[i], values, blocks,
                                       num_incoming))
               return false;
            num_incoming = 0;
         }
      }
      if (num_incoming > 0 && !dxil_phi_add_incoming(vphi->comp[i], values,
                                                     blocks, num_incoming))
         return false;
   }
   return true;
}

static unsigned
get_n_src(struct ntd_context *ctx, const struct dxil_value **values,
          unsigned max_components, nir_tex_src *src, nir_alu_type type)
{
   unsigned num_components = nir_src_num_components(src->src);
   unsigned i = 0;

   assert(num_components <= max_components);

   for (i = 0; i < num_components; ++i) {
      values[i] = get_src(ctx, &src->src, i, type);
      if (!values[i])
         return 0;
   }

   return num_components;
}

#define PAD_SRC(ctx, array, components, undef) \
   for (unsigned i = components; i < ARRAY_SIZE(array); ++i) { \
      array[i] = undef; \
   }

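/* The dx.op.sample* opcodes take a fixed operand count, so callers gather
 * however many coordinate/offset/derivative components NIR provides
 * (get_n_src) and pad the leftovers with undef (PAD_SRC) before the call is
 * emitted.
 */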
static const struct dxil_value *
emit_sample(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sample", params->overload);
   if (!func)
      return NULL;

   const struct dxil_value *args[11] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->min_lod
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_sample_bias(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleBias", params->overload);
   if (!func)
      return NULL;

   assert(params->bias != NULL);

   const struct dxil_value *args[12] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_BIAS),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->bias, params->min_lod
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_sample_level(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleLevel", params->overload);
   if (!func)
      return NULL;

   assert(params->lod_or_sample != NULL);

   const struct dxil_value *args[11] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_LEVEL),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->lod_or_sample
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_sample_cmp(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func;
   enum dxil_intr opcode;
   int numparam;

   if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) {
      func = dxil_get_function(&ctx->mod, "dx.op.sampleCmp", DXIL_F32);
      opcode = DXIL_INTR_SAMPLE_CMP;
      numparam = 12;
   } else {
      func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevelZero", DXIL_F32);
      opcode = DXIL_INTR_SAMPLE_CMP_LVL_ZERO;
      numparam = 11;
   }

   if (!func)
      return NULL;

   const struct dxil_value *args[12] = {
      dxil_module_get_int32_const(&ctx->mod, opcode),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->cmp, params->min_lod
   };

   return dxil_emit_call(&ctx->mod, func, args, numparam);
}

static const struct dxil_value *
emit_sample_grad(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleGrad", params->overload);
   if (!func)
      return NULL;

   const struct dxil_value *args[17] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_GRAD),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->dx[0], params->dx[1], params->dx[2],
      params->dy[0], params->dy[1], params->dy[2],
      params->min_lod
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_texel_fetch(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", params->overload);
   if (!func)
      return NULL;

   if (!params->lod_or_sample)
      params->lod_or_sample = dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32));

   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOAD),
      params->tex,
      params->lod_or_sample, params->coord[0], params->coord[1], params->coord[2],
      params->offset[0], params->offset[1], params->offset[2]
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_texture_lod(struct ntd_context *ctx, struct texop_parameters *params, bool clamped)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.calculateLOD", DXIL_F32);
   if (!func)
      return NULL;

   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOD),
      params->tex,
      params->sampler,
      params->coord[0],
      params->coord[1],
      params->coord[2],
      dxil_module_get_int1_const(&ctx->mod, clamped ? 1 : 0)
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_texture_gather(struct ntd_context *ctx, struct texop_parameters *params, unsigned component)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod,
      params->cmp ? "dx.op.textureGatherCmp" : "dx.op.textureGather", params->overload);
   if (!func)
      return NULL;

   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, params->cmp ?
         DXIL_INTR_TEXTURE_GATHER_CMP : DXIL_INTR_TEXTURE_GATHER),
      params->tex,
      params->sampler,
      params->coord[0],
      params->coord[1],
      params->coord[2],
      params->coord[3],
      params->offset[0],
      params->offset[1],
      dxil_module_get_int32_const(&ctx->mod, component),
      params->cmp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args) - (params->cmp ? 0 : 1));
}

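/* Texture instructions: collect every NIR source into texop_parameters,
 * pad unused components with undef, then pick the DXIL operation based on
 * the texop (sample, bias, level, grad, fetch, gather, size, and lod
 * queries).
 */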
static bool
emit_tex(struct ntd_context *ctx, nir_tex_instr *instr)
{
   struct texop_parameters params;
   memset(&params, 0, sizeof(struct texop_parameters));
   if (ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN) {
      params.tex = ctx->srv_handles[instr->texture_index];
      params.sampler = ctx->sampler_handles[instr->sampler_index];
   }

   const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
   const struct dxil_type *float_type = dxil_module_get_float_type(&ctx->mod, 32);
   const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);
   const struct dxil_value *float_undef = dxil_module_get_undef(&ctx->mod, float_type);

   unsigned coord_components = 0, offset_components = 0, dx_components = 0, dy_components = 0;
   params.overload = get_overload(instr->dest_type, 32);

   for (unsigned i = 0; i < instr->num_srcs; i++) {
      nir_alu_type type = nir_tex_instr_src_type(instr, i);

      switch (instr->src[i].src_type) {
      case nir_tex_src_coord:
         coord_components = get_n_src(ctx, params.coord, ARRAY_SIZE(params.coord),
                                      &instr->src[i], type);
         if (!coord_components)
            return false;
         break;

      case nir_tex_src_offset:
         offset_components = get_n_src(ctx, params.offset, ARRAY_SIZE(params.offset),
                                       &instr->src[i], nir_type_int);
         if (!offset_components)
            return false;
         break;

      case nir_tex_src_bias:
         assert(instr->op == nir_texop_txb);
         assert(nir_src_num_components(instr->src[i].src) == 1);
         params.bias = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
         if (!params.bias)
            return false;
         break;

      case nir_tex_src_lod:
         assert(nir_src_num_components(instr->src[i].src) == 1);
         if (instr->op == nir_texop_txf_ms) {
            assert(nir_src_as_int(instr->src[i].src) == 0);
            break;
         }

         /* Buffers don't have a LOD */
         if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF)
            params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, type);
         else
            params.lod_or_sample = int_undef;
         if (!params.lod_or_sample)
            return false;
         break;

      case nir_tex_src_min_lod:
         assert(nir_src_num_components(instr->src[i].src) == 1);
         params.min_lod = get_src(ctx, &instr->src[i].src, 0, type);
         if (!params.min_lod)
            return false;
         break;

      case nir_tex_src_comparator:
         assert(nir_src_num_components(instr->src[i].src) == 1);
         params.cmp = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
         if (!params.cmp)
            return false;
         break;

      case nir_tex_src_ddx:
         dx_components = get_n_src(ctx, params.dx, ARRAY_SIZE(params.dx),
                                   &instr->src[i], nir_type_float);
         if (!dx_components)
            return false;
         break;

      case nir_tex_src_ddy:
         dy_components = get_n_src(ctx, params.dy, ARRAY_SIZE(params.dy),
                                   &instr->src[i], nir_type_float);
         if (!dy_components)
            return false;
         break;

      case nir_tex_src_ms_index:
         params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, nir_type_int);
         if (!params.lod_or_sample)
            return false;
         break;

      case nir_tex_src_texture_deref:
         assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN);
         params.tex = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
         break;

      case nir_tex_src_sampler_deref:
         assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN);
         params.sampler = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
         break;

      case nir_tex_src_texture_offset:
         params.tex = emit_createhandle_call(ctx, DXIL_RESOURCE_CLASS_SRV,
            get_resource_id(ctx, DXIL_RESOURCE_CLASS_SRV, 0, instr->texture_index),
            dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
               get_src_ssa(ctx, instr->src[i].src.ssa, 0),
               dxil_module_get_int32_const(&ctx->mod, instr->texture_index), 0),
            instr->texture_non_uniform);
         break;

      case nir_tex_src_sampler_offset:
         if (nir_tex_instr_need_sampler(instr)) {
            params.sampler = emit_createhandle_call(ctx, DXIL_RESOURCE_CLASS_SAMPLER,
               get_resource_id(ctx, DXIL_RESOURCE_CLASS_SAMPLER, 0, instr->sampler_index),
               dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
                  get_src_ssa(ctx, instr->src[i].src.ssa, 0),
                  dxil_module_get_int32_const(&ctx->mod, instr->sampler_index), 0),
               instr->sampler_non_uniform);
         }
         break;

      case nir_tex_src_projector:
         unreachable("Texture projector should have been lowered");

      default:
         fprintf(stderr, "texture source: %d\n", instr->src[i].src_type);
         unreachable("unknown texture source");
      }
   }

   assert(params.tex != NULL);
   assert(instr->op == nir_texop_txf ||
          instr->op == nir_texop_txf_ms ||
          nir_tex_instr_is_query(instr) ||
          params.sampler != NULL);

   PAD_SRC(ctx, params.coord, coord_components, float_undef);
   PAD_SRC(ctx, params.offset, offset_components, int_undef);
   if (!params.min_lod) params.min_lod = float_undef;

   const struct dxil_value *sample = NULL;
   switch (instr->op) {
   case nir_texop_txb:
      sample = emit_sample_bias(ctx, &params);
      break;

   case nir_texop_tex:
      if (params.cmp != NULL) {
         sample = emit_sample_cmp(ctx, &params);
         break;
      } else if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) {
         sample = emit_sample(ctx, &params);
         break;
      }
      params.lod_or_sample = dxil_module_get_float_const(&ctx->mod, 0);
      FALLTHROUGH;
   case nir_texop_txl:
      sample = emit_sample_level(ctx, &params);
      break;

   case nir_texop_txd:
      PAD_SRC(ctx, params.dx, dx_components, float_undef);
      PAD_SRC(ctx, params.dy, dy_components, float_undef);
      sample = emit_sample_grad(ctx, &params);
      break;

   case nir_texop_txf:
   case nir_texop_txf_ms:
      if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
         params.coord[1] = int_undef;
         sample = emit_bufferload_call(ctx, params.tex, params.coord, params.overload);
      } else {
         PAD_SRC(ctx, params.coord, coord_components, int_undef);
         sample = emit_texel_fetch(ctx, &params);
      }
      break;

   case nir_texop_txs:
      sample = emit_texture_size(ctx, &params);
      break;

   case nir_texop_tg4:
      sample = emit_texture_gather(ctx, &params, instr->component);
      break;

   case nir_texop_lod:
      sample = emit_texture_lod(ctx, &params, true);
      store_dest(ctx, &instr->dest, 0, sample, nir_alu_type_get_base_type(instr->dest_type));
      sample = emit_texture_lod(ctx, &params, false);
      store_dest(ctx, &instr->dest, 1, sample, nir_alu_type_get_base_type(instr->dest_type));
      return true;

   case nir_texop_query_levels:
      params.lod_or_sample = dxil_module_get_int_const(&ctx->mod, 0, 32);
      sample = emit_texture_size(ctx, &params);
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3);
      store_dest(ctx, &instr->dest, 0, retval, nir_alu_type_get_base_type(instr->dest_type));
      return true;

   default:
      fprintf(stderr, "texture op: %d\n", instr->op);
      unreachable("unknown texture op");
   }

   if (!sample)
      return false;

   for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, i);
      store_dest(ctx, &instr->dest, i, retval, nir_alu_type_get_base_type(instr->dest_type));
   }

   return true;
}

static bool
emit_undefined(struct ntd_context *ctx, nir_ssa_undef_instr *undef)
{
   for (unsigned i = 0; i < undef->def.num_components; ++i)
      store_ssa_def(ctx, &undef->def, i, dxil_module_get_int32_const(&ctx->mod, 0));
   return true;
}

static bool emit_instr(struct ntd_context *ctx, struct nir_instr* instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return emit_alu(ctx, nir_instr_as_alu(instr));
   case nir_instr_type_intrinsic:
      return emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
   case nir_instr_type_load_const:
      return emit_load_const(ctx, nir_instr_as_load_const(instr));
   case nir_instr_type_deref:
      return emit_deref(ctx, nir_instr_as_deref(instr));
   case nir_instr_type_jump:
      return emit_jump(ctx, nir_instr_as_jump(instr));
   case nir_instr_type_phi:
      return emit_phi(ctx, nir_instr_as_phi(instr));
   case nir_instr_type_tex:
      return emit_tex(ctx, nir_instr_as_tex(instr));
   case nir_instr_type_ssa_undef:
      return emit_undefined(ctx, nir_instr_as_ssa_undef(instr));
   default:
      NIR_INSTR_UNSUPPORTED(instr);
      unreachable("Unimplemented instruction type");
      return false;
   }
}


static bool
emit_block(struct ntd_context *ctx, struct nir_block *block)
{
   assert(block->index < ctx->mod.cur_emitting_func->num_basic_block_ids);
   ctx->mod.cur_emitting_func->basic_block_ids[block->index] = ctx->mod.cur_emitting_func->curr_block;

   nir_foreach_instr(instr, block) {
      TRACE_CONVERSION(instr);

      if (!emit_instr(ctx, instr)) {
         return false;
      }
   }
   return true;
}

static bool
emit_cf_list(struct ntd_context *ctx, struct exec_list *list);

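/* NIR's structured if/else is lowered to a conditional branch on block
 * indices. When the else-list is empty, the false edge targets the
 * then-block's successor (the merge block) directly.
 */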
static bool
emit_if(struct ntd_context *ctx, struct nir_if *if_stmt)
{
   assert(nir_src_num_components(if_stmt->condition) == 1);
   const struct dxil_value *cond = get_src(ctx, &if_stmt->condition, 0,
                                           nir_type_bool);
   if (!cond)
      return false;

   /* prepare blocks */
   nir_block *then_block = nir_if_first_then_block(if_stmt);
   assert(nir_if_last_then_block(if_stmt)->successors[0]);
   assert(!nir_if_last_then_block(if_stmt)->successors[1]);
   int then_succ = nir_if_last_then_block(if_stmt)->successors[0]->index;

   nir_block *else_block = NULL;
   int else_succ = -1;
   if (!exec_list_is_empty(&if_stmt->else_list)) {
      else_block = nir_if_first_else_block(if_stmt);
      assert(nir_if_last_else_block(if_stmt)->successors[0]);
      assert(!nir_if_last_else_block(if_stmt)->successors[1]);
      else_succ = nir_if_last_else_block(if_stmt)->successors[0]->index;
   }

   if (!emit_cond_branch(ctx, cond, then_block->index,
                         else_block ? else_block->index : then_succ))
      return false;

   /* handle then-block */
   if (!emit_cf_list(ctx, &if_stmt->then_list) ||
       (!nir_block_ends_in_jump(nir_if_last_then_block(if_stmt)) &&
        !emit_branch(ctx, then_succ)))
      return false;

   if (else_block) {
      /* handle else-block */
      if (!emit_cf_list(ctx, &if_stmt->else_list) ||
          (!nir_block_ends_in_jump(nir_if_last_else_block(if_stmt)) &&
           !emit_branch(ctx, else_succ)))
         return false;
   }

   return true;
}

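/* NIR loops are infinite by construction; breaks and continues arrive as
 * explicit jump instructions (see emit_jump). All that is emitted here is
 * the entry branch, the body, and the back edge to the loop header.
 */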
static bool
emit_loop(struct ntd_context *ctx, nir_loop *loop)
{
   nir_block *first_block = nir_loop_first_block(loop);

   assert(nir_loop_last_block(loop)->successors[0]);
   assert(!nir_loop_last_block(loop)->successors[1]);

   if (!emit_branch(ctx, first_block->index))
      return false;

   if (!emit_cf_list(ctx, &loop->body))
      return false;

   if (!emit_branch(ctx, first_block->index))
      return false;

   return true;
}

static bool
emit_cf_list(struct ntd_context *ctx, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         if (!emit_block(ctx, nir_cf_node_as_block(node)))
            return false;
         break;

      case nir_cf_node_if:
         if (!emit_if(ctx, nir_cf_node_as_if(node)))
            return false;
         break;

      case nir_cf_node_loop:
         if (!emit_loop(ctx, nir_cf_node_as_loop(node)))
            return false;
         break;

      default:
         unreachable("unsupported cf-list node");
         break;
      }
   }
   return true;
}

static void
insert_sorted_by_binding(struct exec_list *var_list, nir_variable *new_var)
{
   nir_foreach_variable_in_list(var, var_list) {
      if (var->data.binding > new_var->data.binding) {
         exec_node_insert_node_before(&var->node, &new_var->node);
         return;
      }
   }
   exec_list_push_tail(var_list, &new_var->node);
}


static void
sort_uniforms_by_binding_and_remove_structs(nir_shader *s)
{
   struct exec_list new_list;
   exec_list_make_empty(&new_list);

   nir_foreach_variable_with_modes_safe(var, s, nir_var_uniform) {
      exec_node_remove(&var->node);
      const struct glsl_type *type = glsl_without_array(var->type);
      if (!glsl_type_is_struct(type))
         insert_sorted_by_binding(&new_list, var);
   }
   exec_list_append(&s->variables, &new_list);
}

static void
prepare_phi_values(struct ntd_context *ctx, nir_function_impl *impl)
{
   /* PHI nodes are difficult to get right when tracking the types:
    * Since the incoming sources are linked to blocks, we can't bitcast
    * on the fly while loading. So scan the shader and insert a typed dummy
    * value for each phi source, and when storing we convert if the incoming
    * value has a different type than the one expected by the phi node.
    * We choose int as default, because it supports more bit sizes.
    */
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type == nir_instr_type_phi) {
            nir_phi_instr *ir = nir_instr_as_phi(instr);
            unsigned bitsize = nir_dest_bit_size(ir->dest);
            const struct dxil_value *dummy = dxil_module_get_int_const(&ctx->mod, 0, bitsize);
            nir_foreach_phi_src(src, ir) {
               for (unsigned i = 0; i < ir->dest.ssa.num_components; ++i)
                  store_ssa_def(ctx, src->src.ssa, i, dummy);
            }
         }
      }
   }
}

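/* In the GL environment all UBOs are collapsed into at most three CBVs: an
 * optional default-uniform block, one array covering the user UBOs, and an
 * optional trailing non-arrayed CBV for state variables. Other environments
 * emit one CBV per UBO variable.
 */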
static bool
emit_cbvs(struct ntd_context *ctx)
{
   if (ctx->opts->environment != DXIL_ENVIRONMENT_GL) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ubo) {
         if (!emit_ubo_var(ctx, var))
            return false;
      }
   } else {
      if (ctx->shader->info.num_ubos) {
         const unsigned ubo_size = 16384 /*4096 vec4's*/;
         bool has_ubo0 = !ctx->opts->no_ubo0;
         bool has_state_vars = ctx->opts->last_ubo_is_not_arrayed;
         unsigned ubo1_array_size = ctx->shader->info.num_ubos -
            (has_state_vars ? 2 : 1);

         if (has_ubo0 &&
             !emit_cbv(ctx, 0, 0, ubo_size, 1, "__ubo_uniforms"))
            return false;
         if (ubo1_array_size &&
             !emit_cbv(ctx, 1, 0, ubo_size, ubo1_array_size, "__ubos"))
            return false;
         if (has_state_vars &&
             !emit_cbv(ctx, ctx->shader->info.num_ubos - 1, 0, ubo_size, 1, "__ubo_state_vars"))
            return false;
      }
   }

   return true;
}

static bool
emit_scratch(struct ntd_context *ctx)
{
   if (ctx->shader->scratch_size) {
      /*
       * We always allocate a u32 array, no matter the actual variable types.
       * According to the DXIL spec, the minimum load/store granularity is
       * 32-bit; anything smaller requires a read-extract/read-modify-write
       * approach.
       */
      unsigned size = ALIGN_POT(ctx->shader->scratch_size, sizeof(uint32_t));
      const struct dxil_type *int32 = dxil_module_get_int_type(&ctx->mod, 32);
      const struct dxil_value *array_length = dxil_module_get_int32_const(&ctx->mod, size / sizeof(uint32_t));
      if (!int32 || !array_length)
         return false;

      const struct dxil_type *type = dxil_module_get_array_type(
         &ctx->mod, int32, size / sizeof(uint32_t));
      if (!type)
         return false;

      ctx->scratchvars = dxil_emit_alloca(&ctx->mod, type, int32, array_length, 4);
      if (!ctx->scratchvars)
         return false;
   }

   return true;
}

/* The validator complains if we don't have ops that reference a global variable. */
static bool
shader_has_shared_ops(struct nir_shader *s)
{
   nir_foreach_function(func, s) {
      if (!func->impl)
         continue;
      nir_foreach_block(block, func->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_load_shared_dxil:
            case nir_intrinsic_store_shared_dxil:
            case nir_intrinsic_shared_atomic_add_dxil:
            case nir_intrinsic_shared_atomic_and_dxil:
            case nir_intrinsic_shared_atomic_comp_swap_dxil:
            case nir_intrinsic_shared_atomic_exchange_dxil:
            case nir_intrinsic_shared_atomic_imax_dxil:
            case nir_intrinsic_shared_atomic_imin_dxil:
            case nir_intrinsic_shared_atomic_or_dxil:
            case nir_intrinsic_shared_atomic_umax_dxil:
            case nir_intrinsic_shared_atomic_umin_dxil:
            case nir_intrinsic_shared_atomic_xor_dxil:
               return true;
            default: break;
            }
         }
      }
   }
   return false;
}

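/* Converts one nir_function into a DXIL function definition: allocate the
 * SSA def table, pre-seed phi sources (see prepare_phi_values), emit the
 * scratch array and statically-indexed handles, walk the CF list, then
 * patch up phis and terminate with ret void.
 */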
static bool
emit_function(struct ntd_context *ctx, nir_function *func)
{
   assert(func->num_params == 0);
   nir_function_impl *impl = func->impl;
   if (!impl)
      return true;

   nir_metadata_require(impl, nir_metadata_block_index);

   const struct dxil_type *void_type = dxil_module_get_void_type(&ctx->mod);
   const struct dxil_type *func_type = dxil_module_add_function_type(&ctx->mod, void_type, NULL, 0);
   struct dxil_func_def *func_def = dxil_add_function_def(&ctx->mod, func->name, func_type, impl->num_blocks);
   if (!func_def)
      return false;

   if (func->is_entrypoint)
      ctx->main_func_def = func_def;
   else if (func == ctx->tess_ctrl_patch_constant_func)
      ctx->tess_ctrl_patch_constant_func_def = func_def;

   ctx->defs = rzalloc_array(ctx->ralloc_ctx, struct dxil_def, impl->ssa_alloc);
   if (!ctx->defs)
      return false;
   ctx->num_defs = impl->ssa_alloc;

   ctx->phis = _mesa_pointer_hash_table_create(ctx->ralloc_ctx);
   if (!ctx->phis)
      return false;

   prepare_phi_values(ctx, impl);

   if (!emit_scratch(ctx))
      return false;

   if (!emit_static_indexing_handles(ctx))
      return false;

   if (!emit_cf_list(ctx, &impl->body))
      return false;

   hash_table_foreach(ctx->phis, entry) {
      if (!fixup_phi(ctx, (nir_phi_instr *)entry->key,
                     (struct phi_block *)entry->data))
         return false;
   }

   if (!dxil_emit_ret_void(&ctx->mod))
      return false;

   ralloc_free(ctx->defs);
   ctx->defs = NULL;
   _mesa_hash_table_destroy(ctx->phis, NULL);
   return true;
}

5375 static bool
emit_module(struct ntd_context * ctx,const struct nir_to_dxil_options * opts)5376 emit_module(struct ntd_context *ctx, const struct nir_to_dxil_options *opts)
5377 {
5378    /* The validator forces us to emit resources in a specific order:
5379     * CBVs, Samplers, SRVs, UAVs. While we are at it also remove
5380     * stale struct uniforms, they are lowered but might not have been removed */
5381    sort_uniforms_by_binding_and_remove_structs(ctx->shader);
5382 
5383    /* CBVs */
5384    if (!emit_cbvs(ctx))
5385       return false;
5386 
5387    /* Samplers */
5388    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
5389       unsigned count = glsl_type_get_sampler_count(var->type);
5390       assert(count == 0 || glsl_type_is_bare_sampler(glsl_without_array(var->type)));
5391       if (count > 0 && !emit_sampler(ctx, var, count))
5392          return false;
5393    }
5394 
5395    /* SRVs */
5396    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
5397       if (glsl_type_is_texture(glsl_without_array(var->type)) &&
5398           !emit_srv(ctx, var, glsl_type_get_texture_count(var->type)))
5399          return false;
5400    }
5401 
5402    if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
5403       nir_foreach_image_variable(var, ctx->shader) {
5404          if ((var->data.access & ACCESS_NON_WRITEABLE) &&
5405              !emit_srv(ctx, var, glsl_type_get_image_count(var->type)))
5406             return false;
5407       }
5408    }
5409 
5410    /* Handle read-only SSBOs as SRVs */
5411    if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
5412       nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
5413          if ((var->data.access & ACCESS_NON_WRITEABLE) != 0) {
5414             unsigned count = 1;
5415             if (glsl_type_is_array(var->type))
5416                count = glsl_get_length(var->type);
5417             if (!emit_srv(ctx, var, count))
5418                return false;
5419          }
5420       }
5421    }
5422 
5423    if (ctx->shader->info.shared_size && shader_has_shared_ops(ctx->shader)) {
5424       const struct dxil_type *type;
5425       unsigned size;
5426 
5427       /*
5428        * We always allocate a u32 array, no matter the actual variable types.
5429        * According to the DXIL spec, the minimum load/store granularity is
5430        * 32-bit; anything smaller requires a read-extract/read-modify-write
5431        * approach (see the sketch after this block). Non-atomic 64-bit
5432        * accesses are allowed, but the GEP(cast(gvar, u64[] *), offset) and
5433        * cast(GEP(gvar, offset), u64 *) sequences don't seem to be accepted
5434        * by the DXIL validator when the pointer is in the groupshared address
5435        * space, making the 32-bit -> 64-bit pointer cast impossible.
5436        */
5437       size = ALIGN_POT(ctx->shader->info.shared_size, sizeof(uint32_t));
5438       type = dxil_module_get_array_type(&ctx->mod,
5439                                         dxil_module_get_int_type(&ctx->mod, 32),
5440                                         size / sizeof(uint32_t));
5441       ctx->sharedvars = dxil_add_global_ptr_var(&ctx->mod, "shared", type,
5442                                                 DXIL_AS_GROUPSHARED,
5443                                                 ffs(sizeof(uint64_t)),
5444                                                 NULL);
5445    }
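   /* Illustrative sketch only, not compiled here: with the groupshared
    * storage declared as a single u32 array, a 16-bit store at byte offset
    * `off` has to be lowered to a read-modify-write of the containing
    * 32-bit word, roughly:
    *
    *    uint32_t word  = shared[off / 4];
    *    uint32_t shift = (off % 4) * 8;
    *    word = (word & ~(0xffffu << shift)) | ((uint32_t)val16 << shift);
    *    shared[off / 4] = word;
    *
    * `shared`, `off` and `val16` are placeholder names for the lowered NIR
    * values; the actual lowering happens in the dxil_nir passes that run
    * before this backend.
    */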
5446 
5447    /* UAVs */
5448    if (ctx->shader->info.stage == MESA_SHADER_KERNEL) {
5449       if (!emit_globals(ctx, opts->num_kernel_globals))
5450          return false;
5451 
5452       ctx->consts = _mesa_pointer_hash_table_create(ctx->ralloc_ctx);
5453       if (!ctx->consts)
5454          return false;
5455       if (!emit_global_consts(ctx))
5456          return false;
5457    } else if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
5458       /* Handle read/write SSBOs as UAVs */
5459       nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
5460          if ((var->data.access & ACCESS_NON_WRITEABLE) == 0) {
5461             unsigned count = 1;
5462             if (glsl_type_is_array(var->type))
5463                count = glsl_get_length(var->type);
5464             if (!emit_uav(ctx, var->data.binding, var->data.descriptor_set,
5465                         count, DXIL_COMP_TYPE_INVALID,
5466                         DXIL_RESOURCE_KIND_RAW_BUFFER, var->name))
5467                return false;
5468 
5469          }
5470       }
5471    } else {
5472       for (unsigned i = 0; i < ctx->shader->info.num_ssbos; ++i) {
5473          char name[64];
5474          snprintf(name, sizeof(name), "__ssbo%d", i);
5475          if (!emit_uav(ctx, i, 0, 1, DXIL_COMP_TYPE_INVALID,
5476                        DXIL_RESOURCE_KIND_RAW_BUFFER, name))
5477             return false;
5478       }
5479       /* To work around a WARP bug, bind these descriptors a second time
5480        * in descriptor space 2. Space 0 is used for static indexing and
5481        * holds one UAV per SSBO, while space 2 is used for dynamic indexing
5482        * and holds a single UAV array (see the illustrative layout after
5483        * this block). */
5484       if (ctx->shader->info.num_ssbos &&
5485           !emit_uav(ctx, 0, 2, ctx->shader->info.num_ssbos, DXIL_COMP_TYPE_INVALID,
5486                     DXIL_RESOURCE_KIND_RAW_BUFFER, "__ssbo_dynamic"))
5487          return false;
5488    }
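   /* Illustrative layout under the workaround above, assuming a shader with
    * three SSBOs: the DXIL module declares raw-buffer UAVs u0, u1 and u2 in
    * space0 (one per SSBO, for statically indexed access) plus a single
    * three-element UAV array at u0 in space2 (for dynamically indexed
    * access). Both register sets alias the same underlying buffers. */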
5489 
5490    nir_foreach_image_variable(var, ctx->shader) {
5491       if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN &&
5492           var && (var->data.access & ACCESS_NON_WRITEABLE))
5493          continue; // already handled in SRV
5494 
5495       if (!emit_uav_var(ctx, var, glsl_type_get_image_count(var->type)))
5496          return false;
5497    }
5498 
5499    ctx->mod.info.has_per_sample_input =
5500       BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
5501    if (!ctx->mod.info.has_per_sample_input && ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
5502       nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in | nir_var_system_value) {
5503          if (var->data.sample) {
5504             ctx->mod.info.has_per_sample_input = true;
5505             break;
5506          }
5507       }
5508    }
5509 
5510    unsigned input_clip_size = ctx->mod.shader_kind == DXIL_PIXEL_SHADER ?
5511       ctx->shader->info.clip_distance_array_size : ctx->opts->input_clip_size;
5512    preprocess_signatures(&ctx->mod, ctx->shader, input_clip_size);
5513 
5514    nir_foreach_function(func, ctx->shader) {
5515       if (!emit_function(ctx, func))
5516          return false;
5517    }
5518 
5519    if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
5520       nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_out) {
5521          if (var->data.location == FRAG_RESULT_STENCIL) {
5522             ctx->mod.feats.stencil_ref = true;
5523          }
5524       }
5525    } else if (ctx->shader->info.stage == MESA_SHADER_VERTEX ||
5526               ctx->shader->info.stage == MESA_SHADER_TESS_EVAL) {
5527       if (ctx->shader->info.outputs_written &
5528           (VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER))
5529          ctx->mod.feats.array_layer_from_vs_or_ds = true;
5530    }
5531 
5532    if (ctx->mod.feats.native_low_precision)
5533       ctx->mod.minor_version = MAX2(ctx->mod.minor_version, 2);
5534 
5535    return emit_metadata(ctx) &&
5536           dxil_emit_module(&ctx->mod);
5537 }
5538 
5539 static unsigned int
5540 get_dxil_shader_kind(struct nir_shader *s)
5541 {
5542    switch (s->info.stage) {
5543    case MESA_SHADER_VERTEX:
5544       return DXIL_VERTEX_SHADER;
5545    case MESA_SHADER_TESS_CTRL:
5546       return DXIL_HULL_SHADER;
5547    case MESA_SHADER_TESS_EVAL:
5548       return DXIL_DOMAIN_SHADER;
5549    case MESA_SHADER_GEOMETRY:
5550       return DXIL_GEOMETRY_SHADER;
5551    case MESA_SHADER_FRAGMENT:
5552       return DXIL_PIXEL_SHADER;
5553    case MESA_SHADER_KERNEL:
5554    case MESA_SHADER_COMPUTE:
5555       return DXIL_COMPUTE_SHADER;
5556    default:
5557       unreachable("unknown shader stage in nir_to_dxil");
5558       return DXIL_COMPUTE_SHADER;
5559    }
5560 }
5561 
5562 static unsigned
5563 lower_bit_size_callback(const nir_instr* instr, void *data)
5564 {
5565    if (instr->type != nir_instr_type_alu)
5566       return 0;
5567    const nir_alu_instr *alu = nir_instr_as_alu(instr);
5568 
5569    if (nir_op_infos[alu->op].is_conversion)
5570       return 0;
5571 
5572    unsigned num_inputs = nir_op_infos[alu->op].num_inputs;
5573    const struct nir_to_dxil_options *opts = (const struct nir_to_dxil_options*)data;
5574    unsigned min_bit_size = opts->lower_int16 ? 32 : 16;
5575 
5576    unsigned ret = 0;
5577    for (unsigned i = 0; i < num_inputs; i++) {
5578       unsigned bit_size = nir_src_bit_size(alu->src[i].src);
5579       if (bit_size != 1 && bit_size < min_bit_size)
5580          ret = min_bit_size;
5581    }
5582 
5583    return ret;
5584 }
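/* Example of lower_bit_size_callback's effect, for illustration: with
 * opts->lower_int16 set, min_bit_size is 32, so a 16-bit nir_op_iadd
 * reports 32 and nir_lower_bit_size rewrites it roughly as
 *
 *    iadd(a16, b16)  ->  i2i16(iadd(i2i32(a16), i2i32(b16)))
 *
 * Boolean (1-bit) sources and conversion opcodes are left untouched. */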
5585 
5586 static void
5587 optimize_nir(struct nir_shader *s, const struct nir_to_dxil_options *opts)
5588 {
5589    bool progress;
5590    do {
5591       progress = false;
5592       NIR_PASS_V(s, nir_lower_vars_to_ssa);
5593       NIR_PASS(progress, s, nir_lower_indirect_derefs, nir_var_function_temp, UINT32_MAX);
5594       NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
5595       NIR_PASS(progress, s, nir_copy_prop);
5596       NIR_PASS(progress, s, nir_opt_copy_prop_vars);
5597       NIR_PASS(progress, s, nir_lower_bit_size, lower_bit_size_callback, (void*)opts);
5598       NIR_PASS(progress, s, dxil_nir_lower_8bit_conv);
5599       if (opts->lower_int16)
5600          NIR_PASS(progress, s, dxil_nir_lower_16bit_conv);
5601       NIR_PASS(progress, s, nir_opt_remove_phis);
5602       NIR_PASS(progress, s, nir_opt_dce);
5603       NIR_PASS(progress, s, nir_opt_if, nir_opt_if_aggressive_last_continue | nir_opt_if_optimize_phi_true_false);
5604       NIR_PASS(progress, s, nir_opt_dead_cf);
5605       NIR_PASS(progress, s, nir_opt_cse);
5606       NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
5607       NIR_PASS(progress, s, nir_opt_algebraic);
5608       NIR_PASS(progress, s, dxil_nir_lower_x2b);
5609       if (s->options->lower_int64_options)
5610          NIR_PASS(progress, s, nir_lower_int64);
5611       NIR_PASS(progress, s, nir_lower_alu);
5612       NIR_PASS(progress, s, nir_opt_constant_folding);
5613       NIR_PASS(progress, s, nir_opt_undef);
5614       NIR_PASS(progress, s, nir_lower_undef_to_zero);
5615       NIR_PASS(progress, s, nir_opt_deref);
5616       NIR_PASS(progress, s, dxil_nir_lower_upcast_phis, opts->lower_int16 ? 32 : 16);
5617       NIR_PASS(progress, s, nir_lower_64bit_phis);
5618       NIR_PASS_V(s, nir_lower_system_values);
5619    } while (progress);
5620 
5621    do {
5622       progress = false;
5623       NIR_PASS(progress, s, nir_opt_algebraic_late);
5624    } while (progress);
5625 }
5626 
5627 static
5628 void dxil_fill_validation_state(struct ntd_context *ctx,
5629                                 struct dxil_validation_state *state)
5630 {
5631    unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
5632       sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
5633    state->num_resources = ctx->resources.size / resource_element_size;
5634    state->resources.v0 = (struct dxil_resource_v0*)ctx->resources.data;
5635    state->state.psv1.psv0.max_expected_wave_lane_count = UINT_MAX;
5636    state->state.psv1.shader_stage = (uint8_t)ctx->mod.shader_kind;
5637    state->state.psv1.sig_input_elements = (uint8_t)ctx->mod.num_sig_inputs;
5638    state->state.psv1.sig_output_elements = (uint8_t)ctx->mod.num_sig_outputs;
5639    state->state.psv1.sig_patch_const_or_prim_elements = (uint8_t)ctx->mod.num_sig_patch_consts;
5640 
5641    switch (ctx->mod.shader_kind) {
5642    case DXIL_VERTEX_SHADER:
5643       state->state.psv1.psv0.vs.output_position_present = ctx->mod.info.has_out_position;
5644       break;
5645    case DXIL_PIXEL_SHADER:
5646       /* TODO: handle depth outputs */
5647       state->state.psv1.psv0.ps.depth_output = ctx->mod.info.has_out_depth;
5648       state->state.psv1.psv0.ps.sample_frequency =
5649          ctx->mod.info.has_per_sample_input;
5650       break;
5651    case DXIL_COMPUTE_SHADER:
5652       state->state.num_threads_x = MAX2(ctx->shader->info.workgroup_size[0], 1);
5653       state->state.num_threads_y = MAX2(ctx->shader->info.workgroup_size[1], 1);
5654       state->state.num_threads_z = MAX2(ctx->shader->info.workgroup_size[2], 1);
5655       break;
5656    case DXIL_GEOMETRY_SHADER:
5657       state->state.psv1.max_vertex_count = ctx->shader->info.gs.vertices_out;
5658       state->state.psv1.psv0.gs.input_primitive = dxil_get_input_primitive(ctx->shader->info.gs.input_primitive);
5659       state->state.psv1.psv0.gs.output_toplology = dxil_get_primitive_topology(ctx->shader->info.gs.output_primitive);
5660       state->state.psv1.psv0.gs.output_stream_mask = MAX2(ctx->shader->info.gs.active_stream_mask, 1);
5661       state->state.psv1.psv0.gs.output_position_present = ctx->mod.info.has_out_position;
5662       break;
5663    case DXIL_HULL_SHADER:
5664       state->state.psv1.psv0.hs.input_control_point_count = ctx->tess_input_control_point_count;
5665       state->state.psv1.psv0.hs.output_control_point_count = ctx->shader->info.tess.tcs_vertices_out;
5666       state->state.psv1.psv0.hs.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode);
5667       state->state.psv1.psv0.hs.tessellator_output_primitive = get_tessellator_output_primitive(&ctx->shader->info);
5668       state->state.psv1.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts;
5669       break;
5670    case DXIL_DOMAIN_SHADER:
5671       state->state.psv1.psv0.ds.input_control_point_count = ctx->shader->info.tess.tcs_vertices_out;
5672       state->state.psv1.psv0.ds.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode);
5673       state->state.psv1.psv0.ds.output_position_present = ctx->mod.info.has_out_position;
5674       state->state.psv1.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts;
5675       break;
5676    default:
5677       assert(0 && "Shader type not (yet) supported");
5678    }
5679 }
5680 
5681 static nir_variable *
5682 add_sysvalue(struct ntd_context *ctx,
5683               uint8_t value, char *name,
5684               int driver_location)
5685 {
5686 
5687    nir_variable *var = rzalloc(ctx->shader, nir_variable);
5688    if (!var)
5689       return NULL;
5690    var->data.driver_location = driver_location;
5691    var->data.location = value;
5692    var->type = glsl_uint_type();
5693    var->name = name;
5694    var->data.mode = nir_var_system_value;
5695    var->data.interpolation = INTERP_MODE_FLAT;
5696    return var;
5697 }
5698 
5699 static bool
5700 append_input_or_sysvalue(struct ntd_context *ctx,
5701                          int input_loc,  int sv_slot,
5702                          char *name, int driver_location)
5703 {
5704    if (input_loc >= 0) {
5705       /* Check whether an input variable is available that corresponds
5706        * to the sysvalue. */
5707       nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
5708          if (var->data.location == input_loc) {
5709             ctx->system_value[sv_slot] = var;
5710             return true;
5711          }
5712       }
5713    }
5714 
5715    ctx->system_value[sv_slot] = add_sysvalue(ctx, sv_slot, name, driver_location);
5716    if (!ctx->system_value[sv_slot])
5717       return false;
5718 
5719    nir_shader_add_variable(ctx->shader, ctx->system_value[sv_slot]);
5720    return true;
5721 }
5722 
5723 struct sysvalue_name {
5724    gl_system_value value;
5725    int slot;
5726    char *name;
5727    gl_shader_stage only_in_shader;
5728 } possible_sysvalues[] = {
5729    {SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, -1, "SV_VertexID", MESA_SHADER_NONE},
5730    {SYSTEM_VALUE_INSTANCE_ID, -1, "SV_InstanceID", MESA_SHADER_NONE},
5731    {SYSTEM_VALUE_FRONT_FACE, VARYING_SLOT_FACE, "SV_IsFrontFace", MESA_SHADER_NONE},
5732    {SYSTEM_VALUE_PRIMITIVE_ID, VARYING_SLOT_PRIMITIVE_ID, "SV_PrimitiveID", MESA_SHADER_GEOMETRY},
5733    {SYSTEM_VALUE_SAMPLE_ID, -1, "SV_SampleIndex", MESA_SHADER_NONE},
5734 };
5735 
5736 static bool
5737 allocate_sysvalues(struct ntd_context *ctx)
5738 {
5739    unsigned driver_location = 0;
5740    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in)
5741       driver_location++;
5742    nir_foreach_variable_with_modes(var, ctx->shader, nir_var_system_value)
5743       driver_location++;
5744 
5745    if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT &&
5746        ctx->shader->info.inputs_read &&
5747        !BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID)) {
5748       bool need_sample_id = true;
5749 
5750       /* "var->data.sample = true" sometimes just means "I want per-sample
5751        * shading", which explains why we can end up with vars that have flat
5752        * interpolation and the per-sample bit set. If there are only
5753        * variables of this kind, we need to tell DXIL that we read
5754        * SV_SampleIndex to keep DXIL validation happy (illustrated below).
5755        */
5756       nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
5757          if (!var->data.sample || var->data.interpolation != INTERP_MODE_FLAT) {
5758             need_sample_id = false;
5759             break;
5760          }
5761       }
5762 
5763       if (need_sample_id)
5764          BITSET_SET(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
5765    }
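   /* Illustration with a hypothetical shader: a fragment shader whose only
    * input is declared "flat sample in int i" never loads SV_SampleIndex
    * itself, but since every input is both flat and per-sample, the loop
    * above leaves need_sample_id set and we mark SYSTEM_VALUE_SAMPLE_ID as
    * read so the DXIL validator sees a sample-frequency signature. */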
5766 
5767    for (unsigned i = 0; i < ARRAY_SIZE(possible_sysvalues); ++i) {
5768       struct sysvalue_name *info = &possible_sysvalues[i];
5769       if (info->only_in_shader != MESA_SHADER_NONE &&
5770           info->only_in_shader != ctx->shader->info.stage)
5771          continue;
5772       if (BITSET_TEST(ctx->shader->info.system_values_read, info->value)) {
5773          if (!append_input_or_sysvalue(ctx, info->slot,
5774                                        info->value, info->name,
5775                                        driver_location++))
5776             return false;
5777       }
5778    }
5779    return true;
5780 }
5781 
5782 static int
5783 type_size_vec4(const struct glsl_type *type, bool bindless)
5784 {
5785    return glsl_count_attribute_slots(type, false);
5786 }
5787 
5788 static bool
5789 dxil_validator_can_validate_shader_model(unsigned sm_minor, unsigned val_minor)
5790 {
5791    /* Currently the validators are versioned such that val 1.x is needed for SM6.x */
5792    return sm_minor <= val_minor;
5793 }
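/* For example, under the rule in dxil_validator_can_validate_shader_model
 * above, validator 1.6 can validate SM 6.0 through SM 6.6, while a shader
 * built for SM 6.7 needs validator 1.7 or newer. */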
5794 
5795 static const unsigned dxil_validator_min_capable_version = DXIL_VALIDATOR_1_4;
5796 static const unsigned dxil_validator_max_capable_version = DXIL_VALIDATOR_1_7;
5797 
5798 bool
5799 nir_to_dxil(struct nir_shader *s, const struct nir_to_dxil_options *opts,
5800             struct blob *blob)
5801 {
5802    assert(opts);
5803    bool retval = true;
5804    debug_dxil = (int)debug_get_option_debug_dxil();
5805    blob_init(blob);
5806 
5807    if (opts->shader_model_max < SHADER_MODEL_6_1) {
5808       debug_printf("D3D12: cannot support emitting shader model 6.0 or lower\n");
5809       return false;
5810    }
5811 
5812    if (opts->validator_version_max != NO_DXIL_VALIDATION &&
5813        opts->validator_version_max < dxil_validator_min_capable_version) {
5814       debug_printf("D3D12: Invalid validator version %d.%d, must be 1.4 or greater\n",
5815          opts->validator_version_max >> 16,
5816          opts->validator_version_max & 0xffff);
5817       return false;
5818    }
5819 
5820    /* If no validation is requested, write the blob as if it were going to
5821     * be validated by the newest validator we understand. Do the same if the
5822     * requested validator is newer than we know how to write for. */
5823    uint32_t validator_version =
5824       opts->validator_version_max == NO_DXIL_VALIDATION ||
5825       opts->validator_version_max > dxil_validator_max_capable_version ?
5826       dxil_validator_max_capable_version : opts->validator_version_max;
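   /* Example of the clamping above: with dxil_validator_max_capable_version
    * at DXIL_VALIDATOR_1_7, requesting 1.5 keeps 1.5, while requesting 1.8
    * or passing NO_DXIL_VALIDATION both produce a 1.7-compatible blob. */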
5827 
5828    struct ntd_context *ctx = calloc(1, sizeof(*ctx));
5829    if (!ctx)
5830       return false;
5831 
5832    ctx->opts = opts;
5833    ctx->shader = s;
5834 
5835    ctx->ralloc_ctx = ralloc_context(NULL);
5836    if (!ctx->ralloc_ctx) {
5837       retval = false;
5838       goto out;
5839    }
5840 
5841    util_dynarray_init(&ctx->srv_metadata_nodes, ctx->ralloc_ctx);
5842    util_dynarray_init(&ctx->uav_metadata_nodes, ctx->ralloc_ctx);
5843    util_dynarray_init(&ctx->cbv_metadata_nodes, ctx->ralloc_ctx);
5844    util_dynarray_init(&ctx->sampler_metadata_nodes, ctx->ralloc_ctx);
5845    util_dynarray_init(&ctx->resources, ctx->ralloc_ctx);
5846    dxil_module_init(&ctx->mod, ctx->ralloc_ctx);
5847    ctx->mod.shader_kind = get_dxil_shader_kind(s);
5848    ctx->mod.major_version = 6;
5849    ctx->mod.minor_version = 1;
5850    ctx->mod.major_validator = validator_version >> 16;
5851    ctx->mod.minor_validator = validator_version & 0xffff;
5852 
5853    if (s->info.stage <= MESA_SHADER_FRAGMENT) {
5854       uint64_t in_mask =
5855          s->info.stage == MESA_SHADER_VERTEX ?
5856          0 : (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER);
5857       uint64_t out_mask =
5858          s->info.stage == MESA_SHADER_FRAGMENT ?
5859          ((1ull << FRAG_RESULT_STENCIL) | (1ull << FRAG_RESULT_SAMPLE_MASK)) :
5860          (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER);
5861 
5862       NIR_PASS_V(s, dxil_nir_fix_io_uint_type, in_mask, out_mask);
5863    }
5864 
5865    NIR_PASS_V(s, dxil_nir_lower_fquantize2f16);
5866    NIR_PASS_V(s, nir_lower_frexp);
5867    NIR_PASS_V(s, nir_lower_flrp, 16 | 32 | 64, true);
5868    NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4, nir_lower_io_lower_64bit_to_32);
5869    NIR_PASS_V(s, dxil_nir_ensure_position_writes);
5870    NIR_PASS_V(s, nir_lower_pack);
5871    NIR_PASS_V(s, dxil_nir_lower_system_values);
5872    NIR_PASS_V(s, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_system_value | nir_var_shader_out);
5873 
5874    if (ctx->mod.shader_kind == DXIL_HULL_SHADER)
5875       NIR_PASS_V(s, dxil_nir_split_tess_ctrl, &ctx->tess_ctrl_patch_constant_func);
5876 
5877    if (ctx->mod.shader_kind == DXIL_HULL_SHADER ||
5878        ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) {
5879       /* Make sure any derefs are gone after lower_io before updating tess level vars */
5880       NIR_PASS_V(s, nir_opt_dce);
5881       NIR_PASS_V(s, dxil_nir_fixup_tess_level_for_domain);
5882    }
5883 
5884    optimize_nir(s, opts);
5885 
5886    NIR_PASS_V(s, nir_remove_dead_variables,
5887               nir_var_function_temp | nir_var_shader_temp, NULL);
5888 
5889    if (!allocate_sysvalues(ctx))
5890       { retval = false; goto out; } /* take the common cleanup path; a bare return would leak ctx */
5891 
5892    NIR_PASS_V(s, dxil_nir_lower_sysval_to_load_input, ctx->system_value);
5893    NIR_PASS_V(s, nir_opt_dce);
5894 
5895    if (debug_dxil & DXIL_DEBUG_VERBOSE)
5896       nir_print_shader(s, stderr);
5897 
5898    if (!emit_module(ctx, opts)) {
5899       debug_printf("D3D12: emit_module failed\n");
5900       retval = false;
5901       goto out;
5902    }
5903 
5904    assert(ctx->mod.major_version == 6 && ctx->mod.minor_version >= 1);
5905    if ((ctx->mod.major_version << 16 | ctx->mod.minor_version) > opts->shader_model_max) {
5906       debug_printf("D3D12: max shader model exceeded\n");
5907       retval = false;
5908       goto out;
5909    }
5910 
5911    assert(ctx->mod.major_validator == 1);
5912    if (!dxil_validator_can_validate_shader_model(ctx->mod.minor_version, ctx->mod.minor_validator)) {
5913       debug_printf("D3D12: shader model exceeds max that can be validated\n");
5914       retval = false;
5915       goto out;
5916    }
5917 
5918    if (debug_dxil & DXIL_DEBUG_DUMP_MODULE) {
5919       struct dxil_dumper *dumper = dxil_dump_create();
5920       dxil_dump_module(dumper, &ctx->mod);
5921       fprintf(stderr, "\n");
5922       dxil_dump_buf_to_file(dumper, stderr);
5923       fprintf(stderr, "\n\n");
5924       dxil_dump_free(dumper);
5925    }
5926 
5927    struct dxil_container container;
5928    dxil_container_init(&container);
5929    if (!dxil_container_add_features(&container, &ctx->mod.feats)) {
5930       debug_printf("D3D12: dxil_container_add_features failed\n");
5931       retval = false;
5932       goto out;
5933    }
5934 
5935    if (!dxil_container_add_io_signature(&container,
5936                                         DXIL_ISG1,
5937                                         ctx->mod.num_sig_inputs,
5938                                         ctx->mod.inputs,
5939                                         ctx->mod.minor_validator >= 7)) {
5940       debug_printf("D3D12: failed to write input signature\n");
5941       retval = false;
5942       goto out;
5943    }
5944 
5945    if (!dxil_container_add_io_signature(&container,
5946                                         DXIL_OSG1,
5947                                         ctx->mod.num_sig_outputs,
5948                                         ctx->mod.outputs,
5949                                         ctx->mod.minor_validator >= 7)) {
5950       debug_printf("D3D12: failed to write output signature\n");
5951       retval = false;
5952       goto out;
5953    }
5954 
5955    if ((ctx->mod.shader_kind == DXIL_HULL_SHADER ||
5956         ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) &&
5957        !dxil_container_add_io_signature(&container,
5958                                         DXIL_PSG1,
5959                                         ctx->mod.num_sig_patch_consts,
5960                                         ctx->mod.patch_consts,
5961                                         ctx->mod.minor_validator >= 7)) {
5962       debug_printf("D3D12: failed to write patch constant signature\n");
5963       retval = false;
5964       goto out;
5965    }
5966 
5967    struct dxil_validation_state validation_state;
5968    memset(&validation_state, 0, sizeof(validation_state));
5969    dxil_fill_validation_state(ctx, &validation_state);
5970 
5971    if (!dxil_container_add_state_validation(&container, &ctx->mod,
5972                                             &validation_state)) {
5973       debug_printf("D3D12: failed to write state-validation\n");
5974       retval = false;
5975       goto out;
5976    }
5977 
5978    if (!dxil_container_add_module(&container, &ctx->mod)) {
5979       debug_printf("D3D12: failed to write module\n");
5980       retval = false;
5981       goto out;
5982    }
5983 
5984    if (!dxil_container_write(&container, blob)) {
5985       debug_printf("D3D12: dxil_container_write failed\n");
5986       retval = false;
5987       goto out;
5988    }
5989    dxil_container_finish(&container);
5990 
5991    if (debug_dxil & DXIL_DEBUG_DUMP_BLOB) {
5992       static int shader_id = 0;
5993       char buffer[64];
5994       snprintf(buffer, sizeof(buffer), "shader_%s_%d.blob",
5995                get_shader_kind_str(ctx->mod.shader_kind), shader_id++);
5996       debug_printf("Try to write blob to %s\n", buffer);
5997       FILE *f = fopen(buffer, "wb");
5998       if (f) {
5999          fwrite(blob->data, 1, blob->size, f);
6000          fclose(f);
6001       }
6002    }
6003 
6004 out:
6005    dxil_module_release(&ctx->mod);
6006    ralloc_free(ctx->ralloc_ctx);
6007    free(ctx);
6008    return retval;
6009 }
6010 
6011 enum dxil_sysvalue_type
6012 nir_var_to_dxil_sysvalue_type(nir_variable *var, uint64_t other_stage_mask)
6013 {
6014    switch (var->data.location) {
6015    case VARYING_SLOT_FACE:
6016       return DXIL_GENERATED_SYSVALUE;
6017    case VARYING_SLOT_POS:
6018    case VARYING_SLOT_PRIMITIVE_ID:
6019    case VARYING_SLOT_CLIP_DIST0:
6020    case VARYING_SLOT_CLIP_DIST1:
6021    case VARYING_SLOT_PSIZ:
6022    case VARYING_SLOT_TESS_LEVEL_INNER:
6023    case VARYING_SLOT_TESS_LEVEL_OUTER:
6024    case VARYING_SLOT_VIEWPORT:
6025    case VARYING_SLOT_LAYER:
6026       if (!((1ull << var->data.location) & other_stage_mask))
6027          return DXIL_SYSVALUE;
6028       FALLTHROUGH;
6029    default:
6030       return DXIL_NO_SYSVALUE;
6031    }
6032 }
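/* Example: VARYING_SLOT_POS maps to DXIL_SYSVALUE (SV_Position) as long as
 * its bit is absent from other_stage_mask; if the adjacent stage also uses
 * the slot as an ordinary varying, we fall through to DXIL_NO_SYSVALUE.
 * VARYING_SLOT_FACE is always DXIL_GENERATED_SYSVALUE. */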
6033