/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_to_dxil.h"

#include "dxil_container.h"
#include "dxil_dump.h"
#include "dxil_enums.h"
#include "dxil_function.h"
#include "dxil_module.h"
#include "dxil_nir.h"
#include "dxil_signature.h"

#include "nir/nir_builder.h"
#include "util/u_debug.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"

#include "git_sha1.h"

#include "vulkan/vulkan_core.h"

#include <stdint.h>

int debug_dxil = 0;

static const struct debug_named_value
dxil_debug_options[] = {
   { "verbose", DXIL_DEBUG_VERBOSE, NULL },
   { "dump_blob", DXIL_DEBUG_DUMP_BLOB, "Write shader blobs" },
   { "trace", DXIL_DEBUG_TRACE, "Trace instruction conversion" },
   { "dump_module", DXIL_DEBUG_DUMP_MODULE, "Dump module tree to stderr" },
   DEBUG_NAMED_VALUE_END
};

DEBUG_GET_ONCE_FLAGS_OPTION(debug_dxil, "DXIL_DEBUG", dxil_debug_options, 0)
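
/* Usage note (assuming Mesa's standard debug-flag parsing): the options
 * above are selected through the DXIL_DEBUG environment variable as a
 * comma-separated list of flag names, e.g. DXIL_DEBUG=trace,dump_blob.
 */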

#define NIR_INSTR_UNSUPPORTED(instr) \
   if (debug_dxil & DXIL_DEBUG_VERBOSE) \
      do { \
         fprintf(stderr, "Unsupported instruction:"); \
         nir_print_instr(instr, stderr); \
         fprintf(stderr, "\n"); \
      } while (0)

#define TRACE_CONVERSION(instr) \
   if (debug_dxil & DXIL_DEBUG_TRACE) \
      do { \
         fprintf(stderr, "Convert '"); \
         nir_print_instr(instr, stderr); \
         fprintf(stderr, "'\n"); \
      } while (0)

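/* NIR lowering requests for this backend: operations DXIL has no direct
 * encoding for (negation, ffma, pack/unpack splits, ...) are lowered by NIR
 * before translation, while the has_* entries advertise what DXIL does
 * provide natively (fsub/isub, scoped barriers, txs, ...).
 */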
static const nir_shader_compiler_options
nir_options = {
   .lower_ineg = true,
   .lower_fneg = true,
   .lower_ffma16 = true,
   .lower_ffma32 = true,
   .lower_isign = true,
   .lower_fsign = true,
   .lower_iabs = true,
   .lower_fmod = true,
   .lower_fpow = true,
   .lower_scmp = true,
   .lower_ldexp = true,
   .lower_flrp16 = true,
   .lower_flrp32 = true,
   .lower_flrp64 = true,
   .lower_bitfield_extract_to_shifts = true,
   .lower_extract_word = true,
   .lower_extract_byte = true,
   .lower_insert_word = true,
   .lower_insert_byte = true,
   .lower_all_io_to_elements = true,
   .lower_all_io_to_temps = true,
   .lower_hadd = true,
   .lower_uadd_sat = true,
   .lower_iadd_sat = true,
   .lower_uadd_carry = true,
   .lower_mul_high = true,
   .lower_rotate = true,
   .lower_pack_64_2x32_split = true,
   .lower_pack_32_2x16_split = true,
   .lower_unpack_64_2x32_split = true,
   .lower_unpack_32_2x16_split = true,
   .has_fsub = true,
   .has_isub = true,
   .use_scoped_barrier = true,
   .vertex_id_zero_based = true,
   .lower_base_vertex = true,
   .has_cs_global_id = true,
   .has_txs = true,
};

const nir_shader_compiler_options*
dxil_get_nir_compiler_options(void)
{
   return &nir_options;
}

static bool
emit_llvm_ident(struct dxil_module *m)
{
   const struct dxil_mdnode *compiler = dxil_get_metadata_string(m, "Mesa version " PACKAGE_VERSION MESA_GIT_SHA1);
   if (!compiler)
      return false;

   const struct dxil_mdnode *llvm_ident = dxil_get_metadata_node(m, &compiler, 1);
   return llvm_ident &&
          dxil_add_metadata_named_node(m, "llvm.ident", &llvm_ident, 1);
}

static bool
emit_named_version(struct dxil_module *m, const char *name,
                   int major, int minor)
{
   const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, major);
   const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, minor);
   const struct dxil_mdnode *version_nodes[] = { major_node, minor_node };
   const struct dxil_mdnode *version = dxil_get_metadata_node(m, version_nodes,
                                                     ARRAY_SIZE(version_nodes));
   return dxil_add_metadata_named_node(m, name, &version, 1);
}

static const char *
get_shader_kind_str(enum dxil_shader_kind kind)
{
   switch (kind) {
   case DXIL_PIXEL_SHADER:
      return "ps";
   case DXIL_VERTEX_SHADER:
      return "vs";
   case DXIL_GEOMETRY_SHADER:
      return "gs";
   case DXIL_HULL_SHADER:
      return "hs";
   case DXIL_DOMAIN_SHADER:
      return "ds";
   case DXIL_COMPUTE_SHADER:
      return "cs";
   default:
      unreachable("invalid shader kind");
   }
}

static bool
emit_dx_shader_model(struct dxil_module *m)
{
   const struct dxil_mdnode *type_node = dxil_get_metadata_string(m, get_shader_kind_str(m->shader_kind));
   const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, m->major_version);
   const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, m->minor_version);
   const struct dxil_mdnode *shader_model[] = { type_node, major_node,
                                                minor_node };
   const struct dxil_mdnode *dx_shader_model = dxil_get_metadata_node(m, shader_model, ARRAY_SIZE(shader_model));

   return dxil_add_metadata_named_node(m, "dx.shaderModel",
                                       &dx_shader_model, 1);
}

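/* Extended-property tags attached to the SRV/UAV resource metadata below:
 * typed buffers record their element type, structured buffers their stride.
 */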
enum {
   DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG = 0,
   DXIL_STRUCTURED_BUFFER_ELEMENT_STRIDE_TAG = 1
};

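/* Opcode numbers for the dx.op.* DXIL intrinsics used by this backend; the
 * values follow the opcode table in DirectXShaderCompiler's DXIL.rst.
 */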
enum dxil_intr {
   DXIL_INTR_LOAD_INPUT = 4,
   DXIL_INTR_STORE_OUTPUT = 5,
   DXIL_INTR_FABS = 6,
   DXIL_INTR_SATURATE = 7,

   DXIL_INTR_ISFINITE = 10,
   DXIL_INTR_ISNORMAL = 11,

   DXIL_INTR_FCOS = 12,
   DXIL_INTR_FSIN = 13,

   DXIL_INTR_FEXP2 = 21,
   DXIL_INTR_FRC = 22,
   DXIL_INTR_FLOG2 = 23,

   DXIL_INTR_SQRT = 24,
   DXIL_INTR_RSQRT = 25,
   DXIL_INTR_ROUND_NE = 26,
   DXIL_INTR_ROUND_NI = 27,
   DXIL_INTR_ROUND_PI = 28,
   DXIL_INTR_ROUND_Z = 29,

   DXIL_INTR_COUNTBITS = 31,
   DXIL_INTR_FIRSTBIT_HI = 33,

   DXIL_INTR_FMAX = 35,
   DXIL_INTR_FMIN = 36,
   DXIL_INTR_IMAX = 37,
   DXIL_INTR_IMIN = 38,
   DXIL_INTR_UMAX = 39,
   DXIL_INTR_UMIN = 40,

   DXIL_INTR_FMA = 47,

   DXIL_INTR_CREATE_HANDLE = 57,
   DXIL_INTR_CBUFFER_LOAD_LEGACY = 59,

   DXIL_INTR_SAMPLE = 60,
   DXIL_INTR_SAMPLE_BIAS = 61,
   DXIL_INTR_SAMPLE_LEVEL = 62,
   DXIL_INTR_SAMPLE_GRAD = 63,
   DXIL_INTR_SAMPLE_CMP = 64,
   DXIL_INTR_SAMPLE_CMP_LVL_ZERO = 65,

   DXIL_INTR_TEXTURE_LOAD = 66,
   DXIL_INTR_TEXTURE_STORE = 67,

   DXIL_INTR_BUFFER_LOAD = 68,
   DXIL_INTR_BUFFER_STORE = 69,

   DXIL_INTR_TEXTURE_SIZE = 72,

   DXIL_INTR_ATOMIC_BINOP = 78,
   DXIL_INTR_ATOMIC_CMPXCHG = 79,
   DXIL_INTR_BARRIER = 80,
   DXIL_INTR_TEXTURE_LOD = 81,

   DXIL_INTR_DISCARD = 82,
   DXIL_INTR_DDX_COARSE = 83,
   DXIL_INTR_DDY_COARSE = 84,
   DXIL_INTR_DDX_FINE = 85,
   DXIL_INTR_DDY_FINE = 86,

   DXIL_INTR_SAMPLE_INDEX = 90,

   DXIL_INTR_THREAD_ID = 93,
   DXIL_INTR_GROUP_ID = 94,
   DXIL_INTR_THREAD_ID_IN_GROUP = 95,
   DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP = 96,

   DXIL_INTR_EMIT_STREAM = 97,
   DXIL_INTR_CUT_STREAM = 98,

   DXIL_INTR_MAKE_DOUBLE = 101,
   DXIL_INTR_SPLIT_DOUBLE = 102,

   DXIL_INTR_PRIMITIVE_ID = 108,

   DXIL_INTR_LEGACY_F32TOF16 = 130,
   DXIL_INTR_LEGACY_F16TOF32 = 131,

   DXIL_INTR_ATTRIBUTE_AT_VERTEX = 137,
};

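/* Operand values for dx.op.atomicBinOp, kept in DXIL's own AtomicBinOpCode
 * order (see DXIL.rst).
 */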
enum dxil_atomic_op {
   DXIL_ATOMIC_ADD = 0,
   DXIL_ATOMIC_AND = 1,
   DXIL_ATOMIC_OR = 2,
   DXIL_ATOMIC_XOR = 3,
   DXIL_ATOMIC_IMIN = 4,
   DXIL_ATOMIC_IMAX = 5,
   DXIL_ATOMIC_UMIN = 6,
   DXIL_ATOMIC_UMAX = 7,
   DXIL_ATOMIC_EXCHANGE = 8,
};

typedef struct {
   unsigned id;
   unsigned binding;
   unsigned size;
   unsigned space;
} resource_array_layout;

static void
fill_resource_metadata(struct dxil_module *m, const struct dxil_mdnode **fields,
                       const struct dxil_type *struct_type,
                       const char *name, const resource_array_layout *layout)
{
   const struct dxil_type *pointer_type = dxil_module_get_pointer_type(m, struct_type);
   const struct dxil_value *pointer_undef = dxil_module_get_undef(m, pointer_type);

   fields[0] = dxil_get_metadata_int32(m, layout->id); // resource ID
   fields[1] = dxil_get_metadata_value(m, pointer_type, pointer_undef); // global constant symbol
   fields[2] = dxil_get_metadata_string(m, name ? name : ""); // name
   fields[3] = dxil_get_metadata_int32(m, layout->space); // space ID
   fields[4] = dxil_get_metadata_int32(m, layout->binding); // lower bound
   fields[5] = dxil_get_metadata_int32(m, layout->size); // range size
}

static const struct dxil_mdnode *
emit_srv_metadata(struct dxil_module *m, const struct dxil_type *elem_type,
                  const char *name, const resource_array_layout *layout,
                  enum dxil_component_type comp_type,
                  enum dxil_resource_kind res_kind)
{
   const struct dxil_mdnode *fields[9];

   const struct dxil_mdnode *metadata_tag_nodes[2];

   fill_resource_metadata(m, fields, elem_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
   fields[7] = dxil_get_metadata_int1(m, 0); // sample count
   if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
       res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
      metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
      metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
      fields[8] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
   } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      fields[8] = NULL;
   else
      unreachable("Structured buffers not supported yet");

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_uav_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                  const char *name, const resource_array_layout *layout,
                  enum dxil_component_type comp_type,
                  enum dxil_resource_kind res_kind)
{
   const struct dxil_mdnode *fields[11];

   const struct dxil_mdnode *metadata_tag_nodes[2];

   fill_resource_metadata(m, fields, struct_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
   fields[7] = dxil_get_metadata_int1(m, false); // globally-coherent
   fields[8] = dxil_get_metadata_int1(m, false); // has counter
   fields[9] = dxil_get_metadata_int1(m, false); // is ROV
   if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
       res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
      metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
      metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
      fields[10] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
   } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      fields[10] = NULL;
   else
      unreachable("Structured buffers not supported yet");

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_cbv_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                  const char *name, const resource_array_layout *layout,
                  unsigned size)
{
   const struct dxil_mdnode *fields[8];

   fill_resource_metadata(m, fields, struct_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, size); // constant buffer size
   fields[7] = NULL; // metadata

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_sampler_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                      nir_variable *var, const resource_array_layout *layout)
{
   const struct dxil_mdnode *fields[8];
   const struct glsl_type *type = glsl_without_array(var->type);

   fill_resource_metadata(m, fields, struct_type, var->name, layout);
   enum dxil_sampler_kind sampler_kind = glsl_sampler_type_is_shadow(type) ?
          DXIL_SAMPLER_KIND_COMPARISON : DXIL_SAMPLER_KIND_DEFAULT;
   fields[6] = dxil_get_metadata_int32(m, sampler_kind); // sampler kind
   fields[7] = NULL; // metadata

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}


#define MAX_SRVS 128
#define MAX_UAVS 64
#define MAX_CBVS 64 // ??
#define MAX_SAMPLERS 64 // ??

struct dxil_def {
   const struct dxil_value *chans[NIR_MAX_VEC_COMPONENTS];
};

struct ntd_context {
   void *ralloc_ctx;
   const struct nir_to_dxil_options *opts;
   struct nir_shader *shader;

   struct dxil_module mod;

   struct util_dynarray srv_metadata_nodes;
   const struct dxil_value *srv_handles[MAX_SRVS];

   struct util_dynarray uav_metadata_nodes;
   const struct dxil_value *uav_handles[MAX_UAVS];

   struct util_dynarray cbv_metadata_nodes;
   const struct dxil_value *cbv_handles[MAX_CBVS];

   struct util_dynarray sampler_metadata_nodes;
   const struct dxil_value *sampler_handles[MAX_SAMPLERS];

   struct util_dynarray resources;

   const struct dxil_mdnode *shader_property_nodes[6];
   size_t num_shader_property_nodes;

   struct dxil_def *defs;
   unsigned num_defs;
   struct hash_table *phis;

   const struct dxil_value *sharedvars;
   const struct dxil_value *scratchvars;
   struct hash_table *consts;

   nir_variable *ps_front_face;
   nir_variable *system_value[SYSTEM_VALUE_MAX];
};

static const char*
unary_func_name(enum dxil_intr intr)
{
   switch (intr) {
   case DXIL_INTR_COUNTBITS:
   case DXIL_INTR_FIRSTBIT_HI:
      return "dx.op.unaryBits";
   case DXIL_INTR_ISFINITE:
   case DXIL_INTR_ISNORMAL:
      return "dx.op.isSpecialFloat";
   default:
      return "dx.op.unary";
   }
}

static const struct dxil_value *
emit_unary_call(struct ntd_context *ctx, enum overload_type overload,
                enum dxil_intr intr,
                const struct dxil_value *op0)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod,
                                                    unary_func_name(intr),
                                                    overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
     opcode,
     op0
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_binary_call(struct ntd_context *ctx, enum overload_type overload,
                 enum dxil_intr intr,
                 const struct dxil_value *op0, const struct dxil_value *op1)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.binary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
     opcode,
     op0,
     op1
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_tertiary_call(struct ntd_context *ctx, enum overload_type overload,
                   enum dxil_intr intr,
                   const struct dxil_value *op0,
                   const struct dxil_value *op1,
                   const struct dxil_value *op2)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.tertiary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
     opcode,
     op0,
     op1,
     op2
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_threadid_call(struct ntd_context *ctx, const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadId", DXIL_I32);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
       DXIL_INTR_THREAD_ID);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
     opcode,
     comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_threadidingroup_call(struct ntd_context *ctx,
                          const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadIdInGroup", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
       DXIL_INTR_THREAD_ID_IN_GROUP);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
     opcode,
     comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_flattenedthreadidingroup_call(struct ntd_context *ctx)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.flattenedThreadIdInGroup", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
     opcode
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_groupid_call(struct ntd_context *ctx, const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.groupId", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
       DXIL_INTR_GROUP_ID);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
     opcode,
     comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_bufferload_call(struct ntd_context *ctx,
                     const struct dxil_value *handle,
                     const struct dxil_value *coord[2],
                     enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferLoad", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_BUFFER_LOAD);
   const struct dxil_value *args[] = { opcode, handle, coord[0], coord[1] };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_bufferstore_call(struct ntd_context *ctx,
                      const struct dxil_value *handle,
                      const struct dxil_value *coord[2],
                      const struct dxil_value *value[4],
                      const struct dxil_value *write_mask,
                      enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferStore", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_BUFFER_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1],
      value[0], value[1], value[2], value[3],
      write_mask
   };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_textureload_call(struct ntd_context *ctx,
                      const struct dxil_value *handle,
                      const struct dxil_value *coord[3],
                      enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", overload);
   if (!func)
      return NULL;
   const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
   const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_TEXTURE_LOAD);
   const struct dxil_value *args[] = { opcode, handle,
      /*lod_or_sample*/ int_undef,
      coord[0], coord[1], coord[2],
      /* offsets */ int_undef, int_undef, int_undef};

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_texturestore_call(struct ntd_context *ctx,
                       const struct dxil_value *handle,
                       const struct dxil_value *coord[3],
                       const struct dxil_value *value[4],
                       const struct dxil_value *write_mask,
                       enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureStore", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
      DXIL_INTR_TEXTURE_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1], coord[2],
      value[0], value[1], value[2], value[3],
      write_mask
   };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_atomic_binop(struct ntd_context *ctx,
                  const struct dxil_value *handle,
                  enum dxil_atomic_op atomic_op,
                  const struct dxil_value *coord[3],
                  const struct dxil_value *value)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.atomicBinOp", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_BINOP);
   const struct dxil_value *atomic_op_value =
      dxil_module_get_int32_const(&ctx->mod, atomic_op);
   const struct dxil_value *args[] = {
      opcode, handle, atomic_op_value,
      coord[0], coord[1], coord[2], value
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_atomic_cmpxchg(struct ntd_context *ctx,
                    const struct dxil_value *handle,
                    const struct dxil_value *coord[3],
                    const struct dxil_value *cmpval,
                    const struct dxil_value *newval)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.atomicCompareExchange", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_CMPXCHG);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1], coord[2], cmpval, newval
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

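/* A sketch of the DXIL this produces (value names illustrative only):
 *   %h = call %dx.types.Handle @dx.op.createHandle(
 *            i32 57, i8 <class>, i32 <rangeId>, i32 <index>, i1 <nonUniform>)
 */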
static const struct dxil_value *
emit_createhandle_call(struct ntd_context *ctx,
                       enum dxil_resource_class resource_class,
                       unsigned resource_range_id,
                       const struct dxil_value *resource_range_index,
                       bool non_uniform_resource_index)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE);
   const struct dxil_value *resource_class_value = dxil_module_get_int8_const(&ctx->mod, resource_class);
   const struct dxil_value *resource_range_id_value = dxil_module_get_int32_const(&ctx->mod, resource_range_id);
   const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
   if (!opcode || !resource_class_value || !resource_range_id_value ||
       !non_uniform_resource_index_value)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      resource_class_value,
      resource_range_id_value,
      resource_range_index,
      non_uniform_resource_index_value
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.createHandle", DXIL_NONE);

   if (!func)
      return NULL;

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_createhandle_call_const_index(struct ntd_context *ctx,
                                   enum dxil_resource_class resource_class,
                                   unsigned resource_range_id,
                                   unsigned resource_range_index,
                                   bool non_uniform_resource_index)
{
   const struct dxil_value *resource_range_index_value = dxil_module_get_int32_const(&ctx->mod, resource_range_index);
   if (!resource_range_index_value)
      return NULL;

   return emit_createhandle_call(ctx, resource_class, resource_range_id,
                                 resource_range_index_value,
                                 non_uniform_resource_index);
}

static void
add_resource(struct ntd_context *ctx, enum dxil_resource_type type,
             const resource_array_layout *layout)
{
   struct dxil_resource *resource = util_dynarray_grow(&ctx->resources, struct dxil_resource, 1);
   resource->resource_type = type;
   resource->space = layout->space;
   resource->lower_bound = layout->binding;
   if (layout->size == 0 || (uint64_t)layout->size + layout->binding >= UINT_MAX)
      resource->upper_bound = UINT_MAX;
   else
      resource->upper_bound = layout->binding + layout->size - 1;
}

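/* get_resource_id assumes ctx->resources holds ranges in metadata-emission
 * order: CBVs first, then samplers, SRVs, and UAVs. The class offsets below
 * rely on that order to map a (space, binding) pair back to its range index.
 */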
static unsigned
get_resource_id(struct ntd_context *ctx, enum dxil_resource_class class,
                unsigned space, unsigned binding)
{
   unsigned offset = 0;
   unsigned count = 0;

   unsigned num_srvs = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_uavs = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_cbvs = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_samplers = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);

   switch (class) {
   case DXIL_RESOURCE_CLASS_UAV:
      offset = num_srvs + num_samplers + num_cbvs;
      count = num_uavs;
      break;
   case DXIL_RESOURCE_CLASS_SRV:
      offset = num_samplers + num_cbvs;
      count = num_srvs;
      break;
   case DXIL_RESOURCE_CLASS_SAMPLER:
      offset = num_cbvs;
      count = num_samplers;
      break;
   case DXIL_RESOURCE_CLASS_CBV:
      offset = 0;
      count = num_cbvs;
      break;
   }

   assert(offset + count <= util_dynarray_num_elements(&ctx->resources, struct dxil_resource));
   for (unsigned i = offset; i < offset + count; ++i) {
      const struct dxil_resource *resource = util_dynarray_element(&ctx->resources, struct dxil_resource, i);
      if (resource->space == space &&
          resource->lower_bound <= binding &&
          resource->upper_bound >= binding) {
         return i - offset;
      }
   }

   unreachable("Resource access for undeclared range");
   return 0;
}

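/* Declares one SRV range: SSBOs bound read-only become raw buffers, any
 * other type a typed resource. Outside the Vulkan environment a handle is
 * pre-created for every element of the binding range.
 */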
static bool
emit_srv(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   unsigned id = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
   unsigned binding = var->data.binding;
   resource_array_layout layout = {id, binding, count, var->data.descriptor_set};

   enum dxil_component_type comp_type;
   enum dxil_resource_kind res_kind;
   enum dxil_resource_type res_type;
   if (var->data.mode == nir_var_mem_ssbo) {
      comp_type = DXIL_COMP_TYPE_INVALID;
      res_kind = DXIL_RESOURCE_KIND_RAW_BUFFER;
      res_type = DXIL_RES_SRV_RAW;
   } else {
      comp_type = dxil_get_comp_type(var->type);
      res_kind = dxil_get_resource_kind(var->type);
      res_type = DXIL_RES_SRV_TYPED;
   }
   const struct dxil_type *res_type_as_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, false /* readwrite */);
   const struct dxil_mdnode *srv_meta = emit_srv_metadata(&ctx->mod, res_type_as_type, var->name,
                                                          &layout, comp_type, res_kind);

   if (!srv_meta)
      return false;

   util_dynarray_append(&ctx->srv_metadata_nodes, const struct dxil_mdnode *, srv_meta);
   add_resource(ctx, res_type, &layout);
   if (res_type == DXIL_RES_SRV_RAW)
      ctx->mod.raw_and_structured_buffers = true;

   if (!ctx->opts->vulkan_environment) {
      for (unsigned i = 0; i < count; ++i) {
         const struct dxil_value *handle =
            emit_createhandle_call_const_index(ctx, DXIL_RESOURCE_CLASS_SRV,
                                               id, binding + i, false);
         if (!handle)
            return false;

         int idx = var->data.binding + i;
         ctx->srv_handles[idx] = handle;
      }
   }

   return true;
}

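/* Kernel-style globals: all SSBO variables plus 'size' extra slots are
 * declared as one raw-buffer UAV array named "globals"; handles for it are
 * created on demand rather than up front.
 */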
static bool
emit_globals(struct ntd_context *ctx, unsigned size)
{
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo)
      size++;

   if (!size)
      return true;

   const struct dxil_type *struct_type = dxil_module_get_res_type(&ctx->mod,
      DXIL_RESOURCE_KIND_RAW_BUFFER, DXIL_COMP_TYPE_INVALID, true /* readwrite */);
   if (!struct_type)
      return false;

   const struct dxil_type *array_type =
      dxil_module_get_array_type(&ctx->mod, struct_type, size);
   if (!array_type)
      return false;

   resource_array_layout layout = {0, 0, size, 0};
   const struct dxil_mdnode *uav_meta =
      emit_uav_metadata(&ctx->mod, array_type,
                                   "globals", &layout,
                                   DXIL_COMP_TYPE_INVALID,
                                   DXIL_RESOURCE_KIND_RAW_BUFFER);
   if (!uav_meta)
      return false;

   util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta);
   if (util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8)
      ctx->mod.feats.use_64uavs = 1;
   /* Handles to UAVs used for kernel globals are created on-demand */
   add_resource(ctx, DXIL_RES_UAV_RAW, &layout);
   ctx->mod.raw_and_structured_buffers = true;
   return true;
}

static bool
emit_uav(struct ntd_context *ctx, unsigned binding, unsigned space, unsigned count,
         enum dxil_component_type comp_type, enum dxil_resource_kind res_kind, const char *name)
{
   unsigned id = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
   resource_array_layout layout = { id, binding, count, space };

   const struct dxil_type *res_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, true /* readwrite */);
   const struct dxil_mdnode *uav_meta = emit_uav_metadata(&ctx->mod, res_type, name,
                                                          &layout, comp_type, res_kind);

   if (!uav_meta)
      return false;

   util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta);
   if (util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8)
      ctx->mod.feats.use_64uavs = 1;

   add_resource(ctx, res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER ? DXIL_RES_UAV_RAW : DXIL_RES_UAV_TYPED, &layout);
   if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      ctx->mod.raw_and_structured_buffers = true;

   if (!ctx->opts->vulkan_environment) {
      for (unsigned i = 0; i < count; ++i) {
         const struct dxil_value *handle = emit_createhandle_call_const_index(ctx, DXIL_RESOURCE_CLASS_UAV,
                                                                              id, binding + i, false);
         if (!handle)
            return false;

         ctx->uav_handles[binding + i] = handle;
      }
   }

   return true;
}

static bool
emit_uav_var(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   unsigned binding = var->data.binding;
   unsigned space = var->data.descriptor_set;
   enum dxil_component_type comp_type = dxil_get_comp_type(var->type);
   enum dxil_resource_kind res_kind = dxil_get_resource_kind(var->type);
   const char *name = var->name;

   return emit_uav(ctx, binding, space, count, comp_type, res_kind, name);
}

static unsigned
get_dword_size(const struct glsl_type *type)
{
   if (glsl_type_is_array(type)) {
      type = glsl_without_array(type);
   }
   assert(glsl_type_is_struct(type) || glsl_type_is_interface(type));
   return glsl_get_explicit_size(type, false);
}

static void
var_fill_const_array_with_vector_or_scalar(struct ntd_context *ctx,
                                           const struct nir_constant *c,
                                           const struct glsl_type *type,
                                           void *const_vals,
                                           unsigned int offset)
{
   assert(glsl_type_is_vector_or_scalar(type));
   unsigned int components = glsl_get_vector_elements(type);
   unsigned bit_size = glsl_get_bit_size(type);
   unsigned int increment = bit_size / 8;

   for (unsigned int comp = 0; comp < components; comp++) {
      uint8_t *dst = (uint8_t *)const_vals + offset;

      switch (bit_size) {
      case 64:
         memcpy(dst, &c->values[comp].u64, sizeof(c->values[0].u64));
         break;
      case 32:
         memcpy(dst, &c->values[comp].u32, sizeof(c->values[0].u32));
         break;
      case 16:
         memcpy(dst, &c->values[comp].u16, sizeof(c->values[0].u16));
         break;
      case 8:
         assert(glsl_base_type_is_integer(glsl_get_base_type(type)));
         memcpy(dst, &c->values[comp].u8, sizeof(c->values[0].u8));
         break;
      default:
         unreachable("unexpected bit-size");
      }

      offset += increment;
   }
}

static void
var_fill_const_array(struct ntd_context *ctx, const struct nir_constant *c,
                     const struct glsl_type *type, void *const_vals,
                     unsigned int offset)
{
   assert(!glsl_type_is_interface(type));

   if (glsl_type_is_vector_or_scalar(type)) {
      var_fill_const_array_with_vector_or_scalar(ctx, c, type,
                                                 const_vals,
                                                 offset);
   } else if (glsl_type_is_array(type)) {
      assert(!glsl_type_is_unsized_array(type));
      const struct glsl_type *without = glsl_without_array(type);
      unsigned stride = glsl_get_explicit_stride(without);

      for (unsigned elt = 0; elt < glsl_get_length(type); elt++) {
         var_fill_const_array(ctx, c->elements[elt], without,
                              const_vals, offset + (elt * stride));
         offset += glsl_get_cl_size(without);
      }
   } else if (glsl_type_is_struct(type)) {
      for (unsigned int elt = 0; elt < glsl_get_length(type); elt++) {
         const struct glsl_type *elt_type = glsl_get_struct_field(type, elt);
         unsigned field_offset = glsl_get_struct_field_offset(type, elt);

         var_fill_const_array(ctx, c->elements[elt],
                              elt_type, const_vals,
                              offset + field_offset);
      }
   } else
      unreachable("unknown GLSL type in var_fill_const_array");
}

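/* Shader-temp variables with constant initializers are flattened into an
 * array of dwords and re-emitted as an i32-array global; ctx->consts maps
 * each nir_variable to the resulting global for later loads.
 */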
static bool
emit_global_consts(struct ntd_context *ctx)
{
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_temp) {
      assert(var->constant_initializer);

      unsigned int num_members = DIV_ROUND_UP(glsl_get_cl_size(var->type), 4);
      uint32_t *const_ints = ralloc_array(ctx->ralloc_ctx, uint32_t, num_members);
      var_fill_const_array(ctx, var->constant_initializer, var->type,
                                 const_ints, 0);
      const struct dxil_value **const_vals =
         ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, num_members);
      if (!const_vals)
         return false;
      for (unsigned i = 0; i < num_members; i++)
         const_vals[i] = dxil_module_get_int32_const(&ctx->mod, const_ints[i]);

      const struct dxil_type *elt_type = dxil_module_get_int_type(&ctx->mod, 32);
      if (!elt_type)
         return false;
      const struct dxil_type *type =
         dxil_module_get_array_type(&ctx->mod, elt_type, num_members);
      if (!type)
         return false;
      const struct dxil_value *agg_vals =
         dxil_module_get_array_const(&ctx->mod, type, const_vals);
      if (!agg_vals)
         return false;

      const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name, type,
                                                              DXIL_AS_DEFAULT, 4,
                                                              agg_vals);
      if (!gvar)
         return false;

      if (!_mesa_hash_table_insert(ctx->consts, var, (void *)gvar))
         return false;
   }

   return true;
}

static bool
emit_cbv(struct ntd_context *ctx, unsigned binding, unsigned space,
         unsigned size, unsigned count, char *name)
{
   unsigned idx = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);

   const struct dxil_type *float32 = dxil_module_get_float_type(&ctx->mod, 32);
   const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, float32, size);
   const struct dxil_type *buffer_type = dxil_module_get_struct_type(&ctx->mod, name,
                                                                     &array_type, 1);
   const struct dxil_type *final_type = count != 1 ? dxil_module_get_array_type(&ctx->mod, buffer_type, count) : buffer_type;
   resource_array_layout layout = {idx, binding, count, space};
   const struct dxil_mdnode *cbv_meta = emit_cbv_metadata(&ctx->mod, final_type,
                                                          name, &layout, 4 * size);

   if (!cbv_meta)
      return false;

   util_dynarray_append(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *, cbv_meta);
   add_resource(ctx, DXIL_RES_CBV, &layout);

   if (!ctx->opts->vulkan_environment) {
      for (unsigned i = 0; i < count; ++i) {
         const struct dxil_value *handle = emit_createhandle_call_const_index(ctx, DXIL_RESOURCE_CLASS_CBV,
                                                                              idx, binding + i, false);
         if (!handle)
            return false;

         assert(!ctx->cbv_handles[binding + i]);
         ctx->cbv_handles[binding + i] = handle;
      }
   }

   return true;
}

static bool
emit_ubo_var(struct ntd_context *ctx, nir_variable *var)
{
   unsigned count = 1;
   if (glsl_type_is_array(var->type))
      count = glsl_get_length(var->type);
   return emit_cbv(ctx, var->data.binding, var->data.descriptor_set, get_dword_size(var->type), count, var->name);
}

static bool
emit_sampler(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   unsigned id = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);
   unsigned binding = var->data.binding;
   resource_array_layout layout = {id, binding, count, var->data.descriptor_set};
   const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
   const struct dxil_type *sampler_type = dxil_module_get_struct_type(&ctx->mod, "struct.SamplerState", &int32_type, 1);
   const struct dxil_mdnode *sampler_meta = emit_sampler_metadata(&ctx->mod, sampler_type, var, &layout);

   if (!sampler_meta)
      return false;

   util_dynarray_append(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *, sampler_meta);
   add_resource(ctx, DXIL_RES_SAMPLER, &layout);

   if (!ctx->opts->vulkan_environment) {
      for (unsigned i = 0; i < count; ++i) {
         const struct dxil_value *handle =
            emit_createhandle_call_const_index(ctx, DXIL_RESOURCE_CLASS_SAMPLER,
                                               id, binding + i, false);
         if (!handle)
            return false;

         unsigned idx = var->data.binding + i;
         ctx->sampler_handles[idx] = handle;
      }
   }

   return true;
}

static const struct dxil_mdnode *
emit_gs_state(struct ntd_context *ctx)
{
   const struct dxil_mdnode *gs_state_nodes[5];
   const nir_shader *s = ctx->shader;

   gs_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, dxil_get_input_primitive(s->info.gs.input_primitive));
   gs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.vertices_out);
   gs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.active_stream_mask);
   gs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, dxil_get_primitive_topology(s->info.gs.output_primitive));
   gs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.invocations);

   for (unsigned i = 0; i < ARRAY_SIZE(gs_state_nodes); ++i) {
      if (!gs_state_nodes[i])
         return NULL;
   }

   return dxil_get_metadata_node(&ctx->mod, gs_state_nodes, ARRAY_SIZE(gs_state_nodes));
}

static const struct dxil_mdnode *
emit_threads(struct ntd_context *ctx)
{
   const nir_shader *s = ctx->shader;
   const struct dxil_mdnode *threads_x = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[0], 1));
   const struct dxil_mdnode *threads_y = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[1], 1));
   const struct dxil_mdnode *threads_z = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[2], 1));
   if (!threads_x || !threads_y || !threads_z)
      return NULL;

   const struct dxil_mdnode *threads_nodes[] = { threads_x, threads_y, threads_z };
   return dxil_get_metadata_node(&ctx->mod, threads_nodes, ARRAY_SIZE(threads_nodes));
}

static uint64_t
get_module_flags(struct ntd_context *ctx)
{
   /* See the DXIL documentation for the definition of these flags:
    *
    * https://github.com/Microsoft/DirectXShaderCompiler/blob/master/docs/DXIL.rst#shader-flags
    */

   uint64_t flags = 0;
   if (ctx->mod.feats.doubles)
      flags |= (1 << 2);
   if (ctx->mod.raw_and_structured_buffers)
      flags |= (1 << 4);
   if (ctx->mod.feats.min_precision)
      flags |= (1 << 5);
   if (ctx->mod.feats.dx11_1_double_extensions)
      flags |= (1 << 6);
   if (ctx->mod.feats.inner_coverage)
      flags |= (1 << 10);
   if (ctx->mod.feats.stencil_ref)
      flags |= (1 << 11);
   if (ctx->mod.feats.typed_uav_load_additional_formats)
      flags |= (1 << 13);
   if (ctx->mod.feats.use_64uavs)
      flags |= (1 << 15);
   if (ctx->mod.feats.cs_4x_raw_sb)
      flags |= (1 << 17);
   if (ctx->mod.feats.wave_ops)
      flags |= (1 << 19);
   if (ctx->mod.feats.int64_ops)
      flags |= (1 << 20);
   if (ctx->mod.feats.native_low_precision)
      flags |= (1 << 23) | (1 << 5);

   if (ctx->opts->disable_math_refactoring)
      flags |= (1 << 1);

   return flags;
}

static const struct dxil_mdnode *
emit_entrypoint(struct ntd_context *ctx,
                const struct dxil_func *func, const char *name,
                const struct dxil_mdnode *signatures,
                const struct dxil_mdnode *resources,
                const struct dxil_mdnode *shader_props)
{
   const struct dxil_mdnode *func_md = dxil_get_metadata_func(&ctx->mod, func);
   const struct dxil_mdnode *name_md = dxil_get_metadata_string(&ctx->mod, name);
   const struct dxil_mdnode *nodes[] = {
      func_md,
      name_md,
      signatures,
      resources,
      shader_props
   };
   return dxil_get_metadata_node(&ctx->mod, nodes,
                                 ARRAY_SIZE(nodes));
}

emit_resources(struct ntd_context * ctx)1274 emit_resources(struct ntd_context *ctx)
1275 {
1276    bool emit_resources = false;
1277    const struct dxil_mdnode *resources_nodes[] = {
1278       NULL, NULL, NULL, NULL
1279    };
1280 
1281 #define ARRAY_AND_SIZE(arr) arr.data, util_dynarray_num_elements(&arr, const struct dxil_mdnode *)
1282 
1283    if (ctx->srv_metadata_nodes.size) {
1284       resources_nodes[0] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->srv_metadata_nodes));
1285       emit_resources = true;
1286    }
1287 
1288    if (ctx->uav_metadata_nodes.size) {
1289       resources_nodes[1] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->uav_metadata_nodes));
1290       emit_resources = true;
1291    }
1292 
1293    if (ctx->cbv_metadata_nodes.size) {
1294       resources_nodes[2] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->cbv_metadata_nodes));
1295       emit_resources = true;
1296    }
1297 
1298    if (ctx->sampler_metadata_nodes.size) {
1299       resources_nodes[3] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->sampler_metadata_nodes));
1300       emit_resources = true;
1301    }
1302 
1303 #undef ARRAY_AND_SIZE
1304 
1305    return emit_resources ?
1306       dxil_get_metadata_node(&ctx->mod, resources_nodes, ARRAY_SIZE(resources_nodes)): NULL;
1307 }
1308 
static bool
emit_tag(struct ntd_context *ctx, enum dxil_shader_tag tag,
1311          const struct dxil_mdnode *value_node)
1312 {
1313    const struct dxil_mdnode *tag_node = dxil_get_metadata_int32(&ctx->mod, tag);
1314    if (!tag_node || !value_node)
1315       return false;
1316    assert(ctx->num_shader_property_nodes <= ARRAY_SIZE(ctx->shader_property_nodes) - 2);
1317    ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = tag_node;
1318    ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = value_node;
1319 
1320    return true;
1321 }
1322 
1323 static bool
emit_metadata(struct ntd_context * ctx)1324 emit_metadata(struct ntd_context *ctx)
1325 {
1326    unsigned dxilMinor = ctx->mod.minor_version;
1327    if (!emit_llvm_ident(&ctx->mod) ||
1328        !emit_named_version(&ctx->mod, "dx.version", 1, dxilMinor) ||
1329        !emit_named_version(&ctx->mod, "dx.valver", 1, 4) ||
1330        !emit_dx_shader_model(&ctx->mod))
1331       return false;
1332 
1333    const struct dxil_type *void_type = dxil_module_get_void_type(&ctx->mod);
1334    const struct dxil_type *main_func_type = dxil_module_add_function_type(&ctx->mod, void_type, NULL, 0);
1335    const struct dxil_func *main_func = dxil_add_function_def(&ctx->mod, "main", main_func_type);
1336    if (!main_func)
1337       return false;
1338 
1339    const struct dxil_mdnode *resources_node = emit_resources(ctx);
1340 
1341    const struct dxil_mdnode *main_entrypoint = dxil_get_metadata_func(&ctx->mod, main_func);
1342    const struct dxil_mdnode *node27 = dxil_get_metadata_node(&ctx->mod, NULL, 0);
1343 
1344    const struct dxil_mdnode *node4 = dxil_get_metadata_int32(&ctx->mod, 0);
1345    const struct dxil_mdnode *nodes_4_27_27[] = {
1346       node4, node27, node27
1347    };
1348    const struct dxil_mdnode *node28 = dxil_get_metadata_node(&ctx->mod, nodes_4_27_27,
1349                                                       ARRAY_SIZE(nodes_4_27_27));
1350 
1351    const struct dxil_mdnode *node29 = dxil_get_metadata_node(&ctx->mod, &node28, 1);
1352 
1353    const struct dxil_mdnode *node3 = dxil_get_metadata_int32(&ctx->mod, 1);
1354    const struct dxil_mdnode *main_type_annotation_nodes[] = {
1355       node3, main_entrypoint, node29
1356    };
1357    const struct dxil_mdnode *main_type_annotation = dxil_get_metadata_node(&ctx->mod, main_type_annotation_nodes,
1358                                                                            ARRAY_SIZE(main_type_annotation_nodes));
1359 
1360    if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) {
1361       if (!emit_tag(ctx, DXIL_SHADER_TAG_GS_STATE, emit_gs_state(ctx)))
1362          return false;
1363    } else if (ctx->mod.shader_kind == DXIL_COMPUTE_SHADER) {
1364       if (!emit_tag(ctx, DXIL_SHADER_TAG_NUM_THREADS, emit_threads(ctx)))
1365          return false;
1366    }
1367 
1368    uint64_t flags = get_module_flags(ctx);
1369    if (flags != 0) {
1370       if (!emit_tag(ctx, DXIL_SHADER_TAG_FLAGS, dxil_get_metadata_int64(&ctx->mod, flags)))
1371          return false;
1372    }
1373    const struct dxil_mdnode *shader_properties = NULL;
1374    if (ctx->num_shader_property_nodes > 0) {
1375       shader_properties = dxil_get_metadata_node(&ctx->mod, ctx->shader_property_nodes,
1376                                                  ctx->num_shader_property_nodes);
1377       if (!shader_properties)
1378          return false;
1379    }
1380 
1381    const struct dxil_mdnode *signatures = get_signatures(&ctx->mod, ctx->shader,
1382                                                          ctx->opts->vulkan_environment);
1383 
1384    const struct dxil_mdnode *dx_entry_point = emit_entrypoint(ctx, main_func,
1385        "main", signatures, resources_node, shader_properties);
1386    if (!dx_entry_point)
1387       return false;
1388 
1389    if (resources_node) {
1390       const struct dxil_mdnode *dx_resources = resources_node;
1391       dxil_add_metadata_named_node(&ctx->mod, "dx.resources",
1392                                        &dx_resources, 1);
1393    }
1394 
1395    const struct dxil_mdnode *dx_type_annotations[] = { main_type_annotation };
1396    return dxil_add_metadata_named_node(&ctx->mod, "dx.typeAnnotations",
1397                                        dx_type_annotations,
1398                                        ARRAY_SIZE(dx_type_annotations)) &&
1399           dxil_add_metadata_named_node(&ctx->mod, "dx.entryPoints",
1400                                        &dx_entry_point, 1);
1401 }
1402 
1403 static const struct dxil_value *
1404 bitcast_to_int(struct ntd_context *ctx, unsigned bit_size,
1405                const struct dxil_value *value)
1406 {
1407    const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod, bit_size);
1408    if (!type)
1409       return NULL;
1410 
1411    return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
1412 }
1413 
1414 static const struct dxil_value *
1415 bitcast_to_float(struct ntd_context *ctx, unsigned bit_size,
1416                  const struct dxil_value *value)
1417 {
1418    const struct dxil_type *type = dxil_module_get_float_type(&ctx->mod, bit_size);
1419    if (!type)
1420       return NULL;
1421 
1422    return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
1423 }
1424 
1425 static void
1426 store_ssa_def(struct ntd_context *ctx, nir_ssa_def *ssa, unsigned chan,
1427               const struct dxil_value *value)
1428 {
1429    assert(ssa->index < ctx->num_defs);
1430    assert(chan < ssa->num_components);
1431    /* We pre-defined the dest value because of a phi node, so bitcast while storing if the
1432     * base type differs */
1433    if (ctx->defs[ssa->index].chans[chan]) {
1434       const struct dxil_type *expect_type = dxil_value_get_type(ctx->defs[ssa->index].chans[chan]);
1435       const struct dxil_type *value_type = dxil_value_get_type(value);
1436       if (dxil_type_to_nir_type(expect_type) != dxil_type_to_nir_type(value_type))
1437          value = dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, expect_type, value);
1438    }
1439    ctx->defs[ssa->index].chans[chan] = value;
1440 }
1441 
1442 static void
1443 store_dest_value(struct ntd_context *ctx, nir_dest *dest, unsigned chan,
1444                  const struct dxil_value *value)
1445 {
1446    assert(dest->is_ssa);
1447    assert(value);
1448    store_ssa_def(ctx, &dest->ssa, chan, value);
1449 }
1450 
1451 static void
1452 store_dest(struct ntd_context *ctx, nir_dest *dest, unsigned chan,
1453            const struct dxil_value *value, nir_alu_type type)
1454 {
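   /* Besides storing the value, record which optional DXIL features its
    * type requires, so the module advertises the matching feature flags. */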
1455    switch (nir_alu_type_get_base_type(type)) {
1456    case nir_type_float:
1457       if (nir_dest_bit_size(*dest) == 64)
1458          ctx->mod.feats.doubles = true;
1459       FALLTHROUGH;
1460    case nir_type_uint:
1461    case nir_type_int:
1462       if (nir_dest_bit_size(*dest) == 16)
1463          ctx->mod.feats.native_low_precision = true;
1464       if (nir_dest_bit_size(*dest) == 64)
1465          ctx->mod.feats.int64_ops = true;
1466       FALLTHROUGH;
1467    case nir_type_bool:
1468       store_dest_value(ctx, dest, chan, value);
1469       break;
1470    default:
1471       unreachable("unexpected nir_alu_type");
1472    }
1473 }
1474 
1475 static void
1476 store_alu_dest(struct ntd_context *ctx, nir_alu_instr *alu, unsigned chan,
1477                const struct dxil_value *value)
1478 {
1479    assert(!alu->dest.saturate);
1480    store_dest(ctx, &alu->dest.dest, chan, value,
1481               nir_op_infos[alu->op].output_type);
1482 }
1483 
1484 static const struct dxil_value *
1485 get_src_ssa(struct ntd_context *ctx, const nir_ssa_def *ssa, unsigned chan)
1486 {
1487    assert(ssa->index < ctx->num_defs);
1488    assert(chan < ssa->num_components);
1489    assert(ctx->defs[ssa->index].chans[chan]);
1490    return ctx->defs[ssa->index].chans[chan];
1491 }
1492 
1493 static const struct dxil_value *
1494 get_src(struct ntd_context *ctx, nir_src *src, unsigned chan,
1495         nir_alu_type type)
1496 {
1497    assert(src->is_ssa);
1498    const struct dxil_value *value = get_src_ssa(ctx, src->ssa, chan);
1499 
1500    const int bit_size = nir_src_bit_size(*src);
1501 
1502    switch (nir_alu_type_get_base_type(type)) {
1503    case nir_type_int:
1504    case nir_type_uint: {
1505       assert(bit_size != 64 || ctx->mod.feats.int64_ops);
1506       const struct dxil_type *expect_type = dxil_module_get_int_type(&ctx->mod, bit_size);
1507       /* nothing to do */
1508       if (dxil_value_type_equal_to(value, expect_type))
1509          return value;
1510       assert(dxil_value_type_bitsize_equal_to(value, bit_size));
1511       return bitcast_to_int(ctx, bit_size, value);
1512       }
1513 
1514    case nir_type_float:
1515       assert(nir_src_bit_size(*src) >= 16);
1516       assert(nir_src_bit_size(*src) != 64 || (ctx->mod.feats.doubles &&
1517                                               ctx->mod.feats.int64_ops));
1518       if (dxil_value_type_equal_to(value, dxil_module_get_float_type(&ctx->mod, bit_size)))
1519          return value;
1520       assert(dxil_value_type_bitsize_equal_to(value, bit_size));
1521       return bitcast_to_float(ctx, bit_size, value);
1522 
1523    case nir_type_bool:
1524       if (!dxil_value_type_bitsize_equal_to(value, 1)) {
1525          return dxil_emit_cast(&ctx->mod, DXIL_CAST_TRUNC,
1526                                dxil_module_get_int_type(&ctx->mod, 1), value);
1527       }
1528       return value;
1529 
1530    default:
1531       unreachable("unexpected nir_alu_type");
1532    }
1533 }
1534 
1535 static const struct dxil_type *
1536 get_alu_src_type(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src)
1537 {
1538    assert(!alu->src[src].abs);
1539    assert(!alu->src[src].negate);
1540    nir_ssa_def *ssa_src = alu->src[src].src.ssa;
1541    unsigned chan = alu->src[src].swizzle[0];
1542    const struct dxil_value *value = get_src_ssa(ctx, ssa_src, chan);
1543    return dxil_value_get_type(value);
1544 }
1545 
1546 static const struct dxil_value *
1547 get_alu_src(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src)
1548 {
1549    assert(!alu->src[src].abs);
1550    assert(!alu->src[src].negate);
1551 
1552    unsigned chan = alu->src[src].swizzle[0];
1553    return get_src(ctx, &alu->src[src].src, chan,
1554                   nir_op_infos[alu->op].input_types[src]);
1555 }
1556 
1557 static bool
1558 emit_binop(struct ntd_context *ctx, nir_alu_instr *alu,
1559            enum dxil_bin_opcode opcode,
1560            const struct dxil_value *op0, const struct dxil_value *op1)
1561 {
1562    bool is_float_op = nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) == nir_type_float;
1563 
1564    enum dxil_opt_flags flags = 0;
1565    if (is_float_op && !alu->exact)
1566       flags |= DXIL_UNSAFE_ALGEBRA;
1567 
1568    const struct dxil_value *v = dxil_emit_binop(&ctx->mod, opcode, op0, op1, flags);
1569    if (!v)
1570       return false;
1571    store_alu_dest(ctx, alu, 0, v);
1572    return true;
1573 }
1574 
1575 static bool
1576 emit_shift(struct ntd_context *ctx, nir_alu_instr *alu,
1577            enum dxil_bin_opcode opcode,
1578            const struct dxil_value *op0, const struct dxil_value *op1)
1579 {
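   /* LLVM (and thus DXIL) requires both shift operands to have the same
    * width, while NIR allows e.g. a 32-bit shift amount on a 64-bit value;
    * widen or truncate op1 to match op0 first. */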
1580    unsigned op0_bit_size = nir_src_bit_size(alu->src[0].src);
1581    unsigned op1_bit_size = nir_src_bit_size(alu->src[1].src);
1582    if (op0_bit_size != op1_bit_size) {
1583       const struct dxil_type *type =
1584          dxil_module_get_int_type(&ctx->mod, op0_bit_size);
1585       enum dxil_cast_opcode cast_op =
1586          op1_bit_size < op0_bit_size ? DXIL_CAST_ZEXT : DXIL_CAST_TRUNC;
1587       op1 = dxil_emit_cast(&ctx->mod, cast_op, type, op1);
1588    }
1589 
1590    const struct dxil_value *v =
1591       dxil_emit_binop(&ctx->mod, opcode, op0, op1, 0);
1592    if (!v)
1593       return false;
1594    store_alu_dest(ctx, alu, 0, v);
1595    return true;
1596 }
1597 
1598 static bool
1599 emit_cmp(struct ntd_context *ctx, nir_alu_instr *alu,
1600          enum dxil_cmp_pred pred,
1601          const struct dxil_value *op0, const struct dxil_value *op1)
1602 {
1603    const struct dxil_value *v = dxil_emit_cmp(&ctx->mod, pred, op0, op1);
1604    if (!v)
1605       return false;
1606    store_alu_dest(ctx, alu, 0, v);
1607    return true;
1608 }
1609 
1610 static enum dxil_cast_opcode
1611 get_cast_op(nir_alu_instr *alu)
1612 {
1613    unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
1614    unsigned src_bits = nir_src_bit_size(alu->src[0].src);
1615 
1616    switch (alu->op) {
1617    /* bool -> int */
1618    case nir_op_b2i16:
1619    case nir_op_b2i32:
1620    case nir_op_b2i64:
1621       return DXIL_CAST_ZEXT;
1622 
1623    /* float -> float */
1624    case nir_op_f2f16_rtz:
1625    case nir_op_f2f32:
1626    case nir_op_f2f64:
1627       assert(dst_bits != src_bits);
1628       if (dst_bits < src_bits)
1629          return DXIL_CAST_FPTRUNC;
1630       else
1631          return DXIL_CAST_FPEXT;
1632 
1633    /* int -> int */
1634    case nir_op_i2i16:
1635    case nir_op_i2i32:
1636    case nir_op_i2i64:
1637       assert(dst_bits != src_bits);
1638       if (dst_bits < src_bits)
1639          return DXIL_CAST_TRUNC;
1640       else
1641          return DXIL_CAST_SEXT;
1642 
1643    /* uint -> uint */
1644    case nir_op_u2u16:
1645    case nir_op_u2u32:
1646    case nir_op_u2u64:
1647       assert(dst_bits != src_bits);
1648       if (dst_bits < src_bits)
1649          return DXIL_CAST_TRUNC;
1650       else
1651          return DXIL_CAST_ZEXT;
1652 
1653    /* float -> int */
1654    case nir_op_f2i16:
1655    case nir_op_f2i32:
1656    case nir_op_f2i64:
1657       return DXIL_CAST_FPTOSI;
1658 
1659    /* float -> uint */
1660    case nir_op_f2u16:
1661    case nir_op_f2u32:
1662    case nir_op_f2u64:
1663       return DXIL_CAST_FPTOUI;
1664 
1665    /* int -> float */
1666    case nir_op_i2f16:
1667    case nir_op_i2f32:
1668    case nir_op_i2f64:
1669       return DXIL_CAST_SITOFP;
1670 
1671    /* uint -> float */
1672    case nir_op_u2f16:
1673    case nir_op_u2f32:
1674    case nir_op_u2f64:
1675       return DXIL_CAST_UITOFP;
1676 
1677    default:
1678       unreachable("unexpected cast op");
1679    }
1680 }
1681 
1682 static const struct dxil_type *
1683 get_cast_dest_type(struct ntd_context *ctx, nir_alu_instr *alu)
1684 {
1685    unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
1686    switch (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type)) {
1687    case nir_type_bool:
1688       assert(dst_bits == 1);
1689       FALLTHROUGH;
1690    case nir_type_int:
1691    case nir_type_uint:
1692       return dxil_module_get_int_type(&ctx->mod, dst_bits);
1693 
1694    case nir_type_float:
1695       return dxil_module_get_float_type(&ctx->mod, dst_bits);
1696 
1697    default:
1698       unreachable("unknown nir_alu_type");
1699    }
1700 }
1701 
1702 static bool
1703 is_double(nir_alu_type alu_type, unsigned bit_size)
1704 {
1705    return nir_alu_type_get_base_type(alu_type) == nir_type_float &&
1706           bit_size == 64;
1707 }
1708 
1709 static bool
1710 emit_cast(struct ntd_context *ctx, nir_alu_instr *alu,
1711           const struct dxil_value *value)
1712 {
1713    enum dxil_cast_opcode opcode = get_cast_op(alu);
1714    const struct dxil_type *type = get_cast_dest_type(ctx, alu);
1715    if (!type)
1716       return false;
1717 
1718    const nir_op_info *info = &nir_op_infos[alu->op];
1719    switch (opcode) {
1720    case DXIL_CAST_UITOFP:
1721    case DXIL_CAST_SITOFP:
1722       if (is_double(info->output_type, nir_dest_bit_size(alu->dest.dest)))
1723          ctx->mod.feats.dx11_1_double_extensions = true;
1724       break;
1725    case DXIL_CAST_FPTOUI:
1726    case DXIL_CAST_FPTOSI:
1727       if (is_double(info->input_types[0], nir_src_bit_size(alu->src[0].src)))
1728          ctx->mod.feats.dx11_1_double_extensions = true;
1729       break;
1730    default:
1731       break;
1732    }
1733 
1734    const struct dxil_value *v = dxil_emit_cast(&ctx->mod, opcode, type,
1735                                                value);
1736    if (!v)
1737       return false;
1738    store_alu_dest(ctx, alu, 0, v);
1739    return true;
1740 }
1741 
1742 static enum overload_type
1743 get_overload(nir_alu_type alu_type, unsigned bit_size)
1744 {
1745    switch (nir_alu_type_get_base_type(alu_type)) {
1746    case nir_type_int:
1747    case nir_type_uint:
1748       switch (bit_size) {
1749       case 16: return DXIL_I16;
1750       case 32: return DXIL_I32;
1751       case 64: return DXIL_I64;
1752       default:
1753          unreachable("unexpected bit_size");
1754       }
1755    case nir_type_float:
1756       switch (bit_size) {
1757       case 16: return DXIL_F16;
1758       case 32: return DXIL_F32;
1759       case 64: return DXIL_F64;
1760       default:
1761          unreachable("unexpected bit_size");
1762       }
1763    default:
1764       unreachable("unexpected output type");
1765    }
1766 }
1767 
1768 static bool
1769 emit_unary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
1770                  enum dxil_intr intr, const struct dxil_value *op)
1771 {
1772    const nir_op_info *info = &nir_op_infos[alu->op];
1773    unsigned src_bits = nir_src_bit_size(alu->src[0].src);
1774    enum overload_type overload = get_overload(info->input_types[0], src_bits);
1775 
1776    const struct dxil_value *v = emit_unary_call(ctx, overload, intr, op);
1777    if (!v)
1778       return false;
1779    store_alu_dest(ctx, alu, 0, v);
1780    return true;
1781 }
1782 
1783 static bool
1784 emit_binary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
1785                   enum dxil_intr intr,
1786                   const struct dxil_value *op0, const struct dxil_value *op1)
1787 {
1788    const nir_op_info *info = &nir_op_infos[alu->op];
1789    assert(info->output_type == info->input_types[0]);
1790    assert(info->output_type == info->input_types[1]);
1791    unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
1792    assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
1793    assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
1794    enum overload_type overload = get_overload(info->output_type, dst_bits);
1795 
1796    const struct dxil_value *v = emit_binary_call(ctx, overload, intr,
1797                                                  op0, op1);
1798    if (!v)
1799       return false;
1800    store_alu_dest(ctx, alu, 0, v);
1801    return true;
1802 }
1803 
1804 static bool
1805 emit_tertiary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
1806                     enum dxil_intr intr,
1807                     const struct dxil_value *op0,
1808                     const struct dxil_value *op1,
1809                     const struct dxil_value *op2)
1810 {
1811    const nir_op_info *info = &nir_op_infos[alu->op];
1812    assert(info->output_type == info->input_types[0]);
1813    assert(info->output_type == info->input_types[1]);
1814    assert(info->output_type == info->input_types[2]);
1815 
1816    unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
1817    assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
1818    assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
1819    assert(nir_src_bit_size(alu->src[2].src) == dst_bits);
1820 
1821    enum overload_type overload = get_overload(info->output_type, dst_bits);
1822 
1823    const struct dxil_value *v = emit_tertiary_call(ctx, overload, intr,
1824                                                    op0, op1, op2);
1825    if (!v)
1826       return false;
1827    store_alu_dest(ctx, alu, 0, v);
1828    return true;
1829 }
1830 
1831 static bool emit_select(struct ntd_context *ctx, nir_alu_instr *alu,
1832                         const struct dxil_value *sel,
1833                         const struct dxil_value *val_true,
1834                         const struct dxil_value *val_false)
1835 {
1836    assert(sel);
1837    assert(val_true);
1838    assert(val_false);
1839 
1840    const struct dxil_value *v = dxil_emit_select(&ctx->mod, sel, val_true, val_false);
1841    if (!v)
1842       return false;
1843 
1844    store_alu_dest(ctx, alu, 0, v);
1845    return true;
1846 }
1847 
1848 static bool
1849 emit_b2f16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
1850 {
1851    assert(val);
1852 
1853    struct dxil_module *m = &ctx->mod;
1854 
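   /* 0x3C00 is 1.0 in IEEE half precision; b2f is just a select between
    * 1.0 and 0.0. */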
1855    const struct dxil_value *c1 = dxil_module_get_float16_const(m, 0x3C00);
1856    const struct dxil_value *c0 = dxil_module_get_float16_const(m, 0);
1857 
1858    if (!c0 || !c1)
1859       return false;
1860 
1861    return emit_select(ctx, alu, val, c1, c0);
1862 }
1863 
1864 static bool
1865 emit_b2f32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
1866 {
1867    assert(val);
1868 
1869    struct dxil_module *m = &ctx->mod;
1870 
1871    const struct dxil_value *c1 = dxil_module_get_float_const(m, 1.0f);
1872    const struct dxil_value *c0 = dxil_module_get_float_const(m, 0.0f);
1873 
1874    if (!c0 || !c1)
1875       return false;
1876 
1877    return emit_select(ctx, alu, val, c1, c0);
1878 }
1879 
1880 static bool
1881 emit_f2b32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
1882 {
1883    assert(val);
1884 
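   /* Use an unordered compare so that NaN != 0.0 yields true: in NIR any
    * non-zero value, including NaN, converts to true. */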
1885    const struct dxil_value *zero = dxil_module_get_float_const(&ctx->mod, 0.0f);
1886    return emit_cmp(ctx, alu, DXIL_FCMP_UNE, val, zero);
1887 }
1888 
1889 static bool
1890 emit_ufind_msb(struct ntd_context *ctx, nir_alu_instr *alu,
1891                const struct dxil_value *val)
1892 {
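   /* DXIL's FirstbitHi returns the bit offset counted from the MSB, while
    * NIR's ufind_msb wants the offset from the LSB, or -1 if no bit is
    * set. Convert via (bit_size - 1) - result and fix up the zero case
    * with a select. */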
1893    const nir_op_info *info = &nir_op_infos[alu->op];
1894    unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
1895    unsigned src_bits = nir_src_bit_size(alu->src[0].src);
1896    enum overload_type overload = get_overload(info->output_type, src_bits);
1897 
1898    const struct dxil_value *v = emit_unary_call(ctx, overload,
1899                                                 DXIL_INTR_FIRSTBIT_HI, val);
1900    if (!v)
1901       return false;
1902 
1903    const struct dxil_value *size = dxil_module_get_int32_const(&ctx->mod,
1904       src_bits - 1);
1905    const struct dxil_value *zero = dxil_module_get_int_const(&ctx->mod, 0,
1906                                                              src_bits);
1907    if (!size || !zero)
1908       return false;
1909 
1910    v = dxil_emit_binop(&ctx->mod, DXIL_BINOP_SUB, size, v, 0);
1911    const struct dxil_value *cnd = dxil_emit_cmp(&ctx->mod, DXIL_ICMP_NE,
1912                                                 val, zero);
1913    if (!v || !cnd)
1914       return false;
1915 
1916    const struct dxil_value *minus_one =
1917       dxil_module_get_int_const(&ctx->mod, -1, dst_bits);
1918    if (!minus_one)
1919       return false;
1920 
1921    v = dxil_emit_select(&ctx->mod, cnd, v, minus_one);
1922    if (!v)
1923       return false;
1924 
1925    store_alu_dest(ctx, alu, 0, v);
1926    return true;
1927 }
1928 
1929 static bool
1930 emit_f16tof32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
1931 {
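   /* dx.op.legacyF16ToF32 implements HLSL f16tof32(): the low 16 bits of
    * the operand are interpreted as a half and widened to float. */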
1932    const struct dxil_func *func = dxil_get_function(&ctx->mod,
1933                                                     "dx.op.legacyF16ToF32",
1934                                                     DXIL_NONE);
1935    if (!func)
1936       return false;
1937 
1938    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F16TOF32);
1939    if (!opcode)
1940       return false;
1941 
1942    const struct dxil_value *args[] = {
1943      opcode,
1944      val
1945    };
1946 
1947    const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
1948    if (!v)
1949       return false;
1950    store_alu_dest(ctx, alu, 0, v);
1951    return true;
1952 }
1953 
1954 static bool
1955 emit_f32tof16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
1956 {
1957    const struct dxil_func *func = dxil_get_function(&ctx->mod,
1958                                                     "dx.op.legacyF32ToF16",
1959                                                     DXIL_NONE);
1960    if (!func)
1961       return false;
1962 
1963    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F32TOF16);
1964    if (!opcode)
1965       return false;
1966 
1967    const struct dxil_value *args[] = {
1968      opcode,
1969      val
1970    };
1971 
1972    const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
1973    if (!v)
1974       return false;
1975    store_alu_dest(ctx, alu, 0, v);
1976    return true;
1977 }
1978 
1979 static bool
1980 emit_vec(struct ntd_context *ctx, nir_alu_instr *alu, unsigned num_inputs)
1981 {
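   /* The shader is fully scalarized at this point, so a vecN just
    * registers each scalar source as one channel of the destination; no
    * actual vector value is built. */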
1982    const struct dxil_type *type = get_alu_src_type(ctx, alu, 0);
1983    nir_alu_type t = dxil_type_to_nir_type(type);
1984 
1985    for (unsigned i = 0; i < num_inputs; i++) {
1986       const struct dxil_value *src =
1987          get_src(ctx, &alu->src[i].src, alu->src[i].swizzle[0], t);
1988       if (!src)
1989          return false;
1990 
1991       store_alu_dest(ctx, alu, i, src);
1992    }
1993    return true;
1994 }
1995 
1996 static bool
1997 emit_make_double(struct ntd_context *ctx, nir_alu_instr *alu)
1998 {
1999    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.makeDouble", DXIL_F64);
2000    if (!func)
2001       return false;
2002 
2003    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_MAKE_DOUBLE);
2004    if (!opcode)
2005       return false;
2006 
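   /* dx.op.makeDouble packs two 32-bit words (low word first) into one
    * double. */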
2007    const struct dxil_value *args[3] = {
2008       opcode,
2009       get_src(ctx, &alu->src[0].src, 0, nir_type_uint32),
2010       get_src(ctx, &alu->src[0].src, 1, nir_type_uint32),
2011    };
2012    if (!args[1] || !args[2])
2013       return false;
2014 
2015    const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2016    if (!v)
2017       return false;
2018    store_dest(ctx, &alu->dest.dest, 0, v, nir_type_float64);
2019    return true;
2020 }
2021 
2022 static bool
2023 emit_split_double(struct ntd_context *ctx, nir_alu_instr *alu)
2024 {
2025    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.splitDouble", DXIL_F64);
2026    if (!func)
2027       return false;
2028 
2029    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SPLIT_DOUBLE);
2030    if (!opcode)
2031       return false;
2032 
2033    const struct dxil_value *args[] = {
2034       opcode,
2035       get_src(ctx, &alu->src[0].src, 0, nir_type_float64)
2036    };
2037    if (!args[1])
2038       return false;
2039 
2040    const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2041    if (!v)
2042       return false;
2043 
2044    const struct dxil_value *hi = dxil_emit_extractval(&ctx->mod, v, 0);
2045    const struct dxil_value *lo = dxil_emit_extractval(&ctx->mod, v, 1);
2046    if (!hi || !lo)
2047       return false;
2048 
2049    store_dest_value(ctx, &alu->dest.dest, 0, hi);
2050    store_dest_value(ctx, &alu->dest.dest, 1, lo);
2051    return true;
2052 }
2053 
2054 static bool
2055 emit_alu(struct ntd_context *ctx, nir_alu_instr *alu)
2056 {
2057    /* handle vec-instructions first; they are the only ones that produce
2058     * vector results.
2059     */
2060    switch (alu->op) {
2061    case nir_op_vec2:
2062    case nir_op_vec3:
2063    case nir_op_vec4:
2064    case nir_op_vec8:
2065    case nir_op_vec16:
2066       return emit_vec(ctx, alu, nir_op_infos[alu->op].num_inputs);
2067    case nir_op_mov: {
2068          assert(nir_dest_num_components(alu->dest.dest) == 1);
2069          store_ssa_def(ctx, &alu->dest.dest.ssa, 0, get_src_ssa(ctx,
2070                         alu->src->src.ssa, alu->src->swizzle[0]));
2071          return true;
2072       }
2073    case nir_op_pack_double_2x32_dxil:
2074       return emit_make_double(ctx, alu);
2075    case nir_op_unpack_double_2x32_dxil:
2076       return emit_split_double(ctx, alu);
2077    default:
2078       /* silence warnings */
2079       ;
2080    }
2081 
2082    /* other ops should be scalar */
2083    assert(alu->dest.write_mask == 1);
2084    const struct dxil_value *src[4];
2085    assert(nir_op_infos[alu->op].num_inputs <= 4);
2086    for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
2087       src[i] = get_alu_src(ctx, alu, i);
2088       if (!src[i])
2089          return false;
2090    }
2091 
2092    switch (alu->op) {
2093    case nir_op_iadd:
2094    case nir_op_fadd: return emit_binop(ctx, alu, DXIL_BINOP_ADD, src[0], src[1]);
2095 
2096    case nir_op_isub:
2097    case nir_op_fsub: return emit_binop(ctx, alu, DXIL_BINOP_SUB, src[0], src[1]);
2098 
2099    case nir_op_imul:
2100    case nir_op_fmul: return emit_binop(ctx, alu, DXIL_BINOP_MUL, src[0], src[1]);
2101 
2102    case nir_op_idiv:
2103    case nir_op_fdiv: return emit_binop(ctx, alu, DXIL_BINOP_SDIV, src[0], src[1]);
2104 
2105    case nir_op_udiv: return emit_binop(ctx, alu, DXIL_BINOP_UDIV, src[0], src[1]);
2106    case nir_op_irem: return emit_binop(ctx, alu, DXIL_BINOP_SREM, src[0], src[1]);
2107    case nir_op_imod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
2108    case nir_op_umod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
2109    case nir_op_ishl: return emit_shift(ctx, alu, DXIL_BINOP_SHL, src[0], src[1]);
2110    case nir_op_ishr: return emit_shift(ctx, alu, DXIL_BINOP_ASHR, src[0], src[1]);
2111    case nir_op_ushr: return emit_shift(ctx, alu, DXIL_BINOP_LSHR, src[0], src[1]);
2112    case nir_op_iand: return emit_binop(ctx, alu, DXIL_BINOP_AND, src[0], src[1]);
2113    case nir_op_ior:  return emit_binop(ctx, alu, DXIL_BINOP_OR, src[0], src[1]);
2114    case nir_op_ixor: return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], src[1]);
2115    case nir_op_inot: {
2116       unsigned bit_size = alu->dest.dest.ssa.bit_size;
2117       intmax_t val = bit_size == 1 ? 1 : -1;
2118       const struct dxil_value *negative_one = dxil_module_get_int_const(&ctx->mod, val, bit_size);
2119       return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], negative_one);
2120    }
2121    case nir_op_ieq:  return emit_cmp(ctx, alu, DXIL_ICMP_EQ, src[0], src[1]);
2122    case nir_op_ine:  return emit_cmp(ctx, alu, DXIL_ICMP_NE, src[0], src[1]);
2123    case nir_op_ige:  return emit_cmp(ctx, alu, DXIL_ICMP_SGE, src[0], src[1]);
2124    case nir_op_uge:  return emit_cmp(ctx, alu, DXIL_ICMP_UGE, src[0], src[1]);
2125    case nir_op_ilt:  return emit_cmp(ctx, alu, DXIL_ICMP_SLT, src[0], src[1]);
2126    case nir_op_ult:  return emit_cmp(ctx, alu, DXIL_ICMP_ULT, src[0], src[1]);
2127    case nir_op_feq:  return emit_cmp(ctx, alu, DXIL_FCMP_OEQ, src[0], src[1]);
2128    case nir_op_fneu: return emit_cmp(ctx, alu, DXIL_FCMP_UNE, src[0], src[1]);
2129    case nir_op_flt:  return emit_cmp(ctx, alu, DXIL_FCMP_OLT, src[0], src[1]);
2130    case nir_op_fge:  return emit_cmp(ctx, alu, DXIL_FCMP_OGE, src[0], src[1]);
2131    case nir_op_bcsel: return emit_select(ctx, alu, src[0], src[1], src[2]);
2132    case nir_op_ftrunc: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_Z, src[0]);
2133    case nir_op_fabs: return emit_unary_intin(ctx, alu, DXIL_INTR_FABS, src[0]);
2134    case nir_op_fcos: return emit_unary_intin(ctx, alu, DXIL_INTR_FCOS, src[0]);
2135    case nir_op_fsin: return emit_unary_intin(ctx, alu, DXIL_INTR_FSIN, src[0]);
2136    case nir_op_fceil: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_PI, src[0]);
2137    case nir_op_fexp2: return emit_unary_intin(ctx, alu, DXIL_INTR_FEXP2, src[0]);
2138    case nir_op_flog2: return emit_unary_intin(ctx, alu, DXIL_INTR_FLOG2, src[0]);
2139    case nir_op_ffloor: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NI, src[0]);
2140    case nir_op_ffract: return emit_unary_intin(ctx, alu, DXIL_INTR_FRC, src[0]);
2141    case nir_op_fisnormal: return emit_unary_intin(ctx, alu, DXIL_INTR_ISNORMAL, src[0]);
2142    case nir_op_fisfinite: return emit_unary_intin(ctx, alu, DXIL_INTR_ISFINITE, src[0]);
2143 
2144    case nir_op_fddx:
2145    case nir_op_fddx_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_COARSE, src[0]);
2146    case nir_op_fddx_fine: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_FINE, src[0]);
2147    case nir_op_fddy:
2148    case nir_op_fddy_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDY_COARSE, src[0]);
2149    case nir_op_fddy_fine: return emit_unary_intin(ctx, alu, DXIL_INTR_DDY_FINE, src[0]);
2150 
2151    case nir_op_fround_even: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NE, src[0]);
2152    case nir_op_frcp: {
2153          const struct dxil_value *one = dxil_module_get_float_const(&ctx->mod, 1.0f);
2154          return emit_binop(ctx, alu, DXIL_BINOP_SDIV, one, src[0]);
2155       }
2156    case nir_op_fsat: return emit_unary_intin(ctx, alu, DXIL_INTR_SATURATE, src[0]);
2157    case nir_op_bit_count: return emit_unary_intin(ctx, alu, DXIL_INTR_COUNTBITS, src[0]);
2158    case nir_op_ufind_msb: return emit_ufind_msb(ctx, alu, src[0]);
2159    case nir_op_imax: return emit_binary_intin(ctx, alu, DXIL_INTR_IMAX, src[0], src[1]);
2160    case nir_op_imin: return emit_binary_intin(ctx, alu, DXIL_INTR_IMIN, src[0], src[1]);
2161    case nir_op_umax: return emit_binary_intin(ctx, alu, DXIL_INTR_UMAX, src[0], src[1]);
2162    case nir_op_umin: return emit_binary_intin(ctx, alu, DXIL_INTR_UMIN, src[0], src[1]);
2163    case nir_op_frsq: return emit_unary_intin(ctx, alu, DXIL_INTR_RSQRT, src[0]);
2164    case nir_op_fsqrt: return emit_unary_intin(ctx, alu, DXIL_INTR_SQRT, src[0]);
2165    case nir_op_fmax: return emit_binary_intin(ctx, alu, DXIL_INTR_FMAX, src[0], src[1]);
2166    case nir_op_fmin: return emit_binary_intin(ctx, alu, DXIL_INTR_FMIN, src[0], src[1]);
2167    case nir_op_ffma: return emit_tertiary_intin(ctx, alu, DXIL_INTR_FMA, src[0], src[1], src[2]);
2168 
2169    case nir_op_unpack_half_2x16_split_x: return emit_f16tof32(ctx, alu, src[0]);
2170    case nir_op_pack_half_2x16_split: return emit_f32tof16(ctx, alu, src[0]);
2171 
2172    case nir_op_b2i16:
2173    case nir_op_i2i16:
2174    case nir_op_f2i16:
2175    case nir_op_f2u16:
2176    case nir_op_u2u16:
2177    case nir_op_u2f16:
2178    case nir_op_i2f16:
2179    case nir_op_f2f16_rtz:
2180    case nir_op_b2i32:
2181    case nir_op_f2f32:
2182    case nir_op_f2i32:
2183    case nir_op_f2u32:
2184    case nir_op_i2f32:
2185    case nir_op_i2i32:
2186    case nir_op_u2f32:
2187    case nir_op_u2u32:
2188    case nir_op_b2i64:
2189    case nir_op_f2f64:
2190    case nir_op_f2i64:
2191    case nir_op_f2u64:
2192    case nir_op_i2f64:
2193    case nir_op_i2i64:
2194    case nir_op_u2f64:
2195    case nir_op_u2u64:
2196       return emit_cast(ctx, alu, src[0]);
2197 
2198    case nir_op_f2b32: return emit_f2b32(ctx, alu, src[0]);
2199    case nir_op_b2f16: return emit_b2f16(ctx, alu, src[0]);
2200    case nir_op_b2f32: return emit_b2f32(ctx, alu, src[0]);
2201    default:
2202       NIR_INSTR_UNSUPPORTED(&alu->instr);
2203       assert(!"Unimplemented ALU instruction");
2204       return false;
2205    }
2206 }
2207 
2208 static const struct dxil_value *
2209 load_ubo(struct ntd_context *ctx, const struct dxil_value *handle,
2210          const struct dxil_value *offset, enum overload_type overload)
2211 {
2212    assert(handle && offset);
2213 
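   /* cbufferLoadLegacy returns a whole 16-byte constant-buffer row as a
    * four-component aggregate; callers extract the components they need. */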
2214    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CBUFFER_LOAD_LEGACY);
2215    if (!opcode)
2216       return NULL;
2217 
2218    const struct dxil_value *args[] = {
2219       opcode, handle, offset
2220    };
2221 
2222    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cbufferLoadLegacy", overload);
2223    if (!func)
2224       return NULL;
2225    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2226 }
2227 
2228 static bool
2229 emit_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2230 {
2231    const struct dxil_value *opcode, *mode;
2232    const struct dxil_func *func;
2233    uint32_t flags = 0;
2234 
2235    if (nir_intrinsic_execution_scope(intr) == NIR_SCOPE_WORKGROUP)
2236       flags |= DXIL_BARRIER_MODE_SYNC_THREAD_GROUP;
2237 
2238    nir_variable_mode modes = nir_intrinsic_memory_modes(intr);
2239    nir_scope mem_scope = nir_intrinsic_memory_scope(intr);
2240 
2241    /* Currently vtn uses uniform to indicate image memory, which DXIL considers global */
2242    if (modes & nir_var_uniform)
2243       modes |= nir_var_mem_global;
2244 
2245    if (modes & (nir_var_mem_ssbo | nir_var_mem_global)) {
2246       if (mem_scope > NIR_SCOPE_WORKGROUP)
2247          flags |= DXIL_BARRIER_MODE_UAV_FENCE_GLOBAL;
2248       else
2249          flags |= DXIL_BARRIER_MODE_UAV_FENCE_THREAD_GROUP;
2250    }
2251 
2252    if (modes & nir_var_mem_shared)
2253       flags |= DXIL_BARRIER_MODE_GROUPSHARED_MEM_FENCE;
2254 
2255    func = dxil_get_function(&ctx->mod, "dx.op.barrier", DXIL_NONE);
2256    if (!func)
2257       return false;
2258 
2259    opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_BARRIER);
2260    if (!opcode)
2261       return false;
2262 
2263    mode = dxil_module_get_int32_const(&ctx->mod, flags);
2264    if (!mode)
2265       return false;
2266 
2267    const struct dxil_value *args[] = { opcode, mode };
2268 
2269    return dxil_emit_call_void(&ctx->mod, func,
2270                               args, ARRAY_SIZE(args));
2271 }
2272 
2273 static bool
2274 emit_load_global_invocation_id(struct ntd_context *ctx,
2275                                     nir_intrinsic_instr *intr)
2276 {
2277    assert(intr->dest.is_ssa);
2278    nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);
2279 
2280    for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
2281       if (comps & (1 << i)) {
2282          const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
2283          if (!idx)
2284             return false;
2285          const struct dxil_value *globalid = emit_threadid_call(ctx, idx);
2286 
2287          if (!globalid)
2288             return false;
2289 
2290          store_dest_value(ctx, &intr->dest, i, globalid);
2291       }
2292    }
2293    return true;
2294 }
2295 
2296 static bool
2297 emit_load_local_invocation_id(struct ntd_context *ctx,
2298                               nir_intrinsic_instr *intr)
2299 {
2300    assert(intr->dest.is_ssa);
2301    nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);
2302 
2303    for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
2304       if (comps & (1 << i)) {
2305          const struct dxil_value
2306             *idx = dxil_module_get_int32_const(&ctx->mod, i);
2307          if (!idx)
2308             return false;
2309          const struct dxil_value
2310             *threadidingroup = emit_threadidingroup_call(ctx, idx);
2311          if (!threadidingroup)
2312             return false;
2313          store_dest_value(ctx, &intr->dest, i, threadidingroup);
2314       }
2315    }
2316    return true;
2317 }
2318 
2319 static bool
2320 emit_load_local_invocation_index(struct ntd_context *ctx,
2321                                  nir_intrinsic_instr *intr)
2322 {
2323    assert(intr->dest.is_ssa);
2324 
2325    const struct dxil_value
2326       *flattenedthreadidingroup = emit_flattenedthreadidingroup_call(ctx);
2327    if (!flattenedthreadidingroup)
2328       return false;
2329    store_dest_value(ctx, &intr->dest, 0, flattenedthreadidingroup);
2330 
2331    return true;
2332 }
2333 
2334 static bool
2335 emit_load_local_workgroup_id(struct ntd_context *ctx,
2336                               nir_intrinsic_instr *intr)
2337 {
2338    assert(intr->dest.is_ssa);
2339    nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);
2340 
2341    for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
2342       if (comps & (1 << i)) {
2343          const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
2344          if (!idx)
2345             return false;
2346          const struct dxil_value *groupid = emit_groupid_call(ctx, idx);
2347          if (!groupid)
2348             return false;
2349          store_dest_value(ctx, &intr->dest, i, groupid);
2350       }
2351    }
2352    return true;
2353 }
2354 
2355 static bool
2356 emit_load_unary_external_function(struct ntd_context *ctx,
2357                                   nir_intrinsic_instr *intr, const char *name,
2358                                   int32_t dxil_intr)
2359 {
2360    const struct dxil_func *func =
2361       dxil_get_function(&ctx->mod, name, DXIL_I32);
2362    if (!func)
2363       return false;
2364 
2365    const struct dxil_value *opcode =
2366       dxil_module_get_int32_const(&ctx->mod, dxil_intr);
2367    if (!opcode)
2368       return false;
2369 
2370    const struct dxil_value *args[] = {opcode};
2371 
2372    const struct dxil_value *value =
2373       dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2374    store_dest_value(ctx, &intr->dest, 0, value);
2375 
2376    return true;
2377 }
2378 
2379 static const struct dxil_value *
2380 get_int32_undef(struct dxil_module *m)
2381 {
2382    const struct dxil_type *int32_type =
2383       dxil_module_get_int_type(m, 32);
2384    if (!int32_type)
2385       return NULL;
2386 
2387    return dxil_module_get_undef(m, int32_type);
2388 }
2389 
2390 static const struct dxil_value *
2391 emit_gep_for_index(struct ntd_context *ctx, const nir_variable *var,
2392                    const struct dxil_value *index)
2393 {
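   /* Constant shader-temp arrays were emitted as module-level globals and
    * recorded in ctx->consts; GEP into the global, with a leading zero
    * index to step through the pointer itself. */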
2394    assert(var->data.mode == nir_var_shader_temp);
2395 
2396    struct hash_entry *he = _mesa_hash_table_search(ctx->consts, var);
2397    assert(he != NULL);
2398    const struct dxil_value *ptr = he->data;
2399 
2400    const struct dxil_value *zero = dxil_module_get_int32_const(&ctx->mod, 0);
2401    if (!zero)
2402       return NULL;
2403 
2404    const struct dxil_value *ops[] = { ptr, zero, index };
2405    return dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
2406 }
2407 
2408 static const struct dxil_value *
2409 get_ubo_ssbo_handle(struct ntd_context *ctx, nir_src *src, enum dxil_resource_class class, unsigned base_binding)
2410 {
2411    /* This source might be one of:
2412     * 1. Constant resource index - just look it up in precomputed handle arrays
2413     *    If it's null in that array, create a handle, and store the result
2414     * 2. A handle from load_vulkan_descriptor - just get the stored SSA value
2415     * 3. Dynamic resource index - create a handle for it here
2416     */
2417    assert(src->ssa->num_components == 1 && src->ssa->bit_size == 32);
2418    nir_const_value *const_block_index = nir_src_as_const_value(*src);
2419    const struct dxil_value **handle_entry = NULL;
2420    if (const_block_index) {
2421       assert(!ctx->opts->vulkan_environment);
2422       switch (class) {
2423       case DXIL_RESOURCE_CLASS_CBV:
2424          handle_entry = &ctx->cbv_handles[const_block_index->u32];
2425          break;
2426       case DXIL_RESOURCE_CLASS_UAV:
2427          handle_entry = &ctx->uav_handles[const_block_index->u32];
2428          break;
2429       case DXIL_RESOURCE_CLASS_SRV:
2430          handle_entry = &ctx->srv_handles[const_block_index->u32];
2431          break;
2432       default:
2433          unreachable("Unexpected resource class");
2434       }
2435    }
2436 
2437    if (handle_entry && *handle_entry)
2438       return *handle_entry;
2439 
2440    const struct dxil_value *value = get_src_ssa(ctx, src->ssa, 0);
2441    if (ctx->opts->vulkan_environment) {
2442       return value;
2443    }
2444 
2445    const struct dxil_value *handle = emit_createhandle_call(ctx, class,
2446       get_resource_id(ctx, class, 0, base_binding), value, !const_block_index);
2447    if (handle_entry)
2448       *handle_entry = handle;
2449 
2450    return handle;
2451 }
2452 
2453 static bool
2454 emit_load_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2455 {
2456    const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
2457 
2458    nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
2459    enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV;
2460    if (var && var->data.access & ACCESS_NON_WRITEABLE)
2461       class = DXIL_RESOURCE_CLASS_SRV;
2462 
2463    const struct dxil_value *handle = get_ubo_ssbo_handle(ctx, &intr->src[0], class, 0);
2464    const struct dxil_value *offset =
2465       get_src(ctx, &intr->src[1], 0, nir_type_uint);
2466    if (!int32_undef || !handle || !offset)
2467       return false;
2468 
2469    assert(nir_src_bit_size(intr->src[0]) == 32);
2470    assert(nir_intrinsic_dest_components(intr) <= 4);
2471 
2472    const struct dxil_value *coord[2] = {
2473       offset,
2474       int32_undef
2475    };
2476 
2477    const struct dxil_value *load = emit_bufferload_call(ctx, handle, coord, DXIL_I32);
2478    if (!load)
2479       return false;
2480 
2481    for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
2482       const struct dxil_value *val =
2483          dxil_emit_extractval(&ctx->mod, load, i);
2484       if (!val)
2485          return false;
2486       store_dest_value(ctx, &intr->dest, i, val);
2487    }
2488    return true;
2489 }
2490 
2491 static bool
2492 emit_store_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2493 {
2494    const struct dxil_value* handle = get_ubo_ssbo_handle(ctx, &intr->src[1], DXIL_RESOURCE_CLASS_UAV, 0);
2495    const struct dxil_value *offset =
2496       get_src(ctx, &intr->src[2], 0, nir_type_uint);
2497    if (!handle || !offset)
2498       return false;
2499 
2500    assert(nir_src_bit_size(intr->src[0]) == 32);
2501    unsigned num_components = nir_src_num_components(intr->src[0]);
2502    assert(num_components <= 4);
2503    const struct dxil_value *value[4];
2504    for (unsigned i = 0; i < num_components; ++i) {
2505       value[i] = get_src(ctx, &intr->src[0], i, nir_type_uint);
2506       if (!value[i])
2507          return false;
2508    }
2509 
2510    const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
2511    if (!int32_undef)
2512       return false;
2513 
2514    const struct dxil_value *coord[2] = {
2515       offset,
2516       int32_undef
2517    };
2518 
2519    for (int i = num_components; i < 4; ++i)
2520       value[i] = int32_undef;
2521 
2522    const struct dxil_value *write_mask =
2523       dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
2524    if (!write_mask)
2525       return false;
2526 
2527    return emit_bufferstore_call(ctx, handle, coord, value, write_mask, DXIL_I32);
2528 }
2529 
2530 static bool
2531 emit_store_ssbo_masked(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2532 {
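   /* DXIL has no masked buffer store, so emulate one with two atomics:
    * AND with the mask clears the bits being replaced, then OR writes the
    * new bits. */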
2533    const struct dxil_value *value =
2534       get_src(ctx, &intr->src[0], 0, nir_type_uint);
2535    const struct dxil_value *mask =
2536       get_src(ctx, &intr->src[1], 0, nir_type_uint);
2537    const struct dxil_value* handle = get_ubo_ssbo_handle(ctx, &intr->src[2], DXIL_RESOURCE_CLASS_UAV, 0);
2538    const struct dxil_value *offset =
2539       get_src(ctx, &intr->src[3], 0, nir_type_uint);
2540    if (!value || !mask || !handle || !offset)
2541       return false;
2542 
2543    const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
2544    if (!int32_undef)
2545       return false;
2546 
2547    const struct dxil_value *coord[3] = {
2548       offset, int32_undef, int32_undef
2549    };
2550 
2551    return
2552       emit_atomic_binop(ctx, handle, DXIL_ATOMIC_AND, coord, mask) != NULL &&
2553       emit_atomic_binop(ctx, handle, DXIL_ATOMIC_OR, coord, value) != NULL;
2554 }
2555 
2556 static bool
2557 emit_store_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2558 {
2559    const struct dxil_value *zero, *index;
2560 
2561    /* All shared mem accesses should have been lowered to scalar 32bit
2562     * accesses.
2563     */
2564    assert(nir_src_bit_size(intr->src[0]) == 32);
2565    assert(nir_src_num_components(intr->src[0]) == 1);
2566 
2567    zero = dxil_module_get_int32_const(&ctx->mod, 0);
2568    if (!zero)
2569       return false;
2570 
2571    if (intr->intrinsic == nir_intrinsic_store_shared_dxil)
2572       index = get_src(ctx, &intr->src[1], 0, nir_type_uint);
2573    else
2574       index = get_src(ctx, &intr->src[2], 0, nir_type_uint);
2575    if (!index)
2576       return false;
2577 
2578    const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
2579    const struct dxil_value *ptr, *value;
2580 
2581    ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
2582    if (!ptr)
2583       return false;
2584 
2585    value = get_src(ctx, &intr->src[0], 0, nir_type_uint);
2586    if (!value)
2587       return false;
2588 
2589    if (intr->intrinsic == nir_intrinsic_store_shared_dxil)
2590       return dxil_emit_store(&ctx->mod, value, ptr, 4, false);
2591 
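   /* Masked variant: clear the target bits with an atomic AND, then set
    * the new ones with an atomic OR. */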
2592    const struct dxil_value *mask = get_src(ctx, &intr->src[1], 0, nir_type_uint);
2593    if (!mask)
2594       return false;
2595 
2596    if (!dxil_emit_atomicrmw(&ctx->mod, mask, ptr, DXIL_RMWOP_AND, false,
2597                             DXIL_ATOMIC_ORDERING_ACQREL,
2598                             DXIL_SYNC_SCOPE_CROSSTHREAD))
2599       return false;
2600 
2601    if (!dxil_emit_atomicrmw(&ctx->mod, value, ptr, DXIL_RMWOP_OR, false,
2602                             DXIL_ATOMIC_ORDERING_ACQREL,
2603                             DXIL_SYNC_SCOPE_CROSSTHREAD))
2604       return false;
2605 
2606    return true;
2607 }
2608 
2609 static bool
2610 emit_store_scratch(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2611 {
2612    const struct dxil_value *zero, *index;
2613 
2614    /* All scratch mem accesses should have been lowered to scalar 32bit
2615     * accesses.
2616     */
2617    assert(nir_src_bit_size(intr->src[0]) == 32);
2618    assert(nir_src_num_components(intr->src[0]) == 1);
2619 
2620    zero = dxil_module_get_int32_const(&ctx->mod, 0);
2621    if (!zero)
2622       return false;
2623 
2624    index = get_src(ctx, &intr->src[1], 0, nir_type_uint);
2625    if (!index)
2626       return false;
2627 
2628    const struct dxil_value *ops[] = { ctx->scratchvars, zero, index };
2629    const struct dxil_value *ptr, *value;
2630 
2631    ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
2632    if (!ptr)
2633       return false;
2634 
2635    value = get_src(ctx, &intr->src[0], 0, nir_type_uint);
2636    if (!value)
2637       return false;
2638 
2639    return dxil_emit_store(&ctx->mod, value, ptr, 4, false);
2640 }
2641 
2642 static bool
2643 emit_load_ubo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2644 {
2645    const struct dxil_value* handle = get_ubo_ssbo_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, 0);
2646    if (!handle)
2647       return false;
2648 
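   /* cbufferLoadLegacy addresses the buffer in 16-byte rows, so turn the
    * byte offset into a row index (>> 4). */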
2649    const struct dxil_value *offset;
2650    nir_const_value *const_offset = nir_src_as_const_value(intr->src[1]);
2651    if (const_offset) {
2652       offset = dxil_module_get_int32_const(&ctx->mod, const_offset->i32 >> 4);
2653    } else {
2654       const struct dxil_value *offset_src = get_src(ctx, &intr->src[1], 0, nir_type_uint);
2655       const struct dxil_value *c4 = dxil_module_get_int32_const(&ctx->mod, 4);
2656       if (!offset_src || !c4)
2657          return false;
2658 
2659       offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ASHR, offset_src, c4, 0);
2660    }
2661 
2662    const struct dxil_value *agg = load_ubo(ctx, handle, offset, DXIL_F32);
2663 
2664    if (!agg)
2665       return false;
2666 
2667    for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
2668       const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, agg, i);
2669       store_dest(ctx, &intr->dest, i, retval,
2670                  nir_dest_bit_size(intr->dest) > 1 ? nir_type_float : nir_type_bool);
2671    }
2672    return true;
2673 }
2674 
2675 static bool
2676 emit_load_ubo_dxil(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2677 {
2678    assert(nir_dest_num_components(intr->dest) <= 4);
2679    assert(nir_dest_bit_size(intr->dest) == 32);
2680 
2681    const struct dxil_value* handle = get_ubo_ssbo_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, 0);
2682    const struct dxil_value *offset =
2683       get_src(ctx, &intr->src[1], 0, nir_type_uint);
2684 
2685    if (!handle || !offset)
2686       return false;
2687 
2688    const struct dxil_value *agg = load_ubo(ctx, handle, offset, DXIL_I32);
2689    if (!agg)
2690       return false;
2691 
2692    for (unsigned i = 0; i < nir_dest_num_components(intr->dest); i++)
2693       store_dest_value(ctx, &intr->dest, i,
2694                        dxil_emit_extractval(&ctx->mod, agg, i));
2695 
2696    return true;
2697 }
2698 
2699 static bool
2700 emit_store_output(struct ntd_context *ctx, nir_intrinsic_instr *intr,
2701                   nir_variable *output)
2702 {
2703    nir_alu_type out_type = nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(output->type));
2704    enum overload_type overload = DXIL_F32;
2705    if (output->data.compact)
2706       out_type = nir_type_float;
2707    else
2708       overload = get_overload(out_type, glsl_get_bit_size(output->type));
2709    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.storeOutput", overload);
2710 
2711    if (!func)
2712       return false;
2713 
2714    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_STORE_OUTPUT);
2715    const struct dxil_value *output_id = dxil_module_get_int32_const(&ctx->mod, (int)output->data.driver_location);
2716    const struct dxil_value *row = dxil_module_get_int32_const(&ctx->mod, 0);
2717 
2718    bool success = true;
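   /* Compact outputs (e.g. clip/cull distance arrays) are written one
    * scalar at a time; the array index selects the column. */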
2719    if (output->data.compact) {
2720       nir_deref_instr *array_deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr);
2721       unsigned array_index = nir_src_as_uint(array_deref->arr.index);
2722 
2723       const struct dxil_value *col = dxil_module_get_int8_const(&ctx->mod, array_index);
2724       const struct dxil_value *value = get_src(ctx, &intr->src[1], 0, out_type);
2725       if (!col || !value)
2726          return false;
2727 
2728       const struct dxil_value *args[] = {
2729          opcode, output_id, row, col, value
2730       };
2731       success = dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
2732    } else {
2733       uint32_t writemask = nir_intrinsic_write_mask(intr);
2734       for (unsigned i = 0; i < nir_src_num_components(intr->src[1]) && success; ++i) {
2735          if (writemask & (1 << i)) {
2736             const struct dxil_value *col = dxil_module_get_int8_const(&ctx->mod, i);
2737             const struct dxil_value *value = get_src(ctx, &intr->src[1], i, out_type);
2738             if (!col || !value)
2739                return false;
2740 
2741             const struct dxil_value *args[] = {
2742                opcode, output_id, row, col, value
2743             };
2744             success &= dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
2745          }
2746       }
2747    }
2748    return success;
2749 }
2750 
2751 static bool
2752 emit_store_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2753 {
2754    nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
2755    nir_variable *var = nir_deref_instr_get_variable(deref);
2756 
2757    switch (var->data.mode) {
2758    case nir_var_shader_out:
2759       return emit_store_output(ctx, intr, var);
2760 
2761    default:
2762       unreachable("unsupported nir_variable_mode");
2763    }
2764 }
2765 
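/* Load from an arrayed input with dx.op.loadInput. For geometry shaders the
 * array index selects the input vertex; for other stages it selects the row.
 */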
2766 static bool
2767 emit_load_input_array(struct ntd_context *ctx, nir_intrinsic_instr *intr, nir_variable *var, nir_src *index)
2768 {
2769    assert(var);
2770    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LOAD_INPUT);
2771    const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, var->data.driver_location);
2772    const struct dxil_value *vertex_id;
2773    const struct dxil_value *row;
2774 
2775    if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) {
2776       vertex_id = get_src(ctx, index, 0, nir_type_int);
2777       row = dxil_module_get_int32_const(&ctx->mod, 0);
2778    } else {
2779       const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
2780       vertex_id = dxil_module_get_undef(&ctx->mod, int32_type);
2781       row = get_src(ctx, index, 0, nir_type_int);
2782    }
2783 
2784    if (!opcode || !input_id || !vertex_id || !row)
2785       return false;
2786 
2787    nir_alu_type out_type = nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(glsl_get_array_element(var->type)));
2788    enum overload_type overload = get_overload(out_type, glsl_get_bit_size(glsl_get_array_element(var->type)));
2789 
2790    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.loadInput", overload);
2791 
2792    if (!func)
2793       return false;
2794 
2795    for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
2796       const struct dxil_value *comp = dxil_module_get_int8_const(&ctx->mod, i);
2797       if (!comp)
2798          return false;
2799 
2800       const struct dxil_value *args[] = {
2801          opcode, input_id, row, comp, vertex_id
2802       };
2803 
2804       const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2805       if (!retval)
2806          return false;
2807       store_dest(ctx, &intr->dest, i, retval, out_type);
2808    }
2809    return true;
2810 }
2811 
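/* Load from a compact input array (e.g. clip/cull distances): the innermost
 * array index selects the column of a single float row, and in geometry
 * shaders the parent deref supplies the vertex index.
 */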
2812 static bool
2813 emit_load_compact_input_array(struct ntd_context *ctx, nir_intrinsic_instr *intr, nir_variable *var, nir_deref_instr *deref)
2814 {
2815    assert(var);
2816    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LOAD_INPUT);
2817    const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, var->data.driver_location);
2818    const struct dxil_value *row = dxil_module_get_int32_const(&ctx->mod, 0);
2819    const struct dxil_value *vertex_id;
2820 
2821    nir_src *col = &deref->arr.index;
2822    assert(nir_src_is_const(*col));
2823 
2824    if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) {
2825       nir_deref_instr *deref_parent = nir_deref_instr_parent(deref);
2826       assert(deref_parent->deref_type == nir_deref_type_array);
2827 
2828       vertex_id = get_src(ctx, &deref_parent->arr.index, 0, nir_type_int);
2829    } else {
2830       const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
2831       vertex_id = dxil_module_get_undef(&ctx->mod, int32_type);
2832    }
2833 
2834    if (!opcode || !input_id || !row || !vertex_id)
2835       return false;
2836 
2837    nir_alu_type out_type = nir_type_float;
2838    enum overload_type overload = get_overload(out_type, 32);
2839 
2840    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.loadInput", overload);
2841 
2842    if (!func)
2843       return false;
2844 
2845    const struct dxil_value *comp = dxil_module_get_int8_const(&ctx->mod, nir_src_as_int(*col));
2846    if (!comp)
2847       return false;
2848 
2849    const struct dxil_value *args[] = {
2850       opcode, input_id, row, comp, vertex_id
2851    };
2852 
2853    const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2854    if (!retval)
2855       return false;
2856    store_dest(ctx, &intr->dest, 0, retval, out_type);
2857    return true;
2858 }
2859 
2860 static bool
2861 emit_load_input_interpolated(struct ntd_context *ctx, nir_intrinsic_instr *intr, nir_variable *var)
2862 {
2863    assert(var);
2864    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LOAD_INPUT);
2865    const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, var->data.driver_location);
2866    const struct dxil_value *row = dxil_module_get_int32_const(&ctx->mod, 0);
2867    const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
2868    const struct dxil_value *vertex_id = dxil_module_get_undef(&ctx->mod, int32_type);
2869 
2870    if (!opcode || !input_id || !row || !int32_type || !vertex_id)
2871       return false;
2872 
2873    nir_alu_type out_type = nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(var->type));
2874    enum overload_type overload = get_overload(out_type, glsl_get_bit_size(var->type));
2875 
2876    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.loadInput", overload);
2877 
2878    if (!func)
2879       return false;
2880 
2881    for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
2882       const struct dxil_value *comp = dxil_module_get_int8_const(&ctx->mod, i);
2883 
2884       const struct dxil_value *args[] = {
2885          opcode, input_id, row, comp, vertex_id
2886       };
2887 
2888       const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2889       if (!retval)
2890          return false;
2891       store_dest(ctx, &intr->dest, i, retval, out_type);
2892    }
2893    return true;
2894 }
2895 
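/* Load a flat-shaded input with dx.op.attributeAtVertex, fetching the
 * attribute value at the configured provoking vertex instead of an
 * interpolated value.
 */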
2896 static bool
2897 emit_load_input_flat(struct ntd_context *ctx, nir_intrinsic_instr *intr, nir_variable *var)
2898 {
2899    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATTRIBUTE_AT_VERTEX);
2900    const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, (int)var->data.driver_location);
2901    const struct dxil_value *row = dxil_module_get_int32_const(&ctx->mod, 0);
2902    const struct dxil_value *vertex_id = dxil_module_get_int8_const(&ctx->mod, ctx->opts->provoking_vertex);
2903 
2904    nir_alu_type out_type = nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(var->type));
2905    enum overload_type overload = get_overload(out_type, glsl_get_bit_size(var->type));
2906 
2907    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.attributeAtVertex", overload);
2908    if (!func)
2909       return false;
2910 
2911    for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
2912       const struct dxil_value *comp = dxil_module_get_int8_const(&ctx->mod, i);
2913       const struct dxil_value *args[] = {
2914          opcode, input_id, row, comp, vertex_id
2915       };
2916 
2917       const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2918       if (!retval)
2919          return false;
2920 
2921       store_dest(ctx, &intr->dest, i, retval, out_type);
2922    }
2923    return true;
2924 }
2925 
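/* attributeAtVertex is only usable for non-integer flat inputs in pixel
 * shaders, and is only needed when the provoking vertex differs from
 * vertex 0; everything else goes through the regular loadInput path.
 */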
2926 static bool
2927 emit_load_input(struct ntd_context *ctx, nir_intrinsic_instr *intr,
2928                 nir_variable *input)
2929 {
2930    if (ctx->mod.shader_kind != DXIL_PIXEL_SHADER ||
2931        input->data.interpolation != INTERP_MODE_FLAT ||
2932        !ctx->opts->interpolate_at_vertex ||
2933        ctx->opts->provoking_vertex == 0 ||
2934        glsl_type_is_integer(input->type))
2935       return emit_load_input_interpolated(ctx, intr, input);
2936    else
2937       return emit_load_input_flat(ctx, intr, input);
2938 }
2939 
2940 static bool
2941 emit_load_ptr(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2942 {
2943    struct nir_variable *var =
2944       nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
2945 
2946    const struct dxil_value *index =
2947       get_src(ctx, &intr->src[1], 0, nir_type_uint);
2948    if (!index)
2949       return false;
2950 
2951    const struct dxil_value *ptr = emit_gep_for_index(ctx, var, index);
2952    if (!ptr)
2953       return false;
2954 
2955    const struct dxil_value *retval =
2956       dxil_emit_load(&ctx->mod, ptr, 4, false);
2957    if (!retval)
2958       return false;
2959 
2960    store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
2961    return true;
2962 }
2963 
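/* Shared (TGSM) loads: index into the shared-memory global with an inbounds
 * GEP and do a scalar 32-bit load; vector and small-bit-size accesses were
 * already lowered away.
 */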
2964 static bool
2965 emit_load_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2966 {
2967    const struct dxil_value *zero, *index;
2968    unsigned bit_size = nir_dest_bit_size(intr->dest);
2969    unsigned align = bit_size / 8;
2970 
2971    /* All shared mem accesses should have been lowered to scalar 32-bit
2972     * accesses.
2973     */
2974    assert(bit_size == 32);
2975    assert(nir_dest_num_components(intr->dest) == 1);
2976 
2977    zero = dxil_module_get_int32_const(&ctx->mod, 0);
2978    if (!zero)
2979       return false;
2980 
2981    index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
2982    if (!index)
2983       return false;
2984 
2985    const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
2986    const struct dxil_value *ptr, *retval;
2987 
2988    ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
2989    if (!ptr)
2990       return false;
2991 
2992    retval = dxil_emit_load(&ctx->mod, ptr, align, false);
2993    if (!retval)
2994       return false;
2995 
2996    store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
2997    return true;
2998 }
2999 
3000 static bool
3001 emit_load_scratch(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3002 {
3003    const struct dxil_value *zero, *index;
3004    unsigned bit_size = nir_dest_bit_size(intr->dest);
3005    unsigned align = bit_size / 8;
3006 
3007    /* All scratch mem accesses should have been lowered to scalar 32-bit
3008     * accesses.
3009     */
3010    assert(bit_size == 32);
3011    assert(nir_dest_num_components(intr->dest) == 1);
3012 
3013    zero = dxil_module_get_int32_const(&ctx->mod, 0);
3014    if (!zero)
3015       return false;
3016 
3017    index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
3018    if (!index)
3019       return false;
3020 
3021    const struct dxil_value *ops[] = { ctx->scratchvars, zero, index };
3022    const struct dxil_value *ptr, *retval;
3023 
3024    ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
3025    if (!ptr)
3026       return false;
3027 
3028    retval = dxil_emit_load(&ctx->mod, ptr, align, false);
3029    if (!retval)
3030       return false;
3031 
3032    store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
3033    return true;
3034 }
3035 
3036 static bool
3037 emit_load_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3038 {
3039    assert(intr->src[0].is_ssa);
3040    nir_deref_instr *deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr);
3041    nir_variable *var = nir_deref_instr_get_variable(deref);
3042 
3043    switch (var->data.mode) {
3044    case nir_var_shader_in:
3045       if (glsl_type_is_array(var->type)) {
3046          if (var->data.compact)
3047             return emit_load_compact_input_array(ctx, intr, var, deref);
3048          else
3049             return emit_load_input_array(ctx, intr, var, &deref->arr.index);
3050       }
3051       return emit_load_input(ctx, intr, var);
3052 
3053    default:
3054       unreachable("unsupported nir_variable_mode");
3055    }
3056 }
3057 
3058 static bool
3059 emit_discard_if_with_value(struct ntd_context *ctx, const struct dxil_value *value)
3060 {
3061    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DISCARD);
3062    if (!opcode)
3063       return false;
3064 
3065    const struct dxil_value *args[] = {
3066      opcode,
3067      value
3068    };
3069 
3070    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.discard", DXIL_NONE);
3071    if (!func)
3072       return false;
3073 
3074    return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
3075 }
3076 
3077 static bool
3078 emit_discard_if(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3079 {
3080    const struct dxil_value *value = get_src(ctx, &intr->src[0], 0, nir_type_bool);
3081    if (!value)
3082       return false;
3083 
3084    return emit_discard_if_with_value(ctx, value);
3085 }
3086 
3087 static bool
3088 emit_discard(struct ntd_context *ctx)
3089 {
3090    const struct dxil_value *value = dxil_module_get_int1_const(&ctx->mod, true);
3091    return emit_discard_if_with_value(ctx, value);
3092 }
3093 
3094 static bool
3095 emit_emit_vertex(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3096 {
3097    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_EMIT_STREAM);
3098    const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
3099    if (!opcode || !stream_id)
3100       return false;
3101 
3102    const struct dxil_value *args[] = {
3103      opcode,
3104      stream_id
3105    };
3106 
3107    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.emitStream", DXIL_NONE);
3108    if (!func)
3109       return false;
3110 
3111    return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
3112 }
3113 
3114 static bool
3115 emit_end_primitive(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3116 {
3117    const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CUT_STREAM);
3118    const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
3119    if (!opcode || !stream_id)
3120       return false;
3121 
3122    const struct dxil_value *args[] = {
3123      opcode,
3124      stream_id
3125    };
3126 
3127    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cutStream", DXIL_NONE);
3128    if (!func)
3129       return false;
3130 
3131    return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
3132 }
3133 
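/* Image stores: the UAV handle is an SSA value in the Vulkan path and comes
 * from the per-binding handle cache otherwise. Unused coordinate and value
 * components are padded with undef, and the write goes out as either a
 * bufferStore or a textureStore depending on the image dimension.
 */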
3134 static bool
3135 emit_image_store(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3136 {
3137    const struct dxil_value *handle;
3138    bool is_array = false;
3139    if (ctx->opts->vulkan_environment) {
3140       assert(intr->intrinsic == nir_intrinsic_image_deref_store);
3141       handle = get_src_ssa(ctx, intr->src[0].ssa, 0);
3142       is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
3143    } else {
3144       assert(intr->intrinsic == nir_intrinsic_image_store);
3145       int binding = nir_src_as_int(intr->src[0]);
3146       is_array = nir_intrinsic_image_array(intr);
3147       handle = ctx->uav_handles[binding];
3148    }
3149    if (!handle)
3150       return false;
3151 
3152    const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
3153    if (!int32_undef)
3154       return false;
3155 
3156    const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
3157    enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_store ?
3158       nir_intrinsic_image_dim(intr) :
3159       glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type);
3160    unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
3161    if (is_array)
3162       ++num_coords;
3163 
3164    assert(num_coords <= nir_src_num_components(intr->src[1]));
3165    for (unsigned i = 0; i < num_coords; ++i) {
3166       coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
3167       if (!coord[i])
3168          return false;
3169    }
3170 
3171    nir_alu_type in_type = nir_intrinsic_src_type(intr);
3172    enum overload_type overload = get_overload(in_type, 32);
3173 
3174    assert(nir_src_bit_size(intr->src[3]) == 32);
3175    unsigned num_components = nir_src_num_components(intr->src[3]);
3176    assert(num_components <= 4);
3177    const struct dxil_value *value[4];
3178    for (unsigned i = 0; i < num_components; ++i) {
3179       value[i] = get_src(ctx, &intr->src[3], i, in_type);
3180       if (!value[i])
3181          return false;
3182    }
3183 
3184    for (int i = num_components; i < 4; ++i)
3185       value[i] = int32_undef;
3186 
3187    const struct dxil_value *write_mask =
3188       dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
3189    if (!write_mask)
3190       return false;
3191 
3192    if (image_dim == GLSL_SAMPLER_DIM_BUF) {
3193       coord[1] = int32_undef;
3194       return emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload);
3195    } else
3196       return emit_texturestore_call(ctx, handle, coord, value, write_mask, overload);
3197 }
3198 
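/* Image loads mirror emit_image_store for handle and coordinate setup. Note
 * that loading more than one component requires the "typed UAV load
 * additional formats" feature, which is flagged on the module below.
 */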
3199 static bool
3200 emit_image_load(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3201 {
3202    const struct dxil_value *handle;
3203    bool is_array = false;
3204    if (ctx->opts->vulkan_environment) {
3205       assert(intr->intrinsic == nir_intrinsic_image_deref_load);
3206       handle = get_src_ssa(ctx, intr->src[0].ssa, 0);
3207       is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
3208    } else {
3209       assert(intr->intrinsic == nir_intrinsic_image_load);
3210       int binding = nir_src_as_int(intr->src[0]);
3211       is_array = nir_intrinsic_image_array(intr);
3212       handle = ctx->uav_handles[binding];
3213    }
3214    if (!handle)
3215       return false;
3216 
3217    const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
3218    if (!int32_undef)
3219       return false;
3220 
3221    const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
3222    enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_load ?
3223       nir_intrinsic_image_dim(intr) :
3224       glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type);
3225    unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
3226    if (is_array)
3227       ++num_coords;
3228 
3229    assert(num_coords <= nir_src_num_components(intr->src[1]));
3230    for (unsigned i = 0; i < num_coords; ++i) {
3231       coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
3232       if (!coord[i])
3233          return false;
3234    }
3235 
3236    nir_alu_type out_type = nir_intrinsic_dest_type(intr);
3237    enum overload_type overload = get_overload(out_type, 32);
3238 
3239    const struct dxil_value *load_result;
3240    if (image_dim == GLSL_SAMPLER_DIM_BUF) {
3241       coord[1] = int32_undef;
3242       load_result = emit_bufferload_call(ctx, handle, coord, overload);
3243    } else
3244       load_result = emit_textureload_call(ctx, handle, coord, overload);
3245 
3246    if (!load_result)
3247       return false;
3248 
3249    assert(nir_dest_bit_size(intr->dest) == 32);
3250    unsigned num_components = nir_dest_num_components(intr->dest);
3251    assert(num_components <= 4);
3252    for (unsigned i = 0; i < num_components; ++i) {
3253       const struct dxil_value *component = dxil_emit_extractval(&ctx->mod, load_result, i);
3254       if (!component)
3255          return false;
3256       store_dest(ctx, &intr->dest, i, component, out_type);
3257    }
3258 
3259    if (num_components > 1)
3260       ctx->mod.feats.typed_uav_load_additional_formats = true;
3261 
3262    return true;
3263 }
3264 
3265 struct texop_parameters {
3266    const struct dxil_value *tex;
3267    const struct dxil_value *sampler;
3268    const struct dxil_value *bias, *lod_or_sample, *min_lod;
3269    const struct dxil_value *coord[4], *offset[3], *dx[3], *dy[3];
3270    const struct dxil_value *cmp;
3271    enum overload_type overload;
3272 };
3273 
3274 static const struct dxil_value *
3275 emit_texture_size(struct ntd_context *ctx, struct texop_parameters *params)
3276 {
3277    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.getDimensions", DXIL_NONE);
3278    if (!func)
3279       return NULL;
3280 
3281    const struct dxil_value *args[] = {
3282       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_SIZE),
3283       params->tex,
3284       params->lod_or_sample
3285    };
3286 
3287    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3288 }
3289 
3290 static bool
3291 emit_image_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3292 {
3293    const struct dxil_value *handle;
3294    if (ctx->opts->vulkan_environment) {
3295       assert(intr->intrinsic == nir_intrinsic_image_deref_size);
3296       handle = get_src_ssa(ctx, intr->src[0].ssa, 0);
3297    }
3298    else {
3299       assert(intr->intrinsic == nir_intrinsic_image_size);
3300       int binding = nir_src_as_int(intr->src[0]);
3301       handle = ctx->uav_handles[binding];
3302    }
3303    if (!handle)
3304       return false;
3305 
3306    const struct dxil_value *lod = get_src(ctx, &intr->src[1], 0, nir_type_uint);
3307    if (!lod)
3308       return false;
3309 
3310    struct texop_parameters params = {
3311       .tex = handle,
3312       .lod_or_sample = lod
3313    };
3314    const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
3315    if (!dimensions)
3316       return false;
3317 
3318    for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
3319       const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, i);
3320       store_dest(ctx, &intr->dest, i, retval, nir_type_uint);
3321    }
3322 
3323    return true;
3324 }
3325 
3326 static bool
3327 emit_get_ssbo_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3328 {
3329    const struct dxil_value* handle = NULL;
3330    if (ctx->opts->vulkan_environment) {
3331       handle = get_src_ssa(ctx, intr->src[0].ssa, 0);
3332    } else {
3333       int binding = nir_src_as_int(intr->src[0]);
3334       handle = ctx->uav_handles[binding];
3335    }
3336 
3337    if (!handle)
3338      return false;
3339 
3340    struct texop_parameters params = {
3341       .tex = handle,
3342       .lod_or_sample = dxil_module_get_undef(
3343                         &ctx->mod, dxil_module_get_int_type(&ctx->mod, 32))
3344    };
3345 
3346    const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
3347    if (!dimensions)
3348       return false;
3349 
3350    const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, 0);
3351    store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
3352 
3353    return true;
3354 }
3355 
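/* SSBO atomics map onto a DXIL atomic binop on the raw UAV, with the byte
 * offset as the first coordinate and the remaining coordinates undefined.
 */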
3356 static bool
3357 emit_ssbo_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr,
3358                    enum dxil_atomic_op op, nir_alu_type type)
3359 {
3360    const struct dxil_value* handle = get_ubo_ssbo_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, 0);
3361    const struct dxil_value *offset =
3362       get_src(ctx, &intr->src[1], 0, nir_type_uint);
3363    const struct dxil_value *value =
3364       get_src(ctx, &intr->src[2], 0, type);
3365 
3366    if (!value || !handle || !offset)
3367       return false;
3368 
3369    const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
3370    if (!int32_undef)
3371       return false;
3372 
3373    const struct dxil_value *coord[3] = {
3374       offset, int32_undef, int32_undef
3375    };
3376 
3377    const struct dxil_value *retval =
3378       emit_atomic_binop(ctx, handle, op, coord, value);
3379 
3380    if (!retval)
3381       return false;
3382 
3383    store_dest(ctx, &intr->dest, 0, retval, type);
3384    return true;
3385 }
3386 
3387 static bool
3388 emit_ssbo_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3389 {
3390    const struct dxil_value* handle = get_ubo_ssbo_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, 0);
3391    const struct dxil_value *offset =
3392       get_src(ctx, &intr->src[1], 0, nir_type_uint);
3393    const struct dxil_value *cmpval =
3394       get_src(ctx, &intr->src[2], 0, nir_type_int);
3395    const struct dxil_value *newval =
3396       get_src(ctx, &intr->src[3], 0, nir_type_int);
3397 
3398    if (!cmpval || !newval || !handle || !offset)
3399       return false;
3400 
3401    const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
3402    if (!int32_undef)
3403       return false;
3404 
3405    const struct dxil_value *coord[3] = {
3406       offset, int32_undef, int32_undef
3407    };
3408 
3409    const struct dxil_value *retval =
3410       emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval);
3411 
3412    if (!retval)
3413       return false;
3414 
3415    store_dest(ctx, &intr->dest, 0, retval, nir_type_int);
3416    return true;
3417 }
3418 
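/* Shared-memory atomics become LLVM atomicrmw instructions on a GEP into
 * the shared array, using acquire/release ordering and cross-thread scope.
 */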
3419 static bool
3420 emit_shared_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr,
3421                    enum dxil_rmw_op op, nir_alu_type type)
3422 {
3423    const struct dxil_value *zero, *index;
3424 
3425    assert(nir_src_bit_size(intr->src[1]) == 32);
3426 
3427    zero = dxil_module_get_int32_const(&ctx->mod, 0);
3428    if (!zero)
3429       return false;
3430 
3431    index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
3432    if (!index)
3433       return false;
3434 
3435    const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
3436    const struct dxil_value *ptr, *value, *retval;
3437 
3438    ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
3439    if (!ptr)
3440       return false;
3441 
3442    value = get_src(ctx, &intr->src[1], 0, type);
3443    if (!value)
3444       return false;
3445 
3446    retval = dxil_emit_atomicrmw(&ctx->mod, value, ptr, op, false,
3447                                 DXIL_ATOMIC_ORDERING_ACQREL,
3448                                 DXIL_SYNC_SCOPE_CROSSTHREAD);
3449    if (!retval)
3450       return false;
3451 
3452    store_dest(ctx, &intr->dest, 0, retval, type);
3453    return true;
3454 }
3455 
3456 static bool
3457 emit_shared_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3458 {
3459    const struct dxil_value *zero, *index;
3460 
3461    assert(nir_src_bit_size(intr->src[1]) == 32);
3462 
3463    zero = dxil_module_get_int32_const(&ctx->mod, 0);
3464    if (!zero)
3465       return false;
3466 
3467    index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
3468    if (!index)
3469       return false;
3470 
3471    const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
3472    const struct dxil_value *ptr, *cmpval, *newval, *retval;
3473 
3474    ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
3475    if (!ptr)
3476       return false;
3477 
3478    cmpval = get_src(ctx, &intr->src[1], 0, nir_type_uint);
3479    newval = get_src(ctx, &intr->src[2], 0, nir_type_uint);
3480    if (!cmpval || !newval)
3481       return false;
3482 
3483    retval = dxil_emit_cmpxchg(&ctx->mod, cmpval, newval, ptr, false,
3484                               DXIL_ATOMIC_ORDERING_ACQREL,
3485                               DXIL_SYNC_SCOPE_CROSSTHREAD);
3486    if (!retval)
3487       return false;
3488 
3489    store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
3490    return true;
3491 }
3492 
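/* vulkan_resource_index produces an (index, 0) pair: constant array indices
 * are folded into the binding, dynamic ones are added to it at runtime.
 */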
3493 static bool
3494 emit_vulkan_resource_index(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3495 {
3496    unsigned int binding = nir_intrinsic_binding(intr);
3497 
3498    bool const_index = nir_src_is_const(intr->src[0]);
3499    if (const_index) {
3500       binding += nir_src_as_const_value(intr->src[0])->u32;
3501    }
3502 
3503    const struct dxil_value *index_value = dxil_module_get_int32_const(&ctx->mod, binding);
3504    if (!index_value)
3505       return false;
3506 
3507    if (!const_index) {
3508       const struct dxil_value *offset = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
3509       if (!offset)
3510          return false;
3511 
3512       index_value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, index_value, offset, 0);
3513       if (!index_value)
3514          return false;
3515    }
3516 
3517    store_dest(ctx, &intr->dest, 0, index_value, nir_type_uint32);
3518    store_dest(ctx, &intr->dest, 1, dxil_module_get_int32_const(&ctx->mod, 0), nir_type_uint32);
3519    return true;
3520 }
3521 
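/* Turn a vulkan_resource_index result into an actual resource handle. The
 * resource class is picked from the descriptor type, with read-only SSBOs
 * mapped to SRVs instead of UAVs.
 */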
3522 static bool
3523 emit_load_vulkan_descriptor(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3524 {
3525    nir_intrinsic_instr* index = nir_src_as_intrinsic(intr->src[0]);
3526    /* We currently do not support reindex */
3527    assert(index && index->intrinsic == nir_intrinsic_vulkan_resource_index);
3528 
3529    unsigned binding = nir_intrinsic_binding(index);
3530    unsigned space = nir_intrinsic_desc_set(index);
3531 
3532    /* The descriptor_set field for variables is only 5 bits. We shouldn't have intrinsics trying to go beyond that. */
3533    assert(space < 32);
3534 
3535    nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
3536 
3537    const struct dxil_value *handle = NULL;
3538    enum dxil_resource_class resource_class;
3539 
3540    switch (nir_intrinsic_desc_type(intr)) {
3541    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
3542       resource_class = DXIL_RESOURCE_CLASS_CBV;
3543       break;
3544    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
3545       if (var->data.access & ACCESS_NON_WRITEABLE)
3546          resource_class = DXIL_RESOURCE_CLASS_SRV;
3547       else
3548          resource_class = DXIL_RESOURCE_CLASS_UAV;
3549       break;
3550    default:
3551       unreachable("unknown descriptor type");
3552       return false;
3553    }
3554 
3555    const struct dxil_value *index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
3556    if (!index_value)
3557       return false;
3558 
3559    handle = emit_createhandle_call(ctx, resource_class,
3560       get_resource_id(ctx, resource_class, space, binding),
3561       index_value, false);
3562 
3563    store_dest_value(ctx, &intr->dest, 0, handle);
3564    store_dest(ctx, &intr->dest, 1, get_src(ctx, &intr->src[0], 1, nir_type_uint32), nir_type_uint32);
3565 
3566    return true;
3567 }
3568 
3569 static bool
3570 emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3571 {
3572    switch (intr->intrinsic) {
3573    case nir_intrinsic_load_global_invocation_id:
3574    case nir_intrinsic_load_global_invocation_id_zero_base:
3575       return emit_load_global_invocation_id(ctx, intr);
3576    case nir_intrinsic_load_local_invocation_id:
3577       return emit_load_local_invocation_id(ctx, intr);
3578    case nir_intrinsic_load_local_invocation_index:
3579       return emit_load_local_invocation_index(ctx, intr);
3580    case nir_intrinsic_load_workgroup_id:
3581    case nir_intrinsic_load_workgroup_id_zero_base:
3582       return emit_load_local_workgroup_id(ctx, intr);
3583    case nir_intrinsic_load_ssbo:
3584       return emit_load_ssbo(ctx, intr);
3585    case nir_intrinsic_store_ssbo:
3586       return emit_store_ssbo(ctx, intr);
3587    case nir_intrinsic_store_ssbo_masked_dxil:
3588       return emit_store_ssbo_masked(ctx, intr);
3589    case nir_intrinsic_store_deref:
3590       return emit_store_deref(ctx, intr);
3591    case nir_intrinsic_store_shared_dxil:
3592    case nir_intrinsic_store_shared_masked_dxil:
3593       return emit_store_shared(ctx, intr);
3594    case nir_intrinsic_store_scratch_dxil:
3595       return emit_store_scratch(ctx, intr);
3596    case nir_intrinsic_load_deref:
3597       return emit_load_deref(ctx, intr);
3598    case nir_intrinsic_load_ptr_dxil:
3599       return emit_load_ptr(ctx, intr);
3600    case nir_intrinsic_load_ubo:
3601       return emit_load_ubo(ctx, intr);
3602    case nir_intrinsic_load_ubo_dxil:
3603       return emit_load_ubo_dxil(ctx, intr);
3604    case nir_intrinsic_load_front_face:
3605       return emit_load_input_interpolated(ctx, intr,
3606                                           ctx->system_value[SYSTEM_VALUE_FRONT_FACE]);
3607    case nir_intrinsic_load_vertex_id_zero_base:
3608       return emit_load_input_interpolated(ctx, intr,
3609                                           ctx->system_value[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE]);
3610    case nir_intrinsic_load_instance_id:
3611       return emit_load_input_interpolated(ctx, intr,
3612                                           ctx->system_value[SYSTEM_VALUE_INSTANCE_ID]);
3613    case nir_intrinsic_load_primitive_id:
3614       return emit_load_unary_external_function(ctx, intr, "dx.op.primitiveID",
3615                                                DXIL_INTR_PRIMITIVE_ID);
3616    case nir_intrinsic_load_sample_id:
3617       return emit_load_unary_external_function(ctx, intr, "dx.op.sampleIndex",
3618                                                DXIL_INTR_SAMPLE_INDEX);
3619    case nir_intrinsic_load_shared_dxil:
3620       return emit_load_shared(ctx, intr);
3621    case nir_intrinsic_load_scratch_dxil:
3622       return emit_load_scratch(ctx, intr);
3623    case nir_intrinsic_discard_if:
3624       return emit_discard_if(ctx, intr);
3625    case nir_intrinsic_discard:
3626       return emit_discard(ctx);
3627    case nir_intrinsic_emit_vertex:
3628       return emit_emit_vertex(ctx, intr);
3629    case nir_intrinsic_end_primitive:
3630       return emit_end_primitive(ctx, intr);
3631    case nir_intrinsic_scoped_barrier:
3632       return emit_barrier(ctx, intr);
3633    case nir_intrinsic_ssbo_atomic_add:
3634       return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_ADD, nir_type_int);
3635    case nir_intrinsic_ssbo_atomic_imin:
3636       return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_IMIN, nir_type_int);
3637    case nir_intrinsic_ssbo_atomic_umin:
3638       return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_UMIN, nir_type_uint);
3639    case nir_intrinsic_ssbo_atomic_imax:
3640       return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_IMAX, nir_type_int);
3641    case nir_intrinsic_ssbo_atomic_umax:
3642       return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_UMAX, nir_type_uint);
3643    case nir_intrinsic_ssbo_atomic_and:
3644       return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_AND, nir_type_uint);
3645    case nir_intrinsic_ssbo_atomic_or:
3646       return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_OR, nir_type_uint);
3647    case nir_intrinsic_ssbo_atomic_xor:
3648       return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_XOR, nir_type_uint);
3649    case nir_intrinsic_ssbo_atomic_exchange:
3650       return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_EXCHANGE, nir_type_int);
3651    case nir_intrinsic_ssbo_atomic_comp_swap:
3652       return emit_ssbo_atomic_comp_swap(ctx, intr);
3653    case nir_intrinsic_shared_atomic_add_dxil:
3654       return emit_shared_atomic(ctx, intr, DXIL_RMWOP_ADD, nir_type_int);
3655    case nir_intrinsic_shared_atomic_imin_dxil:
3656       return emit_shared_atomic(ctx, intr, DXIL_RMWOP_MIN, nir_type_int);
3657    case nir_intrinsic_shared_atomic_umin_dxil:
3658       return emit_shared_atomic(ctx, intr, DXIL_RMWOP_UMIN, nir_type_uint);
3659    case nir_intrinsic_shared_atomic_imax_dxil:
3660       return emit_shared_atomic(ctx, intr, DXIL_RMWOP_MAX, nir_type_int);
3661    case nir_intrinsic_shared_atomic_umax_dxil:
3662       return emit_shared_atomic(ctx, intr, DXIL_RMWOP_UMAX, nir_type_uint);
3663    case nir_intrinsic_shared_atomic_and_dxil:
3664       return emit_shared_atomic(ctx, intr, DXIL_RMWOP_AND, nir_type_uint);
3665    case nir_intrinsic_shared_atomic_or_dxil:
3666       return emit_shared_atomic(ctx, intr, DXIL_RMWOP_OR, nir_type_uint);
3667    case nir_intrinsic_shared_atomic_xor_dxil:
3668       return emit_shared_atomic(ctx, intr, DXIL_RMWOP_XOR, nir_type_uint);
3669    case nir_intrinsic_shared_atomic_exchange_dxil:
3670       return emit_shared_atomic(ctx, intr, DXIL_RMWOP_XCHG, nir_type_int);
3671    case nir_intrinsic_shared_atomic_comp_swap_dxil:
3672       return emit_shared_atomic_comp_swap(ctx, intr);
3673    case nir_intrinsic_image_store:
3674    case nir_intrinsic_image_deref_store:
3675       return emit_image_store(ctx, intr);
3676    case nir_intrinsic_image_load:
3677    case nir_intrinsic_image_deref_load:
3678       return emit_image_load(ctx, intr);
3679    case nir_intrinsic_image_size:
3680    case nir_intrinsic_image_deref_size:
3681       return emit_image_size(ctx, intr);
3682    case nir_intrinsic_get_ssbo_size:
3683       return emit_get_ssbo_size(ctx, intr);
3684 
3685    case nir_intrinsic_vulkan_resource_index:
3686       return emit_vulkan_resource_index(ctx, intr);
3687    case nir_intrinsic_load_vulkan_descriptor:
3688       return emit_load_vulkan_descriptor(ctx, intr);
3689 
3690    case nir_intrinsic_load_num_workgroups:
3691    case nir_intrinsic_load_workgroup_size:
3692    default:
3693       NIR_INSTR_UNSUPPORTED(&intr->instr);
3694       assert(!"Unimplemented intrinsic instruction");
3695       return false;
3696    }
3697 }
3698 
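/* Materialize a NIR constant as per-component DXIL integer constants.
 * 16-bit constants require native low precision and 64-bit constants
 * require int64 support, so the matching feature bits are set here.
 */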
3699 static bool
3700 emit_load_const(struct ntd_context *ctx, nir_load_const_instr *load_const)
3701 {
3702    for (int i = 0; i < load_const->def.num_components; ++i) {
3703       const struct dxil_value *value;
3704       switch (load_const->def.bit_size) {
3705       case 1:
3706          value = dxil_module_get_int1_const(&ctx->mod,
3707                                             load_const->value[i].b);
3708          break;
3709       case 16:
3710          ctx->mod.feats.native_low_precision = true;
3711          value = dxil_module_get_int16_const(&ctx->mod,
3712                                              load_const->value[i].u16);
3713          break;
3714       case 32:
3715          value = dxil_module_get_int32_const(&ctx->mod,
3716                                              load_const->value[i].u32);
3717          break;
3718       case 64:
3719          ctx->mod.feats.int64_ops = true;
3720          value = dxil_module_get_int64_const(&ctx->mod,
3721                                              load_const->value[i].u64);
3722          break;
3723       default:
3724          unreachable("unexpected bit_size");
3725       }
3726       if (!value)
3727          return false;
3728 
3729       store_ssa_def(ctx, &load_const->def, i, value);
3730    }
3731    return true;
3732 }
3733 
3734 static bool
3735 emit_deref(struct ntd_context *ctx, nir_deref_instr *instr)
3736 {
3737    assert(instr->deref_type == nir_deref_type_var ||
3738           instr->deref_type == nir_deref_type_array);
3739 
3740    /* In the non-Vulkan environment, there's nothing to emit. Any references to
3741     * derefs will emit the necessary logic to handle scratch/shared GEP addressing
3742     */
3743    if (!ctx->opts->vulkan_environment)
3744       return true;
3745 
3746    /* In the Vulkan environment, we don't have cached handles for textures or
3747     * samplers, so let's use the opportunity of walking through the derefs to
3748     * emit those.
3749     */
3750    nir_variable *var = nir_deref_instr_get_variable(instr);
3751    assert(var);
3752 
3753    if (!glsl_type_is_sampler(glsl_without_array(var->type)) &&
3754        !glsl_type_is_image(glsl_without_array(var->type)))
3755       return true;
3756 
3757    const struct glsl_type *type = instr->type;
3758    const struct dxil_value *binding;
3759 
3760    if (instr->deref_type == nir_deref_type_var) {
3761       binding = dxil_module_get_int32_const(&ctx->mod, var->data.binding);
3762    } else {
3763       const struct dxil_value *base = get_src(ctx, &instr->parent, 0, nir_type_uint32);
3764       const struct dxil_value *offset = get_src(ctx, &instr->arr.index, 0, nir_type_uint32);
3765       if (!base || !offset)
3766          return false;
3767 
3768       binding = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, base, offset, 0);
3769    }
3770 
3771    if (!binding)
3772       return false;
3773 
3774    /* Haven't finished chasing the deref chain yet, just store the value */
3775    if (glsl_type_is_array(type)) {
3776       store_dest(ctx, &instr->dest, 0, binding, nir_type_uint32);
3777       return true;
3778    }
3779 
3780    assert(glsl_type_is_sampler(type) || glsl_type_is_image(type));
3781    enum dxil_resource_class res_class;
3782    if (glsl_type_is_image(type))
3783       res_class = DXIL_RESOURCE_CLASS_UAV;
3784    else if (glsl_get_sampler_result_type(type) == GLSL_TYPE_VOID)
3785       res_class = DXIL_RESOURCE_CLASS_SAMPLER;
3786    else
3787       res_class = DXIL_RESOURCE_CLASS_SRV;
3788 
3789    const struct dxil_value *handle = emit_createhandle_call(ctx, res_class,
3790       get_resource_id(ctx, res_class, var->data.descriptor_set, var->data.binding), binding, false);
3791    if (!handle)
3792       return false;
3793 
3794    store_dest_value(ctx, &instr->dest, 0, handle);
3795    return true;
3796 }
3797 
3798 static bool
3799 emit_cond_branch(struct ntd_context *ctx, const struct dxil_value *cond,
3800                  int true_block, int false_block)
3801 {
3802    assert(cond);
3803    assert(true_block >= 0);
3804    assert(false_block >= 0);
3805    return dxil_emit_branch(&ctx->mod, cond, true_block, false_block);
3806 }
3807 
3808 static bool
3809 emit_branch(struct ntd_context *ctx, int block)
3810 {
3811    assert(block >= 0);
3812    return dxil_emit_branch(&ctx->mod, NULL, block, -1);
3813 }
3814 
3815 static bool
3816 emit_jump(struct ntd_context *ctx, nir_jump_instr *instr)
3817 {
3818    switch (instr->type) {
3819    case nir_jump_break:
3820    case nir_jump_continue:
3821       assert(instr->instr.block->successors[0]);
3822       assert(!instr->instr.block->successors[1]);
3823       return emit_branch(ctx, instr->instr.block->successors[0]->index);
3824 
3825    default:
3826       unreachable("Unsupported jump type\n");
3827    }
3828 }
3829 
3830 struct phi_block {
3831    unsigned num_components;
3832    struct dxil_instr *comp[NIR_MAX_VEC_COMPONENTS];
3833 };
3834 
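/* Phis are emitted in two passes: emit_phi creates the instruction with an
 * empty incoming list so its result can be referenced immediately, and
 * fixup_phi fills in the (value, predecessor block) pairs once all blocks
 * have been emitted.
 */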
3835 static bool
3836 emit_phi(struct ntd_context *ctx, nir_phi_instr *instr)
3837 {
3838    unsigned bit_size = nir_dest_bit_size(instr->dest);
3839    const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod,
3840                                                            bit_size);
3841 
3842    struct phi_block *vphi = ralloc(ctx->phis, struct phi_block);
3843    vphi->num_components = nir_dest_num_components(instr->dest);
3844 
3845    for (unsigned i = 0; i < vphi->num_components; ++i) {
3846       struct dxil_instr *phi = vphi->comp[i] = dxil_emit_phi(&ctx->mod, type);
3847       if (!phi)
3848          return false;
3849       store_dest_value(ctx, &instr->dest, i, dxil_instr_get_return_value(phi));
3850    }
3851    _mesa_hash_table_insert(ctx->phis, instr, vphi);
3852    return true;
3853 }
3854 
3855 static void
3856 fixup_phi(struct ntd_context *ctx, nir_phi_instr *instr,
3857           struct phi_block *vphi)
3858 {
3859    const struct dxil_value *values[128];
3860    unsigned blocks[128];
3861    for (unsigned i = 0; i < vphi->num_components; ++i) {
3862       size_t num_incoming = 0;
3863       nir_foreach_phi_src(src, instr) {
3864          assert(src->src.is_ssa);
3865          const struct dxil_value *val = get_src_ssa(ctx, src->src.ssa, i);
3866          assert(num_incoming < ARRAY_SIZE(values));
3867          values[num_incoming] = val;
3868          assert(num_incoming < ARRAY_SIZE(blocks));
3869          blocks[num_incoming] = src->pred->index;
3870          ++num_incoming;
3871       }
3872       dxil_phi_set_incoming(vphi->comp[i], values, blocks, num_incoming);
3873    }
3874 }
3875 
3876 static unsigned
3877 get_n_src(struct ntd_context *ctx, const struct dxil_value **values,
3878           unsigned max_components, nir_tex_src *src, nir_alu_type type)
3879 {
3880    unsigned num_components = nir_src_num_components(src->src);
3881    unsigned i = 0;
3882 
3883    assert(num_components <= max_components);
3884 
3885    for (i = 0; i < num_components; ++i) {
3886       values[i] = get_src(ctx, &src->src, i, type);
3887       if (!values[i])
3888          return 0;
3889    }
3890 
3891    return num_components;
3892 }
3893 
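/* DXIL texture opcodes take a fixed number of coordinate/offset/gradient
 * operands, so unused trailing components are padded with undef values.
 */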
3894 #define PAD_SRC(ctx, array, components, undef) \
3895    for (unsigned i = components; i < ARRAY_SIZE(array); ++i) { \
3896       array[i] = undef; \
3897    }
3898 
3899 static const struct dxil_value *
3900 emit_sample(struct ntd_context *ctx, struct texop_parameters *params)
3901 {
3902    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sample", params->overload);
3903    if (!func)
3904       return NULL;
3905 
3906    const struct dxil_value *args[11] = {
3907       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE),
3908       params->tex, params->sampler,
3909       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
3910       params->offset[0], params->offset[1], params->offset[2],
3911       params->min_lod
3912    };
3913 
3914    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3915 }
3916 
3917 static const struct dxil_value *
3918 emit_sample_bias(struct ntd_context *ctx, struct texop_parameters *params)
3919 {
3920    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleBias", params->overload);
3921    if (!func)
3922       return NULL;
3923 
3924    assert(params->bias != NULL);
3925 
3926    const struct dxil_value *args[12] = {
3927       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_BIAS),
3928       params->tex, params->sampler,
3929       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
3930       params->offset[0], params->offset[1], params->offset[2],
3931       params->bias, params->min_lod
3932    };
3933 
3934    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3935 }
3936 
3937 static const struct dxil_value *
3938 emit_sample_level(struct ntd_context *ctx, struct texop_parameters *params)
3939 {
3940    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleLevel", params->overload);
3941    if (!func)
3942       return NULL;
3943 
3944    assert(params->lod_or_sample != NULL);
3945 
3946    const struct dxil_value *args[11] = {
3947       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_LEVEL),
3948       params->tex, params->sampler,
3949       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
3950       params->offset[0], params->offset[1], params->offset[2],
3951       params->lod_or_sample
3952    };
3953 
3954    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3955 }
3956 
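/* sampleCmp relies on implicit derivatives and is therefore only emitted in
 * pixel shaders; other stages fall back to sampleCmpLevelZero, which drops
 * the trailing clamp parameter.
 */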
3957 static const struct dxil_value *
3958 emit_sample_cmp(struct ntd_context *ctx, struct texop_parameters *params)
3959 {
3960    const struct dxil_func *func;
3961    enum dxil_intr opcode;
3962    int numparam;
3963 
3964    if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER)  {
3965       func = dxil_get_function(&ctx->mod, "dx.op.sampleCmp", DXIL_F32);
3966       opcode = DXIL_INTR_SAMPLE_CMP;
3967       numparam = 12;
3968    } else {
3969       func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevelZero", DXIL_F32);
3970       opcode = DXIL_INTR_SAMPLE_CMP_LVL_ZERO;
3971       numparam = 11;
3972    }
3973 
3974    if (!func)
3975       return NULL;
3976 
3977    const struct dxil_value *args[12] = {
3978       dxil_module_get_int32_const(&ctx->mod, opcode),
3979       params->tex, params->sampler,
3980       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
3981       params->offset[0], params->offset[1], params->offset[2],
3982       params->cmp, params->min_lod
3983    };
3984 
3985    return dxil_emit_call(&ctx->mod, func, args, numparam);
3986 }
3987 
3988 static const struct dxil_value *
3989 emit_sample_grad(struct ntd_context *ctx, struct texop_parameters *params)
3990 {
3991    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleGrad", params->overload);
3992    if (!func)
3993       return NULL;
3994 
3995    const struct dxil_value *args[17] = {
3996       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_GRAD),
3997       params->tex, params->sampler,
3998       params->coord[0], params->coord[1], params->coord[2], params->coord[3],
3999       params->offset[0], params->offset[1], params->offset[2],
4000       params->dx[0], params->dx[1], params->dx[2],
4001       params->dy[0], params->dy[1], params->dy[2],
4002       params->min_lod
4003    };
4004 
4005    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4006 }
4007 
4008 static const struct dxil_value *
4009 emit_texel_fetch(struct ntd_context *ctx, struct texop_parameters *params)
4010 {
4011    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", params->overload);
4012    if (!func)
4013       return NULL;
4014 
4015    if (!params->lod_or_sample)
4016       params->lod_or_sample = dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32));
4017 
4018    const struct dxil_value *args[] = {
4019       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOAD),
4020       params->tex,
4021       params->lod_or_sample, params->coord[0], params->coord[1], params->coord[2],
4022       params->offset[0], params->offset[1], params->offset[2]
4023    };
4024 
4025    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4026 }
4027 
4028 static const struct dxil_value *
4029 emit_texture_lod(struct ntd_context *ctx, struct texop_parameters *params)
4030 {
4031    const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.calculateLOD", DXIL_F32);
4032    if (!func)
4033       return NULL;
4034 
4035    const struct dxil_value *args[] = {
4036       dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOD),
4037       params->tex,
4038       params->sampler,
4039       params->coord[0],
4040       params->coord[1],
4041       params->coord[2],
4042       dxil_module_get_int1_const(&ctx->mod, 1)
4043    };
4044 
4045    return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4046 }
4047 
4048 static bool
4049 emit_tex(struct ntd_context *ctx, nir_tex_instr *instr)
4050 {
4051    struct texop_parameters params;
4052    memset(&params, 0, sizeof(struct texop_parameters));
4053    if (!ctx->opts->vulkan_environment) {
4054       params.tex = ctx->srv_handles[instr->texture_index];
4055       params.sampler = ctx->sampler_handles[instr->sampler_index];
4056    }
4057 
4058    const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
4059    const struct dxil_type *float_type = dxil_module_get_float_type(&ctx->mod, 32);
4060    const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);
4061    const struct dxil_value *float_undef = dxil_module_get_undef(&ctx->mod, float_type);
4062 
4063    unsigned coord_components = 0, offset_components = 0, dx_components = 0, dy_components = 0;
4064    params.overload = get_overload(instr->dest_type, 32);
4065 
4066    for (unsigned i = 0; i < instr->num_srcs; i++) {
4067       nir_alu_type type = nir_tex_instr_src_type(instr, i);
4068 
4069       switch (instr->src[i].src_type) {
4070       case nir_tex_src_coord:
4071          coord_components = get_n_src(ctx, params.coord, ARRAY_SIZE(params.coord),
4072                                       &instr->src[i], type);
4073          if (!coord_components)
4074             return false;
4075          break;
4076 
4077       case nir_tex_src_offset:
4078          offset_components = get_n_src(ctx, params.offset, ARRAY_SIZE(params.offset),
4079                                        &instr->src[i], nir_type_int);
4080          if (!offset_components)
4081             return false;
4082          break;
4083 
4084       case nir_tex_src_bias:
4085          assert(instr->op == nir_texop_txb);
4086          assert(nir_src_num_components(instr->src[i].src) == 1);
4087          params.bias = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
4088          if (!params.bias)
4089             return false;
4090          break;
4091 
4092       case nir_tex_src_lod:
4093          assert(nir_src_num_components(instr->src[i].src) == 1);
4094          /* Buffers don't have a LOD */
4095          if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF)
4096             params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, type);
4097          else
4098             params.lod_or_sample = int_undef;
4099          if (!params.lod_or_sample)
4100             return false;
4101          break;
4102 
4103       case nir_tex_src_min_lod:
4104          assert(nir_src_num_components(instr->src[i].src) == 1);
4105          params.min_lod = get_src(ctx, &instr->src[i].src, 0, type);
4106          if (!params.min_lod)
4107             return false;
4108          break;
4109 
4110       case nir_tex_src_comparator:
4111          assert(nir_src_num_components(instr->src[i].src) == 1);
4112          params.cmp = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
4113          if (!params.cmp)
4114             return false;
4115          break;
4116 
4117       case nir_tex_src_ddx:
4118          dx_components = get_n_src(ctx, params.dx, ARRAY_SIZE(params.dx),
4119                                    &instr->src[i], nir_type_float);
4120          if (!dx_components)
4121             return false;
4122          break;
4123 
4124       case nir_tex_src_ddy:
4125          dy_components = get_n_src(ctx, params.dy, ARRAY_SIZE(params.dy),
4126                                    &instr->src[i], nir_type_float);
4127          if (!dy_components)
4128             return false;
4129          break;
4130 
4131       case nir_tex_src_ms_index:
4132          params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, nir_type_int);
4133          if (!params.lod_or_sample)
4134             return false;
4135          break;
4136 
4137       case nir_tex_src_texture_deref:
4138          assert(ctx->opts->vulkan_environment);
4139          params.tex = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
4140          break;
4141 
4142       case nir_tex_src_sampler_deref:
4143          assert(ctx->opts->vulkan_environment);
4144          params.sampler = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
4145          break;
4146 
4147       case nir_tex_src_projector:
4148          unreachable("Texture projector should have been lowered");
4149 
4150       default:
4151          fprintf(stderr, "texture source: %d\n", instr->src[i].src_type);
4152          unreachable("unknown texture source");
4153       }
4154    }
4155 
4156    assert(params.tex != NULL);
4157    assert(instr->op == nir_texop_txf ||
4158           instr->op == nir_texop_txf_ms ||
4159           nir_tex_instr_is_query(instr) ||
4160           params.sampler != NULL);
4161 
4162    PAD_SRC(ctx, params.coord, coord_components, float_undef);
4163    PAD_SRC(ctx, params.offset, offset_components, int_undef);
4164    if (!params.min_lod) params.min_lod = float_undef;
4165 
4166    const struct dxil_value *sample = NULL;
4167    switch (instr->op) {
4168    case nir_texop_txb:
4169       sample = emit_sample_bias(ctx, &params);
4170       break;
4171 
4172    case nir_texop_tex:
4173       if (params.cmp != NULL) {
4174          sample = emit_sample_cmp(ctx, &params);
4175          break;
4176       } else if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) {
4177          sample = emit_sample(ctx, &params);
4178          break;
4179       }
4180       params.lod_or_sample = dxil_module_get_float_const(&ctx->mod, 0);
4181       FALLTHROUGH;
4182    case nir_texop_txl:
4183       sample = emit_sample_level(ctx, &params);
4184       break;
4185 
4186    case nir_texop_txd:
4187       PAD_SRC(ctx, params.dx, dx_components, float_undef);
4188       PAD_SRC(ctx, params.dy, dy_components,float_undef);
4189       sample = emit_sample_grad(ctx, &params);
4190       break;
4191 
4192    case nir_texop_txf:
4193    case nir_texop_txf_ms:
4194       if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
4195          params.coord[1] = int_undef;
4196          sample = emit_bufferload_call(ctx, params.tex, params.coord, params.overload);
4197       } else {
4198          PAD_SRC(ctx, params.coord, coord_components, int_undef);
4199          sample = emit_texel_fetch(ctx, &params);
4200       }
4201       break;
4202 
4203    case nir_texop_txs:
4204       sample = emit_texture_size(ctx, &params);
4205       break;
4206 
4207    case nir_texop_lod:
4208       sample = emit_texture_lod(ctx, &params);
4209       store_dest(ctx, &instr->dest, 0, sample, nir_alu_type_get_base_type(instr->dest_type));
4210       return true;
4211 
4212    case nir_texop_query_levels:
4213       params.lod_or_sample = dxil_module_get_int_const(&ctx->mod, 0, 32);
4214       sample = emit_texture_size(ctx, &params);
4215       const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3);
4216       store_dest(ctx, &instr->dest, 0, retval, nir_alu_type_get_base_type(instr->dest_type));
4217       return true;
4218 
4219    default:
4220       fprintf(stderr, "texture op: %d\n", instr->op);
4221       unreachable("unknown texture op");
4222    }
4223 
4224    if (!sample)
4225       return false;
4226 
4227    for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
4228       const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, i);
4229       store_dest(ctx, &instr->dest, i, retval, nir_alu_type_get_base_type(instr->dest_type));
4230    }
4231 
4232    return true;
4233 }
4234 
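/* NIR ssa_undef values are materialized as int32 zero constants rather
 * than DXIL undef values, presumably so every later use sees a
 * well-defined value.
 */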
static bool
emit_undefined(struct ntd_context *ctx, nir_ssa_undef_instr *undef)
{
   for (unsigned i = 0; i < undef->def.num_components; ++i)
      store_ssa_def(ctx, &undef->def, i, dxil_module_get_int32_const(&ctx->mod, 0));
   return true;
}

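/* Dispatch a single NIR instruction to the matching emit_* helper. */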
static bool emit_instr(struct ntd_context *ctx, struct nir_instr* instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return emit_alu(ctx, nir_instr_as_alu(instr));
   case nir_instr_type_intrinsic:
      return emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
   case nir_instr_type_load_const:
      return emit_load_const(ctx, nir_instr_as_load_const(instr));
   case nir_instr_type_deref:
      return emit_deref(ctx, nir_instr_as_deref(instr));
   case nir_instr_type_jump:
      return emit_jump(ctx, nir_instr_as_jump(instr));
   case nir_instr_type_phi:
      return emit_phi(ctx, nir_instr_as_phi(instr));
   case nir_instr_type_tex:
      return emit_tex(ctx, nir_instr_as_tex(instr));
   case nir_instr_type_ssa_undef:
      return emit_undefined(ctx, nir_instr_as_ssa_undef(instr));
   default:
      NIR_INSTR_UNSUPPORTED(instr);
      unreachable("Unimplemented instruction type");
      return false;
   }
}


static bool
emit_block(struct ntd_context *ctx, struct nir_block *block)
{
   assert(block->index < ctx->mod.num_basic_block_ids);
   ctx->mod.basic_block_ids[block->index] = ctx->mod.curr_block;

   nir_foreach_instr(instr, block) {
      TRACE_CONVERSION(instr);

      if (!emit_instr(ctx, instr)) {
         return false;
      }
   }
   return true;
}

static bool
emit_cf_list(struct ntd_context *ctx, struct exec_list *list);

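/* Lower a structured NIR if: branch on the condition to the then-block
 * (or directly to the common successor when there is no else), emit both
 * branch bodies, and append an explicit branch to the successor unless a
 * body already ends in a jump.
 */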
static bool
emit_if(struct ntd_context *ctx, struct nir_if *if_stmt)
{
   assert(nir_src_num_components(if_stmt->condition) == 1);
   const struct dxil_value *cond = get_src(ctx, &if_stmt->condition, 0,
                                           nir_type_bool);
   if (!cond)
      return false;

   /* prepare blocks */
   nir_block *then_block = nir_if_first_then_block(if_stmt);
   assert(nir_if_last_then_block(if_stmt)->successors[0]);
   assert(!nir_if_last_then_block(if_stmt)->successors[1]);
   int then_succ = nir_if_last_then_block(if_stmt)->successors[0]->index;

   nir_block *else_block = NULL;
   int else_succ = -1;
   if (!exec_list_is_empty(&if_stmt->else_list)) {
      else_block = nir_if_first_else_block(if_stmt);
      assert(nir_if_last_else_block(if_stmt)->successors[0]);
      assert(!nir_if_last_else_block(if_stmt)->successors[1]);
      else_succ = nir_if_last_else_block(if_stmt)->successors[0]->index;
   }

   if (!emit_cond_branch(ctx, cond, then_block->index,
                         else_block ? else_block->index : then_succ))
      return false;

   /* handle then-block */
   if (!emit_cf_list(ctx, &if_stmt->then_list) ||
       (!nir_block_ends_in_jump(nir_if_last_then_block(if_stmt)) &&
        !emit_branch(ctx, then_succ)))
      return false;

   if (else_block) {
      /* handle else-block */
      if (!emit_cf_list(ctx, &if_stmt->else_list) ||
          (!nir_block_ends_in_jump(nir_if_last_else_block(if_stmt)) &&
           !emit_branch(ctx, else_succ)))
         return false;
   }

   return true;
}

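/* NIR loops have no exit condition of their own (exits happen via jump
 * instructions in the body), so emit a branch into the header, the body,
 * and the back-edge branch to the header.
 */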
static bool
emit_loop(struct ntd_context *ctx, nir_loop *loop)
{
   nir_block *first_block = nir_loop_first_block(loop);

   assert(nir_loop_last_block(loop)->successors[0]);
   assert(!nir_loop_last_block(loop)->successors[1]);

   if (!emit_branch(ctx, first_block->index))
      return false;

   if (!emit_cf_list(ctx, &loop->body))
      return false;

   if (!emit_branch(ctx, first_block->index))
      return false;

   return true;
}

static bool
emit_cf_list(struct ntd_context *ctx, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         if (!emit_block(ctx, nir_cf_node_as_block(node)))
            return false;
         break;

      case nir_cf_node_if:
         if (!emit_if(ctx, nir_cf_node_as_if(node)))
            return false;
         break;

      case nir_cf_node_loop:
         if (!emit_loop(ctx, nir_cf_node_as_loop(node)))
            return false;
         break;

      default:
         unreachable("unsupported cf-list node");
         break;
      }
   }
   return true;
}

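/* Insert new_var into var_list, keeping the list sorted by ascending
 * data.binding.
 */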
static void
insert_sorted_by_binding(struct exec_list *var_list, nir_variable *new_var)
{
   nir_foreach_variable_in_list(var, var_list) {
      if (var->data.binding > new_var->data.binding) {
         exec_node_insert_node_before(&var->node, &new_var->node);
         return;
      }
   }
   exec_list_push_tail(var_list, &new_var->node);
}


static void
sort_uniforms_by_binding_and_remove_structs(nir_shader *s)
{
   struct exec_list new_list;
   exec_list_make_empty(&new_list);

   nir_foreach_variable_with_modes_safe(var, s, nir_var_uniform) {
      exec_node_remove(&var->node);
      const struct glsl_type *type = glsl_without_array(var->type);
      if (!glsl_type_is_struct(type))
         insert_sorted_by_binding(&new_list, var);
   }
   exec_list_append(&s->variables, &new_list);
}

static void
prepare_phi_values(struct ntd_context *ctx)
{
   /* PHI nodes are difficult to get right when tracking the types:
    * Since the incoming sources are linked to blocks, we can't bitcast
    * on the fly while loading. So scan the shader and insert a typed dummy
    * value for each phi source, and when storing we convert if the incoming
    * value has a different type than the one expected by the phi node.
    * We choose int as default, because it supports more bit sizes.
    */
   nir_foreach_function(function, ctx->shader) {
      if (function->impl) {
         nir_foreach_block(block, function->impl) {
            nir_foreach_instr(instr, block) {
               if (instr->type == nir_instr_type_phi) {
                  nir_phi_instr *ir = nir_instr_as_phi(instr);
                  unsigned bitsize = nir_dest_bit_size(ir->dest);
                  const struct dxil_value *dummy = dxil_module_get_int_const(&ctx->mod, 0, bitsize);
                  nir_foreach_phi_src(src, ir) {
                     for (unsigned int i = 0; i < ir->dest.ssa.num_components; ++i)
                        store_ssa_def(ctx, src->src.ssa, i, dummy);
                  }
               }
            }
         }
      }
   }
}

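/* Emit constant buffer views: for kernels and the Vulkan environment each
 * UBO variable becomes its own CBV, otherwise one fixed-size CBV is
 * declared per UBO binding slot.
 */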
static bool
emit_cbvs(struct ntd_context *ctx)
{
   if (ctx->shader->info.stage == MESA_SHADER_KERNEL || ctx->opts->vulkan_environment) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ubo) {
         if (!emit_ubo_var(ctx, var))
            return false;
      }
   } else {
      for (int i = ctx->opts->ubo_binding_offset; i < ctx->shader->info.num_ubos; ++i) {
         char name[64];
         snprintf(name, sizeof(name), "__ubo%d", i);
         if (!emit_cbv(ctx, i, 0, 16384 /*4096 vec4's*/, 1, name))
            return false;
      }
   }

   return true;
}

static bool
emit_scratch(struct ntd_context *ctx)
{
   if (ctx->shader->scratch_size) {
      /*
       * We always allocate an u32 array, no matter the actual variable types.
       * According to the DXIL spec, the minimum load/store granularity is
       * 32-bit, anything smaller requires using a read-extract/read-write-modify
       * approach.
       */
      unsigned size = ALIGN_POT(ctx->shader->scratch_size, sizeof(uint32_t));
      const struct dxil_type *int32 = dxil_module_get_int_type(&ctx->mod, 32);
      const struct dxil_value *array_length = dxil_module_get_int32_const(&ctx->mod, size / sizeof(uint32_t));
      if (!int32 || !array_length)
         return false;

      const struct dxil_type *type = dxil_module_get_array_type(
         &ctx->mod, int32, size / sizeof(uint32_t));
      if (!type)
         return false;

      ctx->scratchvars = dxil_emit_alloca(&ctx->mod, type, int32, array_length, 4);
      if (!ctx->scratchvars)
         return false;
   }

   return true;
}

/* The validator complains if we don't have ops that reference a global variable. */
static bool
shader_has_shared_ops(struct nir_shader *s)
{
   nir_foreach_function(func, s) {
      if (!func->impl)
         continue;
      nir_foreach_block(block, func->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_load_shared_dxil:
            case nir_intrinsic_store_shared_dxil:
            case nir_intrinsic_shared_atomic_add_dxil:
            case nir_intrinsic_shared_atomic_and_dxil:
            case nir_intrinsic_shared_atomic_comp_swap_dxil:
            case nir_intrinsic_shared_atomic_exchange_dxil:
            case nir_intrinsic_shared_atomic_imax_dxil:
            case nir_intrinsic_shared_atomic_imin_dxil:
            case nir_intrinsic_shared_atomic_or_dxil:
            case nir_intrinsic_shared_atomic_umax_dxil:
            case nir_intrinsic_shared_atomic_umin_dxil:
            case nir_intrinsic_shared_atomic_xor_dxil:
               return true;
            default: break;
            }
         }
      }
   }
   return false;
}

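/* Emit the module: declare resources in the order the validator expects,
 * translate the entry point's control flow, fix up phis, and emit the
 * trailing metadata.
 */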
static bool
emit_module(struct ntd_context *ctx, const struct nir_to_dxil_options *opts)
{
   /* The validator forces us to emit resources in a specific order:
    * CBVs, Samplers, SRVs, UAVs. While we are at it, also remove stale
    * struct uniforms; they have been lowered but might not have been
    * removed yet. */
   sort_uniforms_by_binding_and_remove_structs(ctx->shader);

   /* CBVs */
   if (!emit_cbvs(ctx))
      return false;

   /* Samplers */
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
      unsigned count = glsl_type_get_sampler_count(var->type);
      const struct glsl_type *without_array = glsl_without_array(var->type);
      if (var->data.mode == nir_var_uniform && glsl_type_is_sampler(without_array) &&
          glsl_get_sampler_result_type(without_array) == GLSL_TYPE_VOID) {
         if (!emit_sampler(ctx, var, count))
            return false;
      }
   }

   /* SRVs */
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
      unsigned count = glsl_type_get_sampler_count(var->type);
      const struct glsl_type *without_array = glsl_without_array(var->type);
      if (var->data.mode == nir_var_uniform && glsl_type_is_sampler(without_array) &&
          glsl_get_sampler_result_type(without_array) != GLSL_TYPE_VOID) {
         if (!emit_srv(ctx, var, count))
            return false;
      }
   }
   /* Handle read-only SSBOs as SRVs */
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
      if ((var->data.access & ACCESS_NON_WRITEABLE) != 0) {
         unsigned count = 1;
         if (glsl_type_is_array(var->type))
            count = glsl_get_length(var->type);
         if (!emit_srv(ctx, var, count))
            return false;
      }
   }

   if (ctx->shader->info.shared_size && shader_has_shared_ops(ctx->shader)) {
      const struct dxil_type *type;
      unsigned size;

      /*
       * We always allocate an u32 array, no matter the actual variable types.
       * According to the DXIL spec, the minimum load/store granularity is
       * 32-bit, anything smaller requires using a read-extract/read-write-modify
       * approach. Non-atomic 64-bit accesses are allowed, but the
       * GEP(cast(gvar, u64[] *), offset) and cast(GEP(gvar, offset), u64 *))
       * sequences don't seem to be accepted by the DXIL validator when the
       * pointer is in the groupshared address space, making the 32-bit -> 64-bit
       * pointer cast impossible.
       */
      size = ALIGN_POT(ctx->shader->info.shared_size, sizeof(uint32_t));
      type = dxil_module_get_array_type(&ctx->mod,
                                        dxil_module_get_int_type(&ctx->mod, 32),
                                        size / sizeof(uint32_t));
      ctx->sharedvars = dxil_add_global_ptr_var(&ctx->mod, "shared", type,
                                                DXIL_AS_GROUPSHARED,
                                                ffs(sizeof(uint64_t)),
                                                NULL);
   }

   if (!emit_scratch(ctx))
      return false;

   /* UAVs */
   if (ctx->shader->info.stage == MESA_SHADER_KERNEL) {
      if (!emit_globals(ctx, opts->num_kernel_globals))
         return false;

      ctx->consts = _mesa_pointer_hash_table_create(ctx->ralloc_ctx);
      if (!ctx->consts)
         return false;
      if (!emit_global_consts(ctx))
         return false;
   } else {
      /* Handle read/write SSBOs as UAVs */
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
         if ((var->data.access & ACCESS_NON_WRITEABLE) == 0) {
            unsigned count = 1;
            if (glsl_type_is_array(var->type))
               count = glsl_get_length(var->type);
            if (!emit_uav(ctx, var->data.binding, var->data.descriptor_set,
                          count, DXIL_COMP_TYPE_INVALID,
                          DXIL_RESOURCE_KIND_RAW_BUFFER, var->name))
               return false;
         }
      }
   }

   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
      if (var->data.mode == nir_var_uniform && glsl_type_is_image(glsl_without_array(var->type))) {
         if (!emit_uav_var(ctx, var, glsl_type_get_image_count(var->type)))
            return false;
      }
   }

   nir_function_impl *entry = nir_shader_get_entrypoint(ctx->shader);
   nir_metadata_require(entry, nir_metadata_block_index);

   assert(entry->num_blocks > 0);
   ctx->mod.basic_block_ids = rzalloc_array(ctx->ralloc_ctx, int,
                                            entry->num_blocks);
   if (!ctx->mod.basic_block_ids)
      return false;

   for (int i = 0; i < entry->num_blocks; ++i)
      ctx->mod.basic_block_ids[i] = -1;
   ctx->mod.num_basic_block_ids = entry->num_blocks;

   ctx->defs = rzalloc_array(ctx->ralloc_ctx, struct dxil_def,
                             entry->ssa_alloc);
   if (!ctx->defs)
      return false;
   ctx->num_defs = entry->ssa_alloc;

   ctx->phis = _mesa_pointer_hash_table_create(ctx->ralloc_ctx);
   if (!ctx->phis)
      return false;

   prepare_phi_values(ctx);

   if (!emit_cf_list(ctx, &entry->body))
      return false;

   hash_table_foreach(ctx->phis, entry) {
      fixup_phi(ctx, (nir_phi_instr *)entry->key,
                (struct phi_block *)entry->data);
   }

   if (!dxil_emit_ret_void(&ctx->mod))
      return false;

   if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_out) {
         if (var->data.location == FRAG_RESULT_STENCIL) {
            ctx->mod.feats.stencil_ref = true;
         }
      }
   }

   if (ctx->mod.feats.native_low_precision)
      ctx->mod.minor_version = MAX2(ctx->mod.minor_version, 2);

   return emit_metadata(ctx) &&
          dxil_emit_module(&ctx->mod);
}

static unsigned int
get_dxil_shader_kind(struct nir_shader *s)
{
   switch (s->info.stage) {
   case MESA_SHADER_VERTEX:
      return DXIL_VERTEX_SHADER;
   case MESA_SHADER_GEOMETRY:
      return DXIL_GEOMETRY_SHADER;
   case MESA_SHADER_FRAGMENT:
      return DXIL_PIXEL_SHADER;
   case MESA_SHADER_KERNEL:
   case MESA_SHADER_COMPUTE:
      return DXIL_COMPUTE_SHADER;
   default:
      unreachable("unknown shader stage in nir_to_dxil");
      return DXIL_COMPUTE_SHADER;
   }
}

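/* Callback for nir_lower_bit_size: DXIL has no 8-bit types and 16-bit
 * support is optional, so ask for non-conversion ALU ops to be widened
 * whenever an operand is narrower than the supported minimum (16 bits,
 * or 32 bits when lower_int16 is set).
 */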
static unsigned
lower_bit_size_callback(const nir_instr* instr, void *data)
{
   if (instr->type != nir_instr_type_alu)
      return 0;
   const nir_alu_instr *alu = nir_instr_as_alu(instr);

   if (nir_op_infos[alu->op].is_conversion)
      return 0;

   unsigned num_inputs = nir_op_infos[alu->op].num_inputs;
   const struct nir_to_dxil_options *opts = (const struct nir_to_dxil_options*)data;
   unsigned min_bit_size = opts->lower_int16 ? 32 : 16;

   unsigned ret = 0;
   for (unsigned i = 0; i < num_inputs; i++) {
      unsigned bit_size = nir_src_bit_size(alu->src[i].src);
      if (bit_size != 1 && bit_size < min_bit_size)
         ret = min_bit_size;
   }

   return ret;
}

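/* Run the lowering and optimization passes to a fixed point, then apply
 * the late algebraic optimizations.
 */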
static void
optimize_nir(struct nir_shader *s, const struct nir_to_dxil_options *opts)
{
   bool progress;
   do {
      progress = false;
      NIR_PASS_V(s, nir_lower_vars_to_ssa);
      NIR_PASS(progress, s, nir_lower_indirect_derefs, nir_var_function_temp, UINT32_MAX);
      NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, nir_opt_copy_prop_vars);
      NIR_PASS(progress, s, nir_lower_bit_size, lower_bit_size_callback, (void*)opts);
      NIR_PASS(progress, s, dxil_nir_lower_8bit_conv);
      if (opts->lower_int16)
         NIR_PASS(progress, s, dxil_nir_lower_16bit_conv);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_if, true);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, dxil_nir_lower_x2b);
      if (s->options->lower_int64_options)
         NIR_PASS(progress, s, nir_lower_int64);
      NIR_PASS(progress, s, nir_lower_alu);
      NIR_PASS(progress, s, dxil_nir_lower_inot);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_undef);
      NIR_PASS(progress, s, nir_lower_undef_to_zero);
      NIR_PASS(progress, s, nir_opt_deref);
      NIR_PASS(progress, s, dxil_nir_lower_upcast_phis, opts->lower_int16 ? 32 : 16);
      NIR_PASS(progress, s, nir_lower_64bit_phis);
      NIR_PASS_V(s, nir_lower_system_values);
   } while (progress);

   do {
      progress = false;
      NIR_PASS(progress, s, nir_opt_algebraic_late);
   } while (progress);
}

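/* Fill the PSV (pipeline state validation) data: the resource list,
 * signature element counts, and the per-stage fields the runtime checks.
 */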
static
void dxil_fill_validation_state(struct ntd_context *ctx,
                                struct dxil_validation_state *state)
{
   state->num_resources = util_dynarray_num_elements(&ctx->resources, struct dxil_resource);
   state->resources = (struct dxil_resource*)ctx->resources.data;
   state->state.psv0.max_expected_wave_lane_count = UINT_MAX;
   state->state.shader_stage = (uint8_t)ctx->mod.shader_kind;
   state->state.sig_input_elements = (uint8_t)ctx->mod.num_sig_inputs;
   state->state.sig_output_elements = (uint8_t)ctx->mod.num_sig_outputs;
   //state->state.sig_patch_const_or_prim_elements = 0;

   switch (ctx->mod.shader_kind) {
   case DXIL_VERTEX_SHADER:
      state->state.psv0.vs.output_position_present = ctx->mod.info.has_out_position;
      break;
   case DXIL_PIXEL_SHADER:
      /* TODO: handle depth outputs */
      state->state.psv0.ps.depth_output = ctx->mod.info.has_out_depth;
      state->state.psv0.ps.sample_frequency =
         ctx->mod.info.has_per_sample_input;
      break;
   case DXIL_COMPUTE_SHADER:
      break;
   case DXIL_GEOMETRY_SHADER:
      state->state.max_vertex_count = ctx->shader->info.gs.vertices_out;
      state->state.psv0.gs.input_primitive = dxil_get_input_primitive(ctx->shader->info.gs.input_primitive);
      state->state.psv0.gs.output_toplology = dxil_get_primitive_topology(ctx->shader->info.gs.output_primitive);
      state->state.psv0.gs.output_stream_mask = ctx->shader->info.gs.active_stream_mask;
      state->state.psv0.gs.output_position_present = ctx->mod.info.has_out_position;
      break;
   default:
      assert(0 && "Shader type not (yet) supported");
   }
}

static nir_variable *
add_sysvalue(struct ntd_context *ctx,
             uint8_t value, char *name,
             int driver_location)
{
   nir_variable *var = rzalloc(ctx->shader, nir_variable);
   if (!var)
      return NULL;
   var->data.driver_location = driver_location;
   var->data.location = value;
   var->type = glsl_uint_type();
   var->name = name;
   var->data.mode = nir_var_system_value;
   var->data.interpolation = INTERP_MODE_FLAT;
   return var;
}

static bool
append_input_or_sysvalue(struct ntd_context *ctx,
                         int input_loc, int sv_slot,
                         char *name, int driver_location)
{
   if (input_loc >= 0) {
      /* Check whether an input variable is available that corresponds
       * to the sysvalue */
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
         if (var->data.location == input_loc) {
            ctx->system_value[sv_slot] = var;
            return true;
         }
      }
   }

   ctx->system_value[sv_slot] = add_sysvalue(ctx, sv_slot, name, driver_location);
   if (!ctx->system_value[sv_slot])
      return false;

   nir_shader_add_variable(ctx->shader, ctx->system_value[sv_slot]);
   return true;
}

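/* System values we may need to materialize: slot is the varying slot to
 * reuse if the shader already reads the value as an input, or -1 if none.
 */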
struct sysvalue_name {
   gl_system_value value;
   int slot;
   char *name;
} possible_sysvalues[] = {
   {SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, -1, "SV_VertexID"},
   {SYSTEM_VALUE_INSTANCE_ID, -1, "SV_InstanceID"},
   {SYSTEM_VALUE_FRONT_FACE, VARYING_SLOT_FACE, "SV_IsFrontFace"},
   {SYSTEM_VALUE_PRIMITIVE_ID, VARYING_SLOT_PRIMITIVE_ID, "SV_PrimitiveID"},
   {SYSTEM_VALUE_SAMPLE_ID, -1, "SV_SampleIndex"},
};

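/* Assign driver locations after all existing inputs and system values to
 * every system value the shader reads.
 */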
static bool
allocate_sysvalues(struct ntd_context *ctx)
{
   unsigned driver_location = 0;
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in)
      driver_location++;
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_system_value)
      driver_location++;

   for (unsigned i = 0; i < ARRAY_SIZE(possible_sysvalues); ++i) {
      struct sysvalue_name *info = &possible_sysvalues[i];
      if (BITSET_TEST(ctx->shader->info.system_values_read, info->value)) {
         if (!append_input_or_sysvalue(ctx, info->slot,
                                       info->value, info->name,
                                       driver_location++))
            return false;
      }
   }
   return true;
}

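/* Entry point: lower and optimize the NIR shader, emit the DXIL module,
 * and wrap it in a DXIL container (features, I/O signatures, state
 * validation, module) written to the given blob.
 */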
bool
nir_to_dxil(struct nir_shader *s, const struct nir_to_dxil_options *opts,
            struct blob *blob)
{
   assert(opts);
   bool retval = true;
   debug_dxil = (int)debug_get_option_debug_dxil();
   blob_init(blob);

   struct ntd_context *ctx = calloc(1, sizeof(*ctx));
   if (!ctx)
      return false;

   ctx->opts = opts;
   ctx->shader = s;

   ctx->ralloc_ctx = ralloc_context(NULL);
   if (!ctx->ralloc_ctx) {
      retval = false;
      goto out;
   }

   util_dynarray_init(&ctx->srv_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->uav_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->cbv_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->sampler_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->resources, ctx->ralloc_ctx);
   dxil_module_init(&ctx->mod, ctx->ralloc_ctx);
   ctx->mod.shader_kind = get_dxil_shader_kind(s);
   ctx->mod.major_version = 6;
   ctx->mod.minor_version = 1;

   NIR_PASS_V(s, nir_lower_pack);
   NIR_PASS_V(s, nir_lower_frexp);
   NIR_PASS_V(s, nir_lower_flrp, 16 | 32 | 64, true);

   optimize_nir(s, opts);

   NIR_PASS_V(s, nir_remove_dead_variables,
              nir_var_function_temp | nir_var_shader_temp, NULL);

   if (!allocate_sysvalues(ctx)) {
      retval = false;
      goto out;
   }

   if (debug_dxil & DXIL_DEBUG_VERBOSE)
      nir_print_shader(s, stderr);

   if (!emit_module(ctx, opts)) {
      debug_printf("D3D12: dxil_container_add_module failed\n");
      retval = false;
      goto out;
   }

   if (debug_dxil & DXIL_DEBUG_DUMP_MODULE) {
      struct dxil_dumper *dumper = dxil_dump_create();
      dxil_dump_module(dumper, &ctx->mod);
      fprintf(stderr, "\n");
      dxil_dump_buf_to_file(dumper, stderr);
      fprintf(stderr, "\n\n");
      dxil_dump_free(dumper);
   }

   struct dxil_container container;
   dxil_container_init(&container);
   if (!dxil_container_add_features(&container, &ctx->mod.feats)) {
      debug_printf("D3D12: dxil_container_add_features failed\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_add_io_signature(&container,
                                        DXIL_ISG1,
                                        ctx->mod.num_sig_inputs,
                                        ctx->mod.inputs)) {
      debug_printf("D3D12: failed to write input signature\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_add_io_signature(&container,
                                        DXIL_OSG1,
                                        ctx->mod.num_sig_outputs,
                                        ctx->mod.outputs)) {
      debug_printf("D3D12: failed to write output signature\n");
      retval = false;
      goto out;
   }

   struct dxil_validation_state validation_state;
   memset(&validation_state, 0, sizeof(validation_state));
   dxil_fill_validation_state(ctx, &validation_state);

   if (!dxil_container_add_state_validation(&container, &ctx->mod,
                                            &validation_state)) {
      debug_printf("D3D12: failed to write state-validation\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_add_module(&container, &ctx->mod)) {
      debug_printf("D3D12: failed to write module\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_write(&container, blob)) {
      debug_printf("D3D12: dxil_container_write failed\n");
      retval = false;
      goto out;
   }
   dxil_container_finish(&container);

   if (debug_dxil & DXIL_DEBUG_DUMP_BLOB) {
      static int shader_id = 0;
      char buffer[64];
      snprintf(buffer, sizeof(buffer), "shader_%s_%d.blob",
               get_shader_kind_str(ctx->mod.shader_kind), shader_id++);
      debug_printf("Trying to write blob to %s\n", buffer);
      FILE *f = fopen(buffer, "wb");
      if (f) {
         fwrite(blob->data, 1, blob->size, f);
         fclose(f);
      }
   }

out:
   dxil_module_release(&ctx->mod);
   ralloc_free(ctx->ralloc_ctx);
   free(ctx);
   return retval;
}

enum dxil_sysvalue_type
nir_var_to_dxil_sysvalue_type(nir_variable *var, uint64_t other_stage_mask)
{
   switch (var->data.location) {
   case VARYING_SLOT_FACE:
      return DXIL_GENERATED_SYSVALUE;
   case VARYING_SLOT_POS:
   case VARYING_SLOT_PRIMITIVE_ID:
   case VARYING_SLOT_CLIP_DIST0:
   case VARYING_SLOT_CLIP_DIST1:
   case VARYING_SLOT_PSIZ:
      if (!((1ull << var->data.location) & other_stage_mask))
         return DXIL_SYSVALUE;
      FALLTHROUGH;
   default:
      return DXIL_NO_SYSVALUE;
   }
}