/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_to_dxil.h"

#include "dxil_container.h"
#include "dxil_dump.h"
#include "dxil_enums.h"
#include "dxil_function.h"
#include "dxil_module.h"
#include "dxil_nir.h"
#include "dxil_signature.h"

#include "nir/nir_builder.h"
#include "util/u_debug.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"

#include "git_sha1.h"

#include "vulkan/vulkan_core.h"

#include <stdint.h>

int debug_dxil = 0;

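/* Debug flags for this backend, selected through the DXIL_DEBUG environment
 * variable (parsed by DEBUG_GET_ONCE_FLAGS_OPTION below).
 */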
static const struct debug_named_value
dxil_debug_options[] = {
   { "verbose", DXIL_DEBUG_VERBOSE, NULL },
   { "dump_blob", DXIL_DEBUG_DUMP_BLOB, "Write shader blobs" },
   { "trace", DXIL_DEBUG_TRACE, "Trace instruction conversion" },
   { "dump_module", DXIL_DEBUG_DUMP_MODULE, "Dump module tree to stderr" },
   DEBUG_NAMED_VALUE_END
};

DEBUG_GET_ONCE_FLAGS_OPTION(debug_dxil, "DXIL_DEBUG", dxil_debug_options, 0)

#define NIR_INSTR_UNSUPPORTED(instr) \
   do { \
      if (debug_dxil & DXIL_DEBUG_VERBOSE) { \
         fprintf(stderr, "Unsupported instruction: "); \
         nir_print_instr(instr, stderr); \
         fprintf(stderr, "\n"); \
      } \
   } while (0)

#define TRACE_CONVERSION(instr) \
   do { \
      if (debug_dxil & DXIL_DEBUG_TRACE) { \
         fprintf(stderr, "Convert '"); \
         nir_print_instr(instr, stderr); \
         fprintf(stderr, "'\n"); \
      } \
   } while (0)

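/* NIR lowering options: ask NIR to rewrite the ALU ops and I/O forms this
 * backend does not emit directly into constructs it can handle.
 */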
static const nir_shader_compiler_options
nir_options = {
   .lower_ineg = true,
   .lower_fneg = true,
   .lower_ffma16 = true,
   .lower_ffma32 = true,
   .lower_isign = true,
   .lower_fsign = true,
   .lower_iabs = true,
   .lower_fmod = true,
   .lower_fpow = true,
   .lower_scmp = true,
   .lower_ldexp = true,
   .lower_flrp16 = true,
   .lower_flrp32 = true,
   .lower_flrp64 = true,
   .lower_bitfield_extract_to_shifts = true,
   .lower_extract_word = true,
   .lower_extract_byte = true,
   .lower_insert_word = true,
   .lower_insert_byte = true,
   .lower_all_io_to_elements = true,
   .lower_all_io_to_temps = true,
   .lower_hadd = true,
   .lower_uadd_sat = true,
   .lower_iadd_sat = true,
   .lower_uadd_carry = true,
   .lower_mul_high = true,
   .lower_rotate = true,
   .lower_pack_64_2x32_split = true,
   .lower_pack_32_2x16_split = true,
   .lower_unpack_64_2x32_split = true,
   .lower_unpack_32_2x16_split = true,
   .has_fsub = true,
   .has_isub = true,
   .use_scoped_barrier = true,
   .vertex_id_zero_based = true,
   .lower_base_vertex = true,
   .has_cs_global_id = true,
   .has_txs = true,
};

const nir_shader_compiler_options*
dxil_get_nir_compiler_options(void)
{
   return &nir_options;
}

static bool
emit_llvm_ident(struct dxil_module *m)
{
   const struct dxil_mdnode *compiler = dxil_get_metadata_string(m, "Mesa version " PACKAGE_VERSION MESA_GIT_SHA1);
   if (!compiler)
      return false;

   const struct dxil_mdnode *llvm_ident = dxil_get_metadata_node(m, &compiler, 1);
   return llvm_ident &&
          dxil_add_metadata_named_node(m, "llvm.ident", &llvm_ident, 1);
}

static bool
emit_named_version(struct dxil_module *m, const char *name,
                   int major, int minor)
{
   const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, major);
   const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, minor);
   const struct dxil_mdnode *version_nodes[] = { major_node, minor_node };
   const struct dxil_mdnode *version = dxil_get_metadata_node(m, version_nodes,
                                                              ARRAY_SIZE(version_nodes));
   return dxil_add_metadata_named_node(m, name, &version, 1);
}

static const char *
get_shader_kind_str(enum dxil_shader_kind kind)
{
   switch (kind) {
   case DXIL_PIXEL_SHADER:
      return "ps";
   case DXIL_VERTEX_SHADER:
      return "vs";
   case DXIL_GEOMETRY_SHADER:
      return "gs";
   case DXIL_HULL_SHADER:
      return "hs";
   case DXIL_DOMAIN_SHADER:
      return "ds";
   case DXIL_COMPUTE_SHADER:
      return "cs";
   default:
      unreachable("invalid shader kind");
   }
}

static bool
emit_dx_shader_model(struct dxil_module *m)
{
   const struct dxil_mdnode *type_node = dxil_get_metadata_string(m, get_shader_kind_str(m->shader_kind));
   const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, m->major_version);
   const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, m->minor_version);
   const struct dxil_mdnode *shader_model[] = { type_node, major_node,
                                                minor_node };
   const struct dxil_mdnode *dx_shader_model = dxil_get_metadata_node(m, shader_model, ARRAY_SIZE(shader_model));

   return dxil_add_metadata_named_node(m, "dx.shaderModel",
                                       &dx_shader_model, 1);
}

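/* Tags used in the trailing extended-property field of typed and structured
 * buffer resource metadata records.
 */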
enum {
   DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG = 0,
   DXIL_STRUCTURED_BUFFER_ELEMENT_STRIDE_TAG = 1
};

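/* Opcode numbers for the dx.op.* intrinsics we emit; these must match the
 * values defined in the DXIL specification (DXIL.rst in the
 * DirectXShaderCompiler repository, linked from get_module_flags() below).
 */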
enum dxil_intr {
   DXIL_INTR_LOAD_INPUT = 4,
   DXIL_INTR_STORE_OUTPUT = 5,
   DXIL_INTR_FABS = 6,
   DXIL_INTR_SATURATE = 7,

   DXIL_INTR_ISFINITE = 10,
   DXIL_INTR_ISNORMAL = 11,

   DXIL_INTR_FCOS = 12,
   DXIL_INTR_FSIN = 13,

   DXIL_INTR_FEXP2 = 21,
   DXIL_INTR_FRC = 22,
   DXIL_INTR_FLOG2 = 23,

   DXIL_INTR_SQRT = 24,
   DXIL_INTR_RSQRT = 25,
   DXIL_INTR_ROUND_NE = 26,
   DXIL_INTR_ROUND_NI = 27,
   DXIL_INTR_ROUND_PI = 28,
   DXIL_INTR_ROUND_Z = 29,

   DXIL_INTR_COUNTBITS = 31,
   DXIL_INTR_FIRSTBIT_HI = 33,

   DXIL_INTR_FMAX = 35,
   DXIL_INTR_FMIN = 36,
   DXIL_INTR_IMAX = 37,
   DXIL_INTR_IMIN = 38,
   DXIL_INTR_UMAX = 39,
   DXIL_INTR_UMIN = 40,

   DXIL_INTR_FMA = 47,

   DXIL_INTR_CREATE_HANDLE = 57,
   DXIL_INTR_CBUFFER_LOAD_LEGACY = 59,

   DXIL_INTR_SAMPLE = 60,
   DXIL_INTR_SAMPLE_BIAS = 61,
   DXIL_INTR_SAMPLE_LEVEL = 62,
   DXIL_INTR_SAMPLE_GRAD = 63,
   DXIL_INTR_SAMPLE_CMP = 64,
   DXIL_INTR_SAMPLE_CMP_LVL_ZERO = 65,

   DXIL_INTR_TEXTURE_LOAD = 66,
   DXIL_INTR_TEXTURE_STORE = 67,

   DXIL_INTR_BUFFER_LOAD = 68,
   DXIL_INTR_BUFFER_STORE = 69,

   DXIL_INTR_TEXTURE_SIZE = 72,

   DXIL_INTR_ATOMIC_BINOP = 78,
   DXIL_INTR_ATOMIC_CMPXCHG = 79,
   DXIL_INTR_BARRIER = 80,
   DXIL_INTR_TEXTURE_LOD = 81,

   DXIL_INTR_DISCARD = 82,
   DXIL_INTR_DDX_COARSE = 83,
   DXIL_INTR_DDY_COARSE = 84,
   DXIL_INTR_DDX_FINE = 85,
   DXIL_INTR_DDY_FINE = 86,

   DXIL_INTR_SAMPLE_INDEX = 90,

   DXIL_INTR_THREAD_ID = 93,
   DXIL_INTR_GROUP_ID = 94,
   DXIL_INTR_THREAD_ID_IN_GROUP = 95,
   DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP = 96,

   DXIL_INTR_EMIT_STREAM = 97,
   DXIL_INTR_CUT_STREAM = 98,

   DXIL_INTR_MAKE_DOUBLE = 101,
   DXIL_INTR_SPLIT_DOUBLE = 102,

   DXIL_INTR_PRIMITIVE_ID = 108,

   DXIL_INTR_LEGACY_F32TOF16 = 130,
   DXIL_INTR_LEGACY_F16TOF32 = 131,

   DXIL_INTR_ATTRIBUTE_AT_VERTEX = 137,
};

enum dxil_atomic_op {
   DXIL_ATOMIC_ADD = 0,
   DXIL_ATOMIC_AND = 1,
   DXIL_ATOMIC_OR = 2,
   DXIL_ATOMIC_XOR = 3,
   DXIL_ATOMIC_IMIN = 4,
   DXIL_ATOMIC_IMAX = 5,
   DXIL_ATOMIC_UMIN = 6,
   DXIL_ATOMIC_UMAX = 7,
   DXIL_ATOMIC_EXCHANGE = 8,
};

typedef struct {
   unsigned id;
   unsigned binding;
   unsigned size;
   unsigned space;
} resource_array_layout;

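/* Fill the six fields every resource metadata record shares: resource ID, an
 * undef pointer standing in for the global symbol, name, register space,
 * lower bound and range size.
 */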
static void
fill_resource_metadata(struct dxil_module *m, const struct dxil_mdnode **fields,
                       const struct dxil_type *struct_type,
                       const char *name, const resource_array_layout *layout)
{
   const struct dxil_type *pointer_type = dxil_module_get_pointer_type(m, struct_type);
   const struct dxil_value *pointer_undef = dxil_module_get_undef(m, pointer_type);

   fields[0] = dxil_get_metadata_int32(m, layout->id); // resource ID
   fields[1] = dxil_get_metadata_value(m, pointer_type, pointer_undef); // global constant symbol
   fields[2] = dxil_get_metadata_string(m, name ? name : ""); // name
   fields[3] = dxil_get_metadata_int32(m, layout->space); // space ID
   fields[4] = dxil_get_metadata_int32(m, layout->binding); // lower bound
   fields[5] = dxil_get_metadata_int32(m, layout->size); // range size
}

static const struct dxil_mdnode *
emit_srv_metadata(struct dxil_module *m, const struct dxil_type *elem_type,
                  const char *name, const resource_array_layout *layout,
                  enum dxil_component_type comp_type,
                  enum dxil_resource_kind res_kind)
{
   const struct dxil_mdnode *fields[9];

   const struct dxil_mdnode *metadata_tag_nodes[2];

   fill_resource_metadata(m, fields, elem_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
   fields[7] = dxil_get_metadata_int1(m, 0); // sample count
   if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
       res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
      metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
      metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
      fields[8] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
   } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      fields[8] = NULL;
   else
      unreachable("Structured buffers not supported yet");

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_uav_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                  const char *name, const resource_array_layout *layout,
                  enum dxil_component_type comp_type,
                  enum dxil_resource_kind res_kind)
{
   const struct dxil_mdnode *fields[11];

   const struct dxil_mdnode *metadata_tag_nodes[2];

   fill_resource_metadata(m, fields, struct_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
   fields[7] = dxil_get_metadata_int1(m, false); // globally-coherent
   fields[8] = dxil_get_metadata_int1(m, false); // has counter
   fields[9] = dxil_get_metadata_int1(m, false); // is ROV
   if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
       res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
      metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
      metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
      fields[10] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
   } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      fields[10] = NULL;
   else
      unreachable("Structured buffers not supported yet");

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_cbv_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                  const char *name, const resource_array_layout *layout,
                  unsigned size)
{
   const struct dxil_mdnode *fields[8];

   fill_resource_metadata(m, fields, struct_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, size); // constant buffer size
   fields[7] = NULL; // metadata

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_sampler_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                      nir_variable *var, const resource_array_layout *layout)
{
   const struct dxil_mdnode *fields[8];
   const struct glsl_type *type = glsl_without_array(var->type);

   fill_resource_metadata(m, fields, struct_type, var->name, layout);
   enum dxil_sampler_kind sampler_kind = glsl_sampler_type_is_shadow(type) ?
      DXIL_SAMPLER_KIND_COMPARISON : DXIL_SAMPLER_KIND_DEFAULT;
   fields[6] = dxil_get_metadata_int32(m, sampler_kind); // sampler kind
   fields[7] = NULL; // metadata

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}


#define MAX_SRVS 128
#define MAX_UAVS 64
#define MAX_CBVS 64 // ??
#define MAX_SAMPLERS 64 // ??

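/* One dxil_value per channel of a NIR SSA def; filled in as instructions are
 * converted and looked up when later instructions consume the def.
 */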
struct dxil_def {
   const struct dxil_value *chans[NIR_MAX_VEC_COMPONENTS];
};

struct ntd_context {
   void *ralloc_ctx;
   const struct nir_to_dxil_options *opts;
   struct nir_shader *shader;

   struct dxil_module mod;

   struct util_dynarray srv_metadata_nodes;
   const struct dxil_value *srv_handles[MAX_SRVS];

   struct util_dynarray uav_metadata_nodes;
   const struct dxil_value *uav_handles[MAX_UAVS];

   struct util_dynarray cbv_metadata_nodes;
   const struct dxil_value *cbv_handles[MAX_CBVS];

   struct util_dynarray sampler_metadata_nodes;
   const struct dxil_value *sampler_handles[MAX_SAMPLERS];

   struct util_dynarray resources;

   const struct dxil_mdnode *shader_property_nodes[6];
   size_t num_shader_property_nodes;

   struct dxil_def *defs;
   unsigned num_defs;
   struct hash_table *phis;

   const struct dxil_value *sharedvars;
   const struct dxil_value *scratchvars;
   struct hash_table *consts;

   nir_variable *ps_front_face;
   nir_variable *system_value[SYSTEM_VALUE_MAX];
};

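/* DXIL multiplexes many intrinsics through a handful of external functions;
 * pick the function name matching the opcode's category.
 */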
static const char*
unary_func_name(enum dxil_intr intr)
{
   switch (intr) {
   case DXIL_INTR_COUNTBITS:
   case DXIL_INTR_FIRSTBIT_HI:
      return "dx.op.unaryBits";
   case DXIL_INTR_ISFINITE:
   case DXIL_INTR_ISNORMAL:
      return "dx.op.isSpecialFloat";
   default:
      return "dx.op.unary";
   }
}

static const struct dxil_value *
emit_unary_call(struct ntd_context *ctx, enum overload_type overload,
                enum dxil_intr intr,
                const struct dxil_value *op0)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod,
                                                    unary_func_name(intr),
                                                    overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_binary_call(struct ntd_context *ctx, enum overload_type overload,
                 enum dxil_intr intr,
                 const struct dxil_value *op0, const struct dxil_value *op1)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.binary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0,
      op1
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_tertiary_call(struct ntd_context *ctx, enum overload_type overload,
                   enum dxil_intr intr,
                   const struct dxil_value *op0,
                   const struct dxil_value *op1,
                   const struct dxil_value *op2)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.tertiary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0,
      op1,
      op2
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_threadid_call(struct ntd_context *ctx, const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadId", DXIL_I32);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
       DXIL_INTR_THREAD_ID);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_threadidingroup_call(struct ntd_context *ctx,
                          const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadIdInGroup", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
       DXIL_INTR_THREAD_ID_IN_GROUP);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_flattenedthreadidingroup_call(struct ntd_context *ctx)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.flattenedThreadIdInGroup", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
       DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_groupid_call(struct ntd_context *ctx, const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.groupId", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
       DXIL_INTR_GROUP_ID);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_bufferload_call(struct ntd_context *ctx,
                     const struct dxil_value *handle,
                     const struct dxil_value *coord[2],
                     enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferLoad", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
       DXIL_INTR_BUFFER_LOAD);
   const struct dxil_value *args[] = { opcode, handle, coord[0], coord[1] };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_bufferstore_call(struct ntd_context *ctx,
                      const struct dxil_value *handle,
                      const struct dxil_value *coord[2],
                      const struct dxil_value *value[4],
                      const struct dxil_value *write_mask,
                      enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferStore", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
       DXIL_INTR_BUFFER_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1],
      value[0], value[1], value[2], value[3],
      write_mask
   };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_textureload_call(struct ntd_context *ctx,
                      const struct dxil_value *handle,
                      const struct dxil_value *coord[3],
                      enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", overload);
   if (!func)
      return NULL;
   const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
   const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
       DXIL_INTR_TEXTURE_LOAD);
   const struct dxil_value *args[] = { opcode, handle,
      /*lod_or_sample*/ int_undef,
      coord[0], coord[1], coord[2],
      /* offsets */ int_undef, int_undef, int_undef};

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_texturestore_call(struct ntd_context *ctx,
                       const struct dxil_value *handle,
                       const struct dxil_value *coord[3],
                       const struct dxil_value *value[4],
                       const struct dxil_value *write_mask,
                       enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureStore", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
       DXIL_INTR_TEXTURE_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1], coord[2],
      value[0], value[1], value[2], value[3],
      write_mask
   };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_atomic_binop(struct ntd_context *ctx,
                  const struct dxil_value *handle,
                  enum dxil_atomic_op atomic_op,
                  const struct dxil_value *coord[3],
                  const struct dxil_value *value)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.atomicBinOp", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_BINOP);
   const struct dxil_value *atomic_op_value =
      dxil_module_get_int32_const(&ctx->mod, atomic_op);
   const struct dxil_value *args[] = {
      opcode, handle, atomic_op_value,
      coord[0], coord[1], coord[2], value
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_atomic_cmpxchg(struct ntd_context *ctx,
                    const struct dxil_value *handle,
                    const struct dxil_value *coord[3],
                    const struct dxil_value *cmpval,
                    const struct dxil_value *newval)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.atomicCompareExchange", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_CMPXCHG);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1], coord[2], cmpval, newval
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

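/* dx.op.createHandle binds a resource: it takes the resource class, the ID of
 * the declared range, the index within that range, and whether the index may
 * be non-uniform across the wave.
 */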
static const struct dxil_value *
emit_createhandle_call(struct ntd_context *ctx,
                       enum dxil_resource_class resource_class,
                       unsigned resource_range_id,
                       const struct dxil_value *resource_range_index,
                       bool non_uniform_resource_index)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE);
   const struct dxil_value *resource_class_value = dxil_module_get_int8_const(&ctx->mod, resource_class);
   const struct dxil_value *resource_range_id_value = dxil_module_get_int32_const(&ctx->mod, resource_range_id);
   const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
   if (!opcode || !resource_class_value || !resource_range_id_value ||
       !non_uniform_resource_index_value)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      resource_class_value,
      resource_range_id_value,
      resource_range_index,
      non_uniform_resource_index_value
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.createHandle", DXIL_NONE);

   if (!func)
      return NULL;

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_createhandle_call_const_index(struct ntd_context *ctx,
                                   enum dxil_resource_class resource_class,
                                   unsigned resource_range_id,
                                   unsigned resource_range_index,
                                   bool non_uniform_resource_index)
{
   const struct dxil_value *resource_range_index_value = dxil_module_get_int32_const(&ctx->mod, resource_range_index);
   if (!resource_range_index_value)
      return NULL;

   return emit_createhandle_call(ctx, resource_class, resource_range_id,
                                 resource_range_index_value,
                                 non_uniform_resource_index);
}

static void
add_resource(struct ntd_context *ctx, enum dxil_resource_type type,
             const resource_array_layout *layout)
{
   struct dxil_resource *resource = util_dynarray_grow(&ctx->resources, struct dxil_resource, 1);
   resource->resource_type = type;
   resource->space = layout->space;
   resource->lower_bound = layout->binding;
   if (layout->size == 0 || (uint64_t)layout->size + layout->binding >= UINT_MAX)
      resource->upper_bound = UINT_MAX;
   else
      resource->upper_bound = layout->binding + layout->size - 1;
}

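/* Resource IDs are indices into the per-class metadata lists. The resources
 * array is appended to in CBV, sampler, SRV, UAV order, so compute each
 * class's offset into it before scanning for the range that contains the
 * binding.
 */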
static unsigned
get_resource_id(struct ntd_context *ctx, enum dxil_resource_class class,
                unsigned space, unsigned binding)
{
   unsigned offset = 0;
   unsigned count = 0;

   unsigned num_srvs = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_uavs = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_cbvs = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_samplers = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);

   switch (class) {
   case DXIL_RESOURCE_CLASS_UAV:
      offset = num_srvs + num_samplers + num_cbvs;
      count = num_uavs;
      break;
   case DXIL_RESOURCE_CLASS_SRV:
      offset = num_samplers + num_cbvs;
      count = num_srvs;
      break;
   case DXIL_RESOURCE_CLASS_SAMPLER:
      offset = num_cbvs;
      count = num_samplers;
      break;
   case DXIL_RESOURCE_CLASS_CBV:
      offset = 0;
      count = num_cbvs;
      break;
   }

   assert(offset + count <= util_dynarray_num_elements(&ctx->resources, struct dxil_resource));
   for (unsigned i = offset; i < offset + count; ++i) {
      const struct dxil_resource *resource = util_dynarray_element(&ctx->resources, struct dxil_resource, i);
      if (resource->space == space &&
          resource->lower_bound <= binding &&
          resource->upper_bound >= binding) {
         return i - offset;
      }
   }

   unreachable("Resource access for undeclared range");
   return 0;
}

static bool
emit_srv(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   unsigned id = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
   unsigned binding = var->data.binding;
   resource_array_layout layout = {id, binding, count, var->data.descriptor_set};

   enum dxil_component_type comp_type;
   enum dxil_resource_kind res_kind;
   enum dxil_resource_type res_type;
   if (var->data.mode == nir_var_mem_ssbo) {
      comp_type = DXIL_COMP_TYPE_INVALID;
      res_kind = DXIL_RESOURCE_KIND_RAW_BUFFER;
      res_type = DXIL_RES_SRV_RAW;
   } else {
      comp_type = dxil_get_comp_type(var->type);
      res_kind = dxil_get_resource_kind(var->type);
      res_type = DXIL_RES_SRV_TYPED;
   }
   const struct dxil_type *res_type_as_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, false /* readwrite */);
   const struct dxil_mdnode *srv_meta = emit_srv_metadata(&ctx->mod, res_type_as_type, var->name,
                                                          &layout, comp_type, res_kind);

   if (!srv_meta)
      return false;

   util_dynarray_append(&ctx->srv_metadata_nodes, const struct dxil_mdnode *, srv_meta);
   add_resource(ctx, res_type, &layout);
   if (res_type == DXIL_RES_SRV_RAW)
      ctx->mod.raw_and_structured_buffers = true;

   if (!ctx->opts->vulkan_environment) {
      for (unsigned i = 0; i < count; ++i) {
         const struct dxil_value *handle =
            emit_createhandle_call_const_index(ctx, DXIL_RESOURCE_CLASS_SRV,
                                               id, binding + i, false);
         if (!handle)
            return false;

         int idx = var->data.binding + i;
         ctx->srv_handles[idx] = handle;
      }
   }

   return true;
}

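/* Kernel globals are modeled as an array of raw (ByteAddressBuffer) UAVs
 * starting at binding 0 in space 0; handles for them are created on demand
 * rather than up front.
 */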
static bool
emit_globals(struct ntd_context *ctx, unsigned size)
{
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo)
      size++;

   if (!size)
      return true;

   const struct dxil_type *struct_type = dxil_module_get_res_type(&ctx->mod,
      DXIL_RESOURCE_KIND_RAW_BUFFER, DXIL_COMP_TYPE_INVALID, true /* readwrite */);
   if (!struct_type)
      return false;

   const struct dxil_type *array_type =
      dxil_module_get_array_type(&ctx->mod, struct_type, size);
   if (!array_type)
      return false;

   resource_array_layout layout = {0, 0, size, 0};
   const struct dxil_mdnode *uav_meta =
      emit_uav_metadata(&ctx->mod, array_type,
                        "globals", &layout,
                        DXIL_COMP_TYPE_INVALID,
                        DXIL_RESOURCE_KIND_RAW_BUFFER);
   if (!uav_meta)
      return false;

   util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta);
   if (util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8)
      ctx->mod.feats.use_64uavs = 1;
   /* Handles to UAVs used for kernel globals are created on-demand */
   add_resource(ctx, DXIL_RES_UAV_RAW, &layout);
   ctx->mod.raw_and_structured_buffers = true;
   return true;
}

static bool
emit_uav(struct ntd_context *ctx, unsigned binding, unsigned space, unsigned count,
         enum dxil_component_type comp_type, enum dxil_resource_kind res_kind, const char *name)
{
   unsigned id = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
   resource_array_layout layout = { id, binding, count, space };

   const struct dxil_type *res_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, true /* readwrite */);
   const struct dxil_mdnode *uav_meta = emit_uav_metadata(&ctx->mod, res_type, name,
                                                          &layout, comp_type, res_kind);

   if (!uav_meta)
      return false;

   util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta);
   if (util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8)
      ctx->mod.feats.use_64uavs = 1;

   add_resource(ctx, res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER ? DXIL_RES_UAV_RAW : DXIL_RES_UAV_TYPED, &layout);
   if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      ctx->mod.raw_and_structured_buffers = true;

   if (!ctx->opts->vulkan_environment) {
      for (unsigned i = 0; i < count; ++i) {
         const struct dxil_value *handle = emit_createhandle_call_const_index(ctx, DXIL_RESOURCE_CLASS_UAV,
                                                                              id, binding + i, false);
         if (!handle)
            return false;

         ctx->uav_handles[binding + i] = handle;
      }
   }

   return true;
}

static bool
emit_uav_var(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   unsigned binding = var->data.binding;
   unsigned space = var->data.descriptor_set;
   enum dxil_component_type comp_type = dxil_get_comp_type(var->type);
   enum dxil_resource_kind res_kind = dxil_get_resource_kind(var->type);
   const char *name = var->name;

   return emit_uav(ctx, binding, space, count, comp_type, res_kind, name);
}

static unsigned
get_dword_size(const struct glsl_type *type)
{
   if (glsl_type_is_array(type)) {
      type = glsl_without_array(type);
   }
   assert(glsl_type_is_struct(type) || glsl_type_is_interface(type));
   return glsl_get_explicit_size(type, false);
}

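/* Serialize a nir_constant into a flat byte array, recursing through arrays
 * and structs using their explicit (CL) layout.
 */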
static void
var_fill_const_array_with_vector_or_scalar(struct ntd_context *ctx,
                                           const struct nir_constant *c,
                                           const struct glsl_type *type,
                                           void *const_vals,
                                           unsigned int offset)
{
   assert(glsl_type_is_vector_or_scalar(type));
   unsigned int components = glsl_get_vector_elements(type);
   unsigned bit_size = glsl_get_bit_size(type);
   unsigned int increment = bit_size / 8;

   for (unsigned int comp = 0; comp < components; comp++) {
      uint8_t *dst = (uint8_t *)const_vals + offset;

      switch (bit_size) {
      case 64:
         memcpy(dst, &c->values[comp].u64, sizeof(c->values[0].u64));
         break;
      case 32:
         memcpy(dst, &c->values[comp].u32, sizeof(c->values[0].u32));
         break;
      case 16:
         memcpy(dst, &c->values[comp].u16, sizeof(c->values[0].u16));
         break;
      case 8:
         assert(glsl_base_type_is_integer(glsl_get_base_type(type)));
         memcpy(dst, &c->values[comp].u8, sizeof(c->values[0].u8));
         break;
      default:
         unreachable("unexpected bit-size");
      }

      offset += increment;
   }
}

static void
var_fill_const_array(struct ntd_context *ctx, const struct nir_constant *c,
                     const struct glsl_type *type, void *const_vals,
                     unsigned int offset)
{
   assert(!glsl_type_is_interface(type));

   if (glsl_type_is_vector_or_scalar(type)) {
      var_fill_const_array_with_vector_or_scalar(ctx, c, type,
                                                 const_vals,
                                                 offset);
   } else if (glsl_type_is_array(type)) {
      assert(!glsl_type_is_unsized_array(type));
      const struct glsl_type *without = glsl_without_array(type);
      unsigned stride = glsl_get_explicit_stride(without);

      for (unsigned elt = 0; elt < glsl_get_length(type); elt++) {
         var_fill_const_array(ctx, c->elements[elt], without,
                              const_vals, offset + (elt * stride));
         offset += glsl_get_cl_size(without);
      }
   } else if (glsl_type_is_struct(type)) {
      for (unsigned int elt = 0; elt < glsl_get_length(type); elt++) {
         const struct glsl_type *elt_type = glsl_get_struct_field(type, elt);
         unsigned field_offset = glsl_get_struct_field_offset(type, elt);

         var_fill_const_array(ctx, c->elements[elt],
                              elt_type, const_vals,
                              offset + field_offset);
      }
   } else
      unreachable("unknown GLSL type in var_fill_const_array");
}

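/* Shader-temp variables with constant initializers become global constant
 * arrays of i32 in the DXIL module; remember the resulting global in
 * ctx->consts so later loads can find it.
 */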
static bool
emit_global_consts(struct ntd_context *ctx)
{
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_temp) {
      assert(var->constant_initializer);

      unsigned int num_members = DIV_ROUND_UP(glsl_get_cl_size(var->type), 4);
      uint32_t *const_ints = ralloc_array(ctx->ralloc_ctx, uint32_t, num_members);
      var_fill_const_array(ctx, var->constant_initializer, var->type,
                           const_ints, 0);
      const struct dxil_value **const_vals =
         ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, num_members);
      if (!const_vals)
         return false;
      for (unsigned i = 0; i < num_members; i++)
         const_vals[i] = dxil_module_get_int32_const(&ctx->mod, const_ints[i]);

      const struct dxil_type *elt_type = dxil_module_get_int_type(&ctx->mod, 32);
      if (!elt_type)
         return false;
      const struct dxil_type *type =
         dxil_module_get_array_type(&ctx->mod, elt_type, num_members);
      if (!type)
         return false;
      const struct dxil_value *agg_vals =
         dxil_module_get_array_const(&ctx->mod, type, const_vals);
      if (!agg_vals)
         return false;

      const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name, type,
                                                              DXIL_AS_DEFAULT, 4,
                                                              agg_vals);
      if (!gvar)
         return false;

      if (!_mesa_hash_table_insert(ctx->consts, var, (void *)gvar))
         return false;
   }

   return true;
}

static bool
emit_cbv(struct ntd_context *ctx, unsigned binding, unsigned space,
         unsigned size, unsigned count, char *name)
{
   unsigned idx = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);

   const struct dxil_type *float32 = dxil_module_get_float_type(&ctx->mod, 32);
   const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, float32, size);
   const struct dxil_type *buffer_type = dxil_module_get_struct_type(&ctx->mod, name,
                                                                     &array_type, 1);
   const struct dxil_type *final_type = count != 1 ? dxil_module_get_array_type(&ctx->mod, buffer_type, count) : buffer_type;
   resource_array_layout layout = {idx, binding, count, space};
   const struct dxil_mdnode *cbv_meta = emit_cbv_metadata(&ctx->mod, final_type,
                                                          name, &layout, 4 * size);

   if (!cbv_meta)
      return false;

   util_dynarray_append(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *, cbv_meta);
   add_resource(ctx, DXIL_RES_CBV, &layout);

   if (!ctx->opts->vulkan_environment) {
      for (unsigned i = 0; i < count; ++i) {
         const struct dxil_value *handle = emit_createhandle_call_const_index(ctx, DXIL_RESOURCE_CLASS_CBV,
                                                                              idx, binding + i, false);
         if (!handle)
            return false;

         assert(!ctx->cbv_handles[binding + i]);
         ctx->cbv_handles[binding + i] = handle;
      }
   }

   return true;
}

static bool
emit_ubo_var(struct ntd_context *ctx, nir_variable *var)
{
   unsigned count = 1;
   if (glsl_type_is_array(var->type))
      count = glsl_get_length(var->type);
   return emit_cbv(ctx, var->data.binding, var->data.descriptor_set, get_dword_size(var->type), count, var->name);
}

static bool
emit_sampler(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   unsigned id = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);
   unsigned binding = var->data.binding;
   resource_array_layout layout = {id, binding, count, var->data.descriptor_set};
   const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
   const struct dxil_type *sampler_type = dxil_module_get_struct_type(&ctx->mod, "struct.SamplerState", &int32_type, 1);
   const struct dxil_mdnode *sampler_meta = emit_sampler_metadata(&ctx->mod, sampler_type, var, &layout);

   if (!sampler_meta)
      return false;

   util_dynarray_append(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *, sampler_meta);
   add_resource(ctx, DXIL_RES_SAMPLER, &layout);

   if (!ctx->opts->vulkan_environment) {
      for (unsigned i = 0; i < count; ++i) {
         const struct dxil_value *handle =
            emit_createhandle_call_const_index(ctx, DXIL_RESOURCE_CLASS_SAMPLER,
                                               id, binding + i, false);
         if (!handle)
            return false;

         unsigned idx = var->data.binding + i;
         ctx->sampler_handles[idx] = handle;
      }
   }

   return true;
}

static const struct dxil_mdnode *
emit_gs_state(struct ntd_context *ctx)
{
   const struct dxil_mdnode *gs_state_nodes[5];
   const nir_shader *s = ctx->shader;

   gs_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, dxil_get_input_primitive(s->info.gs.input_primitive));
   gs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.vertices_out);
   gs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.active_stream_mask);
   gs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, dxil_get_primitive_topology(s->info.gs.output_primitive));
   gs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.invocations);

   for (unsigned i = 0; i < ARRAY_SIZE(gs_state_nodes); ++i) {
      if (!gs_state_nodes[i])
         return NULL;
   }

   return dxil_get_metadata_node(&ctx->mod, gs_state_nodes, ARRAY_SIZE(gs_state_nodes));
}

static const struct dxil_mdnode *
emit_threads(struct ntd_context *ctx)
{
   const nir_shader *s = ctx->shader;
   const struct dxil_mdnode *threads_x = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[0], 1));
   const struct dxil_mdnode *threads_y = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[1], 1));
   const struct dxil_mdnode *threads_z = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[2], 1));
   if (!threads_x || !threads_y || !threads_z)
      return NULL;

   const struct dxil_mdnode *threads_nodes[] = { threads_x, threads_y, threads_z };
   return dxil_get_metadata_node(&ctx->mod, threads_nodes, ARRAY_SIZE(threads_nodes));
}

static uint64_t
get_module_flags(struct ntd_context *ctx)
{
   /* See the DXIL documentation for the definition of these flags:
    *
    * https://github.com/Microsoft/DirectXShaderCompiler/blob/master/docs/DXIL.rst#shader-flags
    */

   uint64_t flags = 0;
   if (ctx->mod.feats.doubles)
      flags |= (1 << 2);
   if (ctx->mod.raw_and_structured_buffers)
      flags |= (1 << 4);
   if (ctx->mod.feats.min_precision)
      flags |= (1 << 5);
   if (ctx->mod.feats.dx11_1_double_extensions)
      flags |= (1 << 6);
   if (ctx->mod.feats.inner_coverage)
      flags |= (1 << 10);
   if (ctx->mod.feats.stencil_ref)
      flags |= (1 << 11);
   if (ctx->mod.feats.typed_uav_load_additional_formats)
      flags |= (1 << 13);
   if (ctx->mod.feats.use_64uavs)
      flags |= (1 << 15);
   if (ctx->mod.feats.cs_4x_raw_sb)
      flags |= (1 << 17);
   if (ctx->mod.feats.wave_ops)
      flags |= (1 << 19);
   if (ctx->mod.feats.int64_ops)
      flags |= (1 << 20);
   if (ctx->mod.feats.native_low_precision)
      flags |= (1 << 23) | (1 << 5);

   if (ctx->opts->disable_math_refactoring)
      flags |= (1 << 1);

   return flags;
}

static const struct dxil_mdnode *
emit_entrypoint(struct ntd_context *ctx,
                const struct dxil_func *func, const char *name,
                const struct dxil_mdnode *signatures,
                const struct dxil_mdnode *resources,
                const struct dxil_mdnode *shader_props)
{
   const struct dxil_mdnode *func_md = dxil_get_metadata_func(&ctx->mod, func);
   const struct dxil_mdnode *name_md = dxil_get_metadata_string(&ctx->mod, name);
   const struct dxil_mdnode *nodes[] = {
      func_md,
      name_md,
      signatures,
      resources,
      shader_props
   };
   return dxil_get_metadata_node(&ctx->mod, nodes,
                                 ARRAY_SIZE(nodes));
}

static const struct dxil_mdnode *
emit_resources(struct ntd_context *ctx)
{
   bool emit_resources = false;
   const struct dxil_mdnode *resources_nodes[] = {
      NULL, NULL, NULL, NULL
   };

#define ARRAY_AND_SIZE(arr) arr.data, util_dynarray_num_elements(&arr, const struct dxil_mdnode *)

   if (ctx->srv_metadata_nodes.size) {
      resources_nodes[0] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->srv_metadata_nodes));
      emit_resources = true;
   }

   if (ctx->uav_metadata_nodes.size) {
      resources_nodes[1] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->uav_metadata_nodes));
      emit_resources = true;
   }

   if (ctx->cbv_metadata_nodes.size) {
      resources_nodes[2] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->cbv_metadata_nodes));
      emit_resources = true;
   }

   if (ctx->sampler_metadata_nodes.size) {
      resources_nodes[3] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->sampler_metadata_nodes));
      emit_resources = true;
   }

#undef ARRAY_AND_SIZE

   return emit_resources ?
      dxil_get_metadata_node(&ctx->mod, resources_nodes, ARRAY_SIZE(resources_nodes)) : NULL;
}

static bool
emit_tag(struct ntd_context *ctx, enum dxil_shader_tag tag,
         const struct dxil_mdnode *value_node)
{
   const struct dxil_mdnode *tag_node = dxil_get_metadata_int32(&ctx->mod, tag);
   if (!tag_node || !value_node)
      return false;
   assert(ctx->num_shader_property_nodes <= ARRAY_SIZE(ctx->shader_property_nodes) - 2);
   ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = tag_node;
   ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = value_node;

   return true;
}

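/* Emit the module-level named metadata a DXIL consumer expects: llvm.ident,
 * dx.version, dx.valver, dx.shaderModel, dx.resources, dx.typeAnnotations,
 * and the dx.entryPoints record tying it all to the main function.
 */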
static bool
emit_metadata(struct ntd_context *ctx)
{
   unsigned dxilMinor = ctx->mod.minor_version;
   if (!emit_llvm_ident(&ctx->mod) ||
       !emit_named_version(&ctx->mod, "dx.version", 1, dxilMinor) ||
       !emit_named_version(&ctx->mod, "dx.valver", 1, 4) ||
       !emit_dx_shader_model(&ctx->mod))
      return false;

   const struct dxil_type *void_type = dxil_module_get_void_type(&ctx->mod);
   const struct dxil_type *main_func_type = dxil_module_add_function_type(&ctx->mod, void_type, NULL, 0);
   const struct dxil_func *main_func = dxil_add_function_def(&ctx->mod, "main", main_func_type);
   if (!main_func)
      return false;

   const struct dxil_mdnode *resources_node = emit_resources(ctx);

   const struct dxil_mdnode *main_entrypoint = dxil_get_metadata_func(&ctx->mod, main_func);
   const struct dxil_mdnode *node27 = dxil_get_metadata_node(&ctx->mod, NULL, 0);

   const struct dxil_mdnode *node4 = dxil_get_metadata_int32(&ctx->mod, 0);
   const struct dxil_mdnode *nodes_4_27_27[] = {
      node4, node27, node27
   };
   const struct dxil_mdnode *node28 = dxil_get_metadata_node(&ctx->mod, nodes_4_27_27,
                                                             ARRAY_SIZE(nodes_4_27_27));

   const struct dxil_mdnode *node29 = dxil_get_metadata_node(&ctx->mod, &node28, 1);

   const struct dxil_mdnode *node3 = dxil_get_metadata_int32(&ctx->mod, 1);
   const struct dxil_mdnode *main_type_annotation_nodes[] = {
      node3, main_entrypoint, node29
   };
   const struct dxil_mdnode *main_type_annotation = dxil_get_metadata_node(&ctx->mod, main_type_annotation_nodes,
                                                                           ARRAY_SIZE(main_type_annotation_nodes));

   if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) {
      if (!emit_tag(ctx, DXIL_SHADER_TAG_GS_STATE, emit_gs_state(ctx)))
         return false;
   } else if (ctx->mod.shader_kind == DXIL_COMPUTE_SHADER) {
      if (!emit_tag(ctx, DXIL_SHADER_TAG_NUM_THREADS, emit_threads(ctx)))
         return false;
   }

   uint64_t flags = get_module_flags(ctx);
   if (flags != 0) {
      if (!emit_tag(ctx, DXIL_SHADER_TAG_FLAGS, dxil_get_metadata_int64(&ctx->mod, flags)))
         return false;
   }
   const struct dxil_mdnode *shader_properties = NULL;
   if (ctx->num_shader_property_nodes > 0) {
      shader_properties = dxil_get_metadata_node(&ctx->mod, ctx->shader_property_nodes,
                                                 ctx->num_shader_property_nodes);
      if (!shader_properties)
         return false;
   }

   const struct dxil_mdnode *signatures = get_signatures(&ctx->mod, ctx->shader,
                                                         ctx->opts->vulkan_environment);

   const struct dxil_mdnode *dx_entry_point = emit_entrypoint(ctx, main_func,
       "main", signatures, resources_node, shader_properties);
   if (!dx_entry_point)
      return false;

   if (resources_node) {
      const struct dxil_mdnode *dx_resources = resources_node;
      dxil_add_metadata_named_node(&ctx->mod, "dx.resources",
                                   &dx_resources, 1);
   }

   const struct dxil_mdnode *dx_type_annotations[] = { main_type_annotation };
   return dxil_add_metadata_named_node(&ctx->mod, "dx.typeAnnotations",
                                       dx_type_annotations,
                                       ARRAY_SIZE(dx_type_annotations)) &&
          dxil_add_metadata_named_node(&ctx->mod, "dx.entryPoints",
                                       &dx_entry_point, 1);
}

static const struct dxil_value *
bitcast_to_int(struct ntd_context *ctx, unsigned bit_size,
               const struct dxil_value *value)
{
   const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod, bit_size);
   if (!type)
      return NULL;

   return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
}

static const struct dxil_value *
bitcast_to_float(struct ntd_context *ctx, unsigned bit_size,
                 const struct dxil_value *value)
{
   const struct dxil_type *type = dxil_module_get_float_type(&ctx->mod, bit_size);
   if (!type)
      return NULL;

   return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
}

static void
store_ssa_def(struct ntd_context *ctx, nir_ssa_def *ssa, unsigned chan,
              const struct dxil_value *value)
{
   assert(ssa->index < ctx->num_defs);
   assert(chan < ssa->num_components);
   /* We pre-defined the dest value because of a phi node, so bitcast while
    * storing if the base type differs */
   if (ctx->defs[ssa->index].chans[chan]) {
      const struct dxil_type *expect_type = dxil_value_get_type(ctx->defs[ssa->index].chans[chan]);
      const struct dxil_type *value_type = dxil_value_get_type(value);
      if (dxil_type_to_nir_type(expect_type) != dxil_type_to_nir_type(value_type))
         value = dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, expect_type, value);
   }
   ctx->defs[ssa->index].chans[chan] = value;
}

static void
store_dest_value(struct ntd_context *ctx, nir_dest *dest, unsigned chan,
                 const struct dxil_value *value)
{
   assert(dest->is_ssa);
   assert(value);
   store_ssa_def(ctx, &dest->ssa, chan, value);
}

static void
store_dest(struct ntd_context *ctx, nir_dest *dest, unsigned chan,
           const struct dxil_value *value, nir_alu_type type)
{
   switch (nir_alu_type_get_base_type(type)) {
   case nir_type_float:
      if (nir_dest_bit_size(*dest) == 64)
         ctx->mod.feats.doubles = true;
      FALLTHROUGH;
   case nir_type_uint:
   case nir_type_int:
      if (nir_dest_bit_size(*dest) == 16)
         ctx->mod.feats.native_low_precision = true;
      if (nir_dest_bit_size(*dest) == 64)
         ctx->mod.feats.int64_ops = true;
      FALLTHROUGH;
   case nir_type_bool:
      store_dest_value(ctx, dest, chan, value);
      break;
   default:
      unreachable("unexpected nir_alu_type");
   }
}

static void
store_alu_dest(struct ntd_context *ctx, nir_alu_instr *alu, unsigned chan,
               const struct dxil_value *value)
{
   assert(!alu->dest.saturate);
   store_dest(ctx, &alu->dest.dest, chan, value,
              nir_op_infos[alu->op].output_type);
}

static const struct dxil_value *
get_src_ssa(struct ntd_context *ctx, const nir_ssa_def *ssa, unsigned chan)
{
   assert(ssa->index < ctx->num_defs);
   assert(chan < ssa->num_components);
   assert(ctx->defs[ssa->index].chans[chan]);
   return ctx->defs[ssa->index].chans[chan];
}

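/* SSA defs are stored per channel without regard to type; when a consumer
 * asks for a source with a specific nir_alu_type, bitcast (or truncate, for
 * bools) the stored value to the representation that type requires.
 */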
1493 static const struct dxil_value *
get_src(struct ntd_context * ctx,nir_src * src,unsigned chan,nir_alu_type type)1494 get_src(struct ntd_context *ctx, nir_src *src, unsigned chan,
1495 nir_alu_type type)
1496 {
1497 assert(src->is_ssa);
1498 const struct dxil_value *value = get_src_ssa(ctx, src->ssa, chan);
1499
1500 const int bit_size = nir_src_bit_size(*src);
1501
1502 switch (nir_alu_type_get_base_type(type)) {
1503 case nir_type_int:
1504 case nir_type_uint: {
1505 assert(bit_size != 64 || ctx->mod.feats.int64_ops);
1506 const struct dxil_type *expect_type = dxil_module_get_int_type(&ctx->mod, bit_size);
1507 /* nohing to do */
1508 if (dxil_value_type_equal_to(value, expect_type))
1509 return value;
1510 assert(dxil_value_type_bitsize_equal_to(value, bit_size));
1511 return bitcast_to_int(ctx, bit_size, value);
1512 }
1513
1514 case nir_type_float:
1515 assert(nir_src_bit_size(*src) >= 16);
1516 assert(nir_src_bit_size(*src) != 64 || (ctx->mod.feats.doubles &&
1517 ctx->mod.feats.int64_ops));
1518 if (dxil_value_type_equal_to(value, dxil_module_get_float_type(&ctx->mod, bit_size)))
1519 return value;
1520 assert(dxil_value_type_bitsize_equal_to(value, bit_size));
1521 return bitcast_to_float(ctx, bit_size, value);
1522
1523 case nir_type_bool:
1524 if (!dxil_value_type_bitsize_equal_to(value, 1)) {
1525 return dxil_emit_cast(&ctx->mod, DXIL_CAST_TRUNC,
1526 dxil_module_get_int_type(&ctx->mod, 1), value);
1527 }
1528 return value;
1529
1530 default:
1531 unreachable("unexpected nir_alu_type");
1532 }
1533 }

static const struct dxil_type *
get_alu_src_type(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src)
{
   assert(!alu->src[src].abs);
   assert(!alu->src[src].negate);
   nir_ssa_def *ssa_src = alu->src[src].src.ssa;
   unsigned chan = alu->src[src].swizzle[0];
   const struct dxil_value *value = get_src_ssa(ctx, ssa_src, chan);
   return dxil_value_get_type(value);
}

static const struct dxil_value *
get_alu_src(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src)
{
   assert(!alu->src[src].abs);
   assert(!alu->src[src].negate);

   unsigned chan = alu->src[src].swizzle[0];
   return get_src(ctx, &alu->src[src].src, chan,
                  nir_op_infos[alu->op].input_types[src]);
}

static bool
emit_binop(struct ntd_context *ctx, nir_alu_instr *alu,
           enum dxil_bin_opcode opcode,
           const struct dxil_value *op0, const struct dxil_value *op1)
{
   bool is_float_op = nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) == nir_type_float;

   enum dxil_opt_flags flags = 0;
   if (is_float_op && !alu->exact)
      flags |= DXIL_UNSAFE_ALGEBRA;

   const struct dxil_value *v = dxil_emit_binop(&ctx->mod, opcode, op0, op1, flags);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_shift(struct ntd_context *ctx, nir_alu_instr *alu,
           enum dxil_bin_opcode opcode,
           const struct dxil_value *op0, const struct dxil_value *op1)
{
   unsigned op0_bit_size = nir_src_bit_size(alu->src[0].src);
   unsigned op1_bit_size = nir_src_bit_size(alu->src[1].src);
   if (op0_bit_size != op1_bit_size) {
      const struct dxil_type *type =
         dxil_module_get_int_type(&ctx->mod, op0_bit_size);
      enum dxil_cast_opcode cast_op =
         op1_bit_size < op0_bit_size ? DXIL_CAST_ZEXT : DXIL_CAST_TRUNC;
      op1 = dxil_emit_cast(&ctx->mod, cast_op, type, op1);
   }

   const struct dxil_value *v =
      dxil_emit_binop(&ctx->mod, opcode, op0, op1, 0);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_cmp(struct ntd_context *ctx, nir_alu_instr *alu,
         enum dxil_cmp_pred pred,
         const struct dxil_value *op0, const struct dxil_value *op1)
{
   const struct dxil_value *v = dxil_emit_cmp(&ctx->mod, pred, op0, op1);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static enum dxil_cast_opcode
get_cast_op(nir_alu_instr *alu)
{
   unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
   unsigned src_bits = nir_src_bit_size(alu->src[0].src);

   switch (alu->op) {
   /* bool -> int */
   case nir_op_b2i16:
   case nir_op_b2i32:
   case nir_op_b2i64:
      return DXIL_CAST_ZEXT;

   /* float -> float */
   case nir_op_f2f16_rtz:
   case nir_op_f2f32:
   case nir_op_f2f64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_FPTRUNC;
      else
         return DXIL_CAST_FPEXT;

   /* int -> int */
   case nir_op_i2i16:
   case nir_op_i2i32:
   case nir_op_i2i64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_TRUNC;
      else
         return DXIL_CAST_SEXT;

   /* uint -> uint */
   case nir_op_u2u16:
   case nir_op_u2u32:
   case nir_op_u2u64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_TRUNC;
      else
         return DXIL_CAST_ZEXT;

   /* float -> int */
   case nir_op_f2i16:
   case nir_op_f2i32:
   case nir_op_f2i64:
      return DXIL_CAST_FPTOSI;

   /* float -> uint */
   case nir_op_f2u16:
   case nir_op_f2u32:
   case nir_op_f2u64:
      return DXIL_CAST_FPTOUI;

   /* int -> float */
   case nir_op_i2f16:
   case nir_op_i2f32:
   case nir_op_i2f64:
      return DXIL_CAST_SITOFP;

   /* uint -> float */
   case nir_op_u2f16:
   case nir_op_u2f32:
   case nir_op_u2f64:
      return DXIL_CAST_UITOFP;

   default:
      unreachable("unexpected cast op");
   }
}
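
/* Illustrative mapping: nir_op_i2i64 with a 32-bit source widens, so the
 * switch above yields DXIL_CAST_SEXT; nir_op_u2u16 from a 32-bit source
 * narrows, yielding DXIL_CAST_TRUNC. Within each signedness family the
 * dst/src bit sizes alone pick the opcode. */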

static const struct dxil_type *
get_cast_dest_type(struct ntd_context *ctx, nir_alu_instr *alu)
{
   unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
   switch (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type)) {
   case nir_type_bool:
      assert(dst_bits == 1);
      FALLTHROUGH;
   case nir_type_int:
   case nir_type_uint:
      return dxil_module_get_int_type(&ctx->mod, dst_bits);

   case nir_type_float:
      return dxil_module_get_float_type(&ctx->mod, dst_bits);

   default:
      unreachable("unknown nir_alu_type");
   }
}

static bool
is_double(nir_alu_type alu_type, unsigned bit_size)
{
   return nir_alu_type_get_base_type(alu_type) == nir_type_float &&
          bit_size == 64;
}

static bool
emit_cast(struct ntd_context *ctx, nir_alu_instr *alu,
          const struct dxil_value *value)
{
   enum dxil_cast_opcode opcode = get_cast_op(alu);
   const struct dxil_type *type = get_cast_dest_type(ctx, alu);
   if (!type)
      return false;

   const nir_op_info *info = &nir_op_infos[alu->op];
   switch (opcode) {
   case DXIL_CAST_UITOFP:
   case DXIL_CAST_SITOFP:
      if (is_double(info->output_type, nir_dest_bit_size(alu->dest.dest)))
         ctx->mod.feats.dx11_1_double_extensions = true;
      break;
   case DXIL_CAST_FPTOUI:
   case DXIL_CAST_FPTOSI:
      if (is_double(info->input_types[0], nir_src_bit_size(alu->src[0].src)))
         ctx->mod.feats.dx11_1_double_extensions = true;
      break;
   default:
      break;
   }

   const struct dxil_value *v = dxil_emit_cast(&ctx->mod, opcode, type,
                                               value);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static enum overload_type
get_overload(nir_alu_type alu_type, unsigned bit_size)
{
   switch (nir_alu_type_get_base_type(alu_type)) {
   case nir_type_int:
   case nir_type_uint:
      switch (bit_size) {
      case 16: return DXIL_I16;
      case 32: return DXIL_I32;
      case 64: return DXIL_I64;
      default:
         unreachable("unexpected bit_size");
      }
   case nir_type_float:
      switch (bit_size) {
      case 16: return DXIL_F16;
      case 32: return DXIL_F32;
      case 64: return DXIL_F64;
      default:
         unreachable("unexpected bit_size");
      }
   default:
      unreachable("unexpected output type");
   }
}
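
/* Example: get_overload(nir_type_float, 32) returns DXIL_F32, which selects
 * the ".f32" overload of a dx.op intrinsic; signed and unsigned integers of
 * the same width share a single DXIL_I* overload. */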

static bool
emit_unary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
                 enum dxil_intr intr, const struct dxil_value *op)
{
   const nir_op_info *info = &nir_op_infos[alu->op];
   unsigned src_bits = nir_src_bit_size(alu->src[0].src);
   enum overload_type overload = get_overload(info->input_types[0], src_bits);

   const struct dxil_value *v = emit_unary_call(ctx, overload, intr, op);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_binary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
                  enum dxil_intr intr,
                  const struct dxil_value *op0, const struct dxil_value *op1)
{
   const nir_op_info *info = &nir_op_infos[alu->op];
   assert(info->output_type == info->input_types[0]);
   assert(info->output_type == info->input_types[1]);
   unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
   assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
   assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
   enum overload_type overload = get_overload(info->output_type, dst_bits);

   const struct dxil_value *v = emit_binary_call(ctx, overload, intr,
                                                 op0, op1);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_tertiary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
                    enum dxil_intr intr,
                    const struct dxil_value *op0,
                    const struct dxil_value *op1,
                    const struct dxil_value *op2)
{
   const nir_op_info *info = &nir_op_infos[alu->op];
   assert(info->output_type == info->input_types[0]);
   assert(info->output_type == info->input_types[1]);
   assert(info->output_type == info->input_types[2]);

   unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
   assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
   assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
   assert(nir_src_bit_size(alu->src[2].src) == dst_bits);

   enum overload_type overload = get_overload(info->output_type, dst_bits);

   const struct dxil_value *v = emit_tertiary_call(ctx, overload, intr,
                                                   op0, op1, op2);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_select(struct ntd_context *ctx, nir_alu_instr *alu,
            const struct dxil_value *sel,
            const struct dxil_value *val_true,
            const struct dxil_value *val_false)
{
   assert(sel);
   assert(val_true);
   assert(val_false);

   const struct dxil_value *v = dxil_emit_select(&ctx->mod, sel, val_true, val_false);
   if (!v)
      return false;

   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_b2f16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
{
   assert(val);

   struct dxil_module *m = &ctx->mod;

   const struct dxil_value *c1 = dxil_module_get_float16_const(m, 0x3C00);
   const struct dxil_value *c0 = dxil_module_get_float16_const(m, 0);

   if (!c0 || !c1)
      return false;

   return emit_select(ctx, alu, val, c1, c0);
}
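
/* The raw bit pattern 0x3C00 is 1.0 in IEEE 754 binary16: sign 0, exponent
 * 01111 (15, i.e. a biased exponent of zero), mantissa 0, giving
 * 2^(15-15) * 1.0 = 1.0. b2f16 therefore selects between half 1.0 and 0.0. */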

static bool
emit_b2f32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
{
   assert(val);

   struct dxil_module *m = &ctx->mod;

   const struct dxil_value *c1 = dxil_module_get_float_const(m, 1.0f);
   const struct dxil_value *c0 = dxil_module_get_float_const(m, 0.0f);

   if (!c0 || !c1)
      return false;

   return emit_select(ctx, alu, val, c1, c0);
}

static bool
emit_f2b32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
{
   assert(val);

   const struct dxil_value *zero = dxil_module_get_float_const(&ctx->mod, 0.0f);
   if (!zero)
      return false;

   return emit_cmp(ctx, alu, DXIL_FCMP_UNE, val, zero);
}

static bool
emit_ufind_msb(struct ntd_context *ctx, nir_alu_instr *alu,
               const struct dxil_value *val)
{
   const nir_op_info *info = &nir_op_infos[alu->op];
   unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
   unsigned src_bits = nir_src_bit_size(alu->src[0].src);
   enum overload_type overload = get_overload(info->output_type, src_bits);

   const struct dxil_value *v = emit_unary_call(ctx, overload,
                                                DXIL_INTR_FIRSTBIT_HI, val);
   if (!v)
      return false;

   const struct dxil_value *size = dxil_module_get_int32_const(&ctx->mod,
                                                               src_bits - 1);
   const struct dxil_value *zero = dxil_module_get_int_const(&ctx->mod, 0,
                                                             src_bits);
   if (!size || !zero)
      return false;

   v = dxil_emit_binop(&ctx->mod, DXIL_BINOP_SUB, size, v, 0);
   const struct dxil_value *cnd = dxil_emit_cmp(&ctx->mod, DXIL_ICMP_NE,
                                                val, zero);
   if (!v || !cnd)
      return false;

   const struct dxil_value *minus_one =
      dxil_module_get_int_const(&ctx->mod, -1, dst_bits);
   if (!minus_one)
      return false;

   v = dxil_emit_select(&ctx->mod, cnd, v, minus_one);
   if (!v)
      return false;

   store_alu_dest(ctx, alu, 0, v);
   return true;
}
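
/* Worked example (32-bit source): FirstbitHi counts down from the most
 * significant bit, while NIR's ufind_msb wants the bit index from the LSB,
 * so the subtraction above converts between the two. For val = 0x10,
 * FirstbitHi returns 27 and (32 - 1) - 27 = 4, the index of the set bit;
 * the final select returns -1 when val == 0, matching NIR's definition. */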

static bool
emit_f16tof32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod,
                                                    "dx.op.legacyF16ToF32",
                                                    DXIL_NONE);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F16TOF32);
   if (!opcode)
      return false;

   const struct dxil_value *args[] = {
      opcode,
      val
   };

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_f32tof16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod,
                                                    "dx.op.legacyF32ToF16",
                                                    DXIL_NONE);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F32TOF16);
   if (!opcode)
      return false;

   const struct dxil_value *args[] = {
      opcode,
      val
   };

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_vec(struct ntd_context *ctx, nir_alu_instr *alu, unsigned num_inputs)
{
   const struct dxil_type *type = get_alu_src_type(ctx, alu, 0);
   nir_alu_type t = dxil_type_to_nir_type(type);

   for (unsigned i = 0; i < num_inputs; i++) {
      const struct dxil_value *src =
         get_src(ctx, &alu->src[i].src, alu->src[i].swizzle[0], t);
      if (!src)
         return false;

      store_alu_dest(ctx, alu, i, src);
   }
   return true;
}

static bool
emit_make_double(struct ntd_context *ctx, nir_alu_instr *alu)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.makeDouble", DXIL_F64);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_MAKE_DOUBLE);
   if (!opcode)
      return false;

   const struct dxil_value *args[3] = {
      opcode,
      get_src(ctx, &alu->src[0].src, 0, nir_type_uint32),
      get_src(ctx, &alu->src[0].src, 1, nir_type_uint32),
   };
   if (!args[1] || !args[2])
      return false;

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;
   store_dest(ctx, &alu->dest.dest, 0, v, nir_type_float64);
   return true;
}

static bool
emit_split_double(struct ntd_context *ctx, nir_alu_instr *alu)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.splitDouble", DXIL_F64);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SPLIT_DOUBLE);
   if (!opcode)
      return false;

   const struct dxil_value *args[] = {
      opcode,
      get_src(ctx, &alu->src[0].src, 0, nir_type_float64)
   };
   if (!args[1])
      return false;

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;

   const struct dxil_value *hi = dxil_emit_extractval(&ctx->mod, v, 0);
   const struct dxil_value *lo = dxil_emit_extractval(&ctx->mod, v, 1);
   if (!hi || !lo)
      return false;

   store_dest_value(ctx, &alu->dest.dest, 0, hi);
   store_dest_value(ctx, &alu->dest.dest, 1, lo);
   return true;
}
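
/* Illustrative bit pattern: the double 1.0 is 0x3FF0000000000000, i.e. an
 * upper half of 0x3FF00000 and a lower half of 0x00000000; splitDouble
 * breaks a double into those two 32-bit halves and makeDouble reassembles
 * the same value from them. */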

static bool
emit_alu(struct ntd_context *ctx, nir_alu_instr *alu)
{
   /* handle vec-instructions first; they are the only ones that produce
    * vector results.
    */
   switch (alu->op) {
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
   case nir_op_vec8:
   case nir_op_vec16:
      return emit_vec(ctx, alu, nir_op_infos[alu->op].num_inputs);
   case nir_op_mov: {
      assert(nir_dest_num_components(alu->dest.dest) == 1);
      store_ssa_def(ctx, &alu->dest.dest.ssa, 0, get_src_ssa(ctx,
                    alu->src->src.ssa, alu->src->swizzle[0]));
      return true;
   }
   case nir_op_pack_double_2x32_dxil:
      return emit_make_double(ctx, alu);
   case nir_op_unpack_double_2x32_dxil:
      return emit_split_double(ctx, alu);
   default:
      /* silence warnings */
      ;
   }

   /* other ops should be scalar */
   assert(alu->dest.write_mask == 1);
   const struct dxil_value *src[4];
   assert(nir_op_infos[alu->op].num_inputs <= 4);
   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      src[i] = get_alu_src(ctx, alu, i);
      if (!src[i])
         return false;
   }

   switch (alu->op) {
   case nir_op_iadd:
   case nir_op_fadd: return emit_binop(ctx, alu, DXIL_BINOP_ADD, src[0], src[1]);

   case nir_op_isub:
   case nir_op_fsub: return emit_binop(ctx, alu, DXIL_BINOP_SUB, src[0], src[1]);

   case nir_op_imul:
   case nir_op_fmul: return emit_binop(ctx, alu, DXIL_BINOP_MUL, src[0], src[1]);

   case nir_op_idiv:
   case nir_op_fdiv: return emit_binop(ctx, alu, DXIL_BINOP_SDIV, src[0], src[1]);

   case nir_op_udiv: return emit_binop(ctx, alu, DXIL_BINOP_UDIV, src[0], src[1]);
   case nir_op_irem: return emit_binop(ctx, alu, DXIL_BINOP_SREM, src[0], src[1]);
   case nir_op_imod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
   case nir_op_umod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
   case nir_op_ishl: return emit_shift(ctx, alu, DXIL_BINOP_SHL, src[0], src[1]);
   case nir_op_ishr: return emit_shift(ctx, alu, DXIL_BINOP_ASHR, src[0], src[1]);
   case nir_op_ushr: return emit_shift(ctx, alu, DXIL_BINOP_LSHR, src[0], src[1]);
   case nir_op_iand: return emit_binop(ctx, alu, DXIL_BINOP_AND, src[0], src[1]);
   case nir_op_ior: return emit_binop(ctx, alu, DXIL_BINOP_OR, src[0], src[1]);
   case nir_op_ixor: return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], src[1]);
   case nir_op_inot: {
      unsigned bit_size = alu->dest.dest.ssa.bit_size;
      intmax_t val = bit_size == 1 ? 1 : -1;
      const struct dxil_value *negative_one = dxil_module_get_int_const(&ctx->mod, val, bit_size);
      if (!negative_one)
         return false;
      return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], negative_one);
   }
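   /* inot is lowered to an xor with an all-ones constant: e.g. for an 8-bit
    * value, ~0b00001111 == 0b00001111 ^ 0b11111111 == 0b11110000; for a
    * 1-bit boolean the constant is 1, making the xor a logical not. */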
   case nir_op_ieq: return emit_cmp(ctx, alu, DXIL_ICMP_EQ, src[0], src[1]);
   case nir_op_ine: return emit_cmp(ctx, alu, DXIL_ICMP_NE, src[0], src[1]);
   case nir_op_ige: return emit_cmp(ctx, alu, DXIL_ICMP_SGE, src[0], src[1]);
   case nir_op_uge: return emit_cmp(ctx, alu, DXIL_ICMP_UGE, src[0], src[1]);
   case nir_op_ilt: return emit_cmp(ctx, alu, DXIL_ICMP_SLT, src[0], src[1]);
   case nir_op_ult: return emit_cmp(ctx, alu, DXIL_ICMP_ULT, src[0], src[1]);
   case nir_op_feq: return emit_cmp(ctx, alu, DXIL_FCMP_OEQ, src[0], src[1]);
   case nir_op_fneu: return emit_cmp(ctx, alu, DXIL_FCMP_UNE, src[0], src[1]);
   case nir_op_flt: return emit_cmp(ctx, alu, DXIL_FCMP_OLT, src[0], src[1]);
   case nir_op_fge: return emit_cmp(ctx, alu, DXIL_FCMP_OGE, src[0], src[1]);
   case nir_op_bcsel: return emit_select(ctx, alu, src[0], src[1], src[2]);
   case nir_op_ftrunc: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_Z, src[0]);
   case nir_op_fabs: return emit_unary_intin(ctx, alu, DXIL_INTR_FABS, src[0]);
   case nir_op_fcos: return emit_unary_intin(ctx, alu, DXIL_INTR_FCOS, src[0]);
   case nir_op_fsin: return emit_unary_intin(ctx, alu, DXIL_INTR_FSIN, src[0]);
   case nir_op_fceil: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_PI, src[0]);
   case nir_op_fexp2: return emit_unary_intin(ctx, alu, DXIL_INTR_FEXP2, src[0]);
   case nir_op_flog2: return emit_unary_intin(ctx, alu, DXIL_INTR_FLOG2, src[0]);
   case nir_op_ffloor: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NI, src[0]);
   case nir_op_ffract: return emit_unary_intin(ctx, alu, DXIL_INTR_FRC, src[0]);
   case nir_op_fisnormal: return emit_unary_intin(ctx, alu, DXIL_INTR_ISNORMAL, src[0]);
   case nir_op_fisfinite: return emit_unary_intin(ctx, alu, DXIL_INTR_ISFINITE, src[0]);

   case nir_op_fddx:
   case nir_op_fddx_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_COARSE, src[0]);
   case nir_op_fddx_fine: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_FINE, src[0]);
   case nir_op_fddy:
   case nir_op_fddy_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDY_COARSE, src[0]);
   case nir_op_fddy_fine: return emit_unary_intin(ctx, alu, DXIL_INTR_DDY_FINE, src[0]);

   case nir_op_fround_even: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NE, src[0]);
   case nir_op_frcp: {
      const struct dxil_value *one = dxil_module_get_float_const(&ctx->mod, 1.0f);
      if (!one)
         return false;
      return emit_binop(ctx, alu, DXIL_BINOP_SDIV, one, src[0]);
   }
   case nir_op_fsat: return emit_unary_intin(ctx, alu, DXIL_INTR_SATURATE, src[0]);
   case nir_op_bit_count: return emit_unary_intin(ctx, alu, DXIL_INTR_COUNTBITS, src[0]);
   case nir_op_ufind_msb: return emit_ufind_msb(ctx, alu, src[0]);
   case nir_op_imax: return emit_binary_intin(ctx, alu, DXIL_INTR_IMAX, src[0], src[1]);
   case nir_op_imin: return emit_binary_intin(ctx, alu, DXIL_INTR_IMIN, src[0], src[1]);
   case nir_op_umax: return emit_binary_intin(ctx, alu, DXIL_INTR_UMAX, src[0], src[1]);
   case nir_op_umin: return emit_binary_intin(ctx, alu, DXIL_INTR_UMIN, src[0], src[1]);
   case nir_op_frsq: return emit_unary_intin(ctx, alu, DXIL_INTR_RSQRT, src[0]);
   case nir_op_fsqrt: return emit_unary_intin(ctx, alu, DXIL_INTR_SQRT, src[0]);
   case nir_op_fmax: return emit_binary_intin(ctx, alu, DXIL_INTR_FMAX, src[0], src[1]);
   case nir_op_fmin: return emit_binary_intin(ctx, alu, DXIL_INTR_FMIN, src[0], src[1]);
   case nir_op_ffma: return emit_tertiary_intin(ctx, alu, DXIL_INTR_FMA, src[0], src[1], src[2]);

   case nir_op_unpack_half_2x16_split_x: return emit_f16tof32(ctx, alu, src[0]);
   case nir_op_pack_half_2x16_split: return emit_f32tof16(ctx, alu, src[0]);

   case nir_op_b2i16:
   case nir_op_i2i16:
   case nir_op_f2i16:
   case nir_op_f2u16:
   case nir_op_u2u16:
   case nir_op_u2f16:
   case nir_op_i2f16:
   case nir_op_f2f16_rtz:
   case nir_op_b2i32:
   case nir_op_f2f32:
   case nir_op_f2i32:
   case nir_op_f2u32:
   case nir_op_i2f32:
   case nir_op_i2i32:
   case nir_op_u2f32:
   case nir_op_u2u32:
   case nir_op_b2i64:
   case nir_op_f2f64:
   case nir_op_f2i64:
   case nir_op_f2u64:
   case nir_op_i2f64:
   case nir_op_i2i64:
   case nir_op_u2f64:
   case nir_op_u2u64:
      return emit_cast(ctx, alu, src[0]);

   case nir_op_f2b32: return emit_f2b32(ctx, alu, src[0]);
   case nir_op_b2f16: return emit_b2f16(ctx, alu, src[0]);
   case nir_op_b2f32: return emit_b2f32(ctx, alu, src[0]);
   default:
      NIR_INSTR_UNSUPPORTED(&alu->instr);
      assert(!"Unimplemented ALU instruction");
      return false;
   }
}

static const struct dxil_value *
load_ubo(struct ntd_context *ctx, const struct dxil_value *handle,
         const struct dxil_value *offset, enum overload_type overload)
{
   assert(handle && offset);

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CBUFFER_LOAD_LEGACY);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode, handle, offset
   };

   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cbufferLoadLegacy", overload);
   if (!func)
      return NULL;
   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *opcode, *mode;
   const struct dxil_func *func;
   uint32_t flags = 0;

   if (nir_intrinsic_execution_scope(intr) == NIR_SCOPE_WORKGROUP)
      flags |= DXIL_BARRIER_MODE_SYNC_THREAD_GROUP;

   nir_variable_mode modes = nir_intrinsic_memory_modes(intr);
   nir_scope mem_scope = nir_intrinsic_memory_scope(intr);

   /* Currently vtn uses uniform to indicate image memory, which DXIL considers global */
   if (modes & nir_var_uniform)
      modes |= nir_var_mem_global;

   if (modes & (nir_var_mem_ssbo | nir_var_mem_global)) {
      if (mem_scope > NIR_SCOPE_WORKGROUP)
         flags |= DXIL_BARRIER_MODE_UAV_FENCE_GLOBAL;
      else
         flags |= DXIL_BARRIER_MODE_UAV_FENCE_THREAD_GROUP;
   }

   if (modes & nir_var_mem_shared)
      flags |= DXIL_BARRIER_MODE_GROUPSHARED_MEM_FENCE;

   func = dxil_get_function(&ctx->mod, "dx.op.barrier", DXIL_NONE);
   if (!func)
      return false;

   opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_BARRIER);
   if (!opcode)
      return false;

   mode = dxil_module_get_int32_const(&ctx->mod, flags);
   if (!mode)
      return false;

   const struct dxil_value *args[] = { opcode, mode };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}
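
/* Illustrative flag combination: a GLSL-style barrier() in a compute shader
 * (workgroup execution scope plus shared-memory semantics) maps to
 * DXIL_BARRIER_MODE_SYNC_THREAD_GROUP | DXIL_BARRIER_MODE_GROUPSHARED_MEM_FENCE,
 * the rough equivalent of HLSL's GroupMemoryBarrierWithGroupSync(). */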

static bool
emit_load_global_invocation_id(struct ntd_context *ctx,
                               nir_intrinsic_instr *intr)
{
   assert(intr->dest.is_ssa);
   nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);

   for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
      if (comps & (1 << i)) {
         const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
         if (!idx)
            return false;
         const struct dxil_value *globalid = emit_threadid_call(ctx, idx);

         if (!globalid)
            return false;

         store_dest_value(ctx, &intr->dest, i, globalid);
      }
   }
   return true;
}

static bool
emit_load_local_invocation_id(struct ntd_context *ctx,
                              nir_intrinsic_instr *intr)
{
   assert(intr->dest.is_ssa);
   nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);

   for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
      if (comps & (1 << i)) {
         const struct dxil_value
            *idx = dxil_module_get_int32_const(&ctx->mod, i);
         if (!idx)
            return false;
         const struct dxil_value
            *threadidingroup = emit_threadidingroup_call(ctx, idx);
         if (!threadidingroup)
            return false;
         store_dest_value(ctx, &intr->dest, i, threadidingroup);
      }
   }
   return true;
}

static bool
emit_load_local_invocation_index(struct ntd_context *ctx,
                                 nir_intrinsic_instr *intr)
{
   assert(intr->dest.is_ssa);

   const struct dxil_value
      *flattenedthreadidingroup = emit_flattenedthreadidingroup_call(ctx);
   if (!flattenedthreadidingroup)
      return false;
   store_dest_value(ctx, &intr->dest, 0, flattenedthreadidingroup);

   return true;
}

static bool
emit_load_local_workgroup_id(struct ntd_context *ctx,
                             nir_intrinsic_instr *intr)
{
   assert(intr->dest.is_ssa);
   nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);

   for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
      if (comps & (1 << i)) {
         const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
         if (!idx)
            return false;
         const struct dxil_value *groupid = emit_groupid_call(ctx, idx);
         if (!groupid)
            return false;
         store_dest_value(ctx, &intr->dest, i, groupid);
      }
   }
   return true;
}

static bool
emit_load_unary_external_function(struct ntd_context *ctx,
                                  nir_intrinsic_instr *intr, const char *name,
                                  int32_t dxil_intr)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, name, DXIL_I32);
   if (!func)
      return false;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, dxil_intr);
   if (!opcode)
      return false;

   const struct dxil_value *args[] = {opcode};

   const struct dxil_value *value =
      dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!value)
      return false;
   store_dest_value(ctx, &intr->dest, 0, value);

   return true;
}

static const struct dxil_value *
get_int32_undef(struct dxil_module *m)
{
   const struct dxil_type *int32_type =
      dxil_module_get_int_type(m, 32);
   if (!int32_type)
      return NULL;

   return dxil_module_get_undef(m, int32_type);
}

static const struct dxil_value *
emit_gep_for_index(struct ntd_context *ctx, const nir_variable *var,
                   const struct dxil_value *index)
{
   assert(var->data.mode == nir_var_shader_temp);

   struct hash_entry *he = _mesa_hash_table_search(ctx->consts, var);
   assert(he != NULL);
   const struct dxil_value *ptr = he->data;

   const struct dxil_value *zero = dxil_module_get_int32_const(&ctx->mod, 0);
   if (!zero)
      return NULL;

   const struct dxil_value *ops[] = { ptr, zero, index };
   return dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
}

static const struct dxil_value *
get_ubo_ssbo_handle(struct ntd_context *ctx, nir_src *src, enum dxil_resource_class class, unsigned base_binding)
{
   /* This source might be one of:
    * 1. Constant resource index - just look it up in precomputed handle arrays
    *    If it's null in that array, create a handle, and store the result
    * 2. A handle from load_vulkan_descriptor - just get the stored SSA value
    * 3. Dynamic resource index - create a handle for it here
    */
   assert(src->ssa->num_components == 1 && src->ssa->bit_size == 32);
   nir_const_value *const_block_index = nir_src_as_const_value(*src);
   const struct dxil_value **handle_entry = NULL;
   if (const_block_index) {
      assert(!ctx->opts->vulkan_environment);
      switch (class) {
      case DXIL_RESOURCE_CLASS_CBV:
         handle_entry = &ctx->cbv_handles[const_block_index->u32];
         break;
      case DXIL_RESOURCE_CLASS_UAV:
         handle_entry = &ctx->uav_handles[const_block_index->u32];
         break;
      case DXIL_RESOURCE_CLASS_SRV:
         handle_entry = &ctx->srv_handles[const_block_index->u32];
         break;
      default:
         unreachable("Unexpected resource class");
      }
   }

   if (handle_entry && *handle_entry)
      return *handle_entry;

   const struct dxil_value *value = get_src_ssa(ctx, src->ssa, 0);
   if (ctx->opts->vulkan_environment) {
      return value;
   }

   const struct dxil_value *handle = emit_createhandle_call(ctx, class,
      get_resource_id(ctx, class, 0, base_binding), value, !const_block_index);
   if (handle_entry)
      *handle_entry = handle;

   return handle;
}

static bool
emit_load_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);

   nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
   enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV;
   if (var && var->data.access & ACCESS_NON_WRITEABLE)
      class = DXIL_RESOURCE_CLASS_SRV;

   const struct dxil_value *handle = get_ubo_ssbo_handle(ctx, &intr->src[0], class, 0);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);
   if (!int32_undef || !handle || !offset)
      return false;

   assert(nir_src_bit_size(intr->src[0]) == 32);
   assert(nir_intrinsic_dest_components(intr) <= 4);

   const struct dxil_value *coord[2] = {
      offset,
      int32_undef
   };

   const struct dxil_value *load = emit_bufferload_call(ctx, handle, coord, DXIL_I32);
   if (!load)
      return false;

   for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
      const struct dxil_value *val =
         dxil_emit_extractval(&ctx->mod, load, i);
      if (!val)
         return false;
      store_dest_value(ctx, &intr->dest, i, val);
   }
   return true;
}

static bool
emit_store_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = get_ubo_ssbo_handle(ctx, &intr->src[1], DXIL_RESOURCE_CLASS_UAV, 0);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[2], 0, nir_type_uint);
   if (!handle || !offset)
      return false;

   assert(nir_src_bit_size(intr->src[0]) == 32);
   unsigned num_components = nir_src_num_components(intr->src[0]);
   assert(num_components <= 4);
   const struct dxil_value *value[4];
   for (unsigned i = 0; i < num_components; ++i) {
      value[i] = get_src(ctx, &intr->src[0], i, nir_type_uint);
      if (!value[i])
         return false;
   }

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[2] = {
      offset,
      int32_undef
   };

   for (int i = num_components; i < 4; ++i)
      value[i] = int32_undef;

   const struct dxil_value *write_mask =
      dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
   if (!write_mask)
      return false;

   return emit_bufferstore_call(ctx, handle, coord, value, write_mask, DXIL_I32);
}

static bool
emit_store_ssbo_masked(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *value =
      get_src(ctx, &intr->src[0], 0, nir_type_uint);
   const struct dxil_value *mask =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);
   const struct dxil_value *handle = get_ubo_ssbo_handle(ctx, &intr->src[2], DXIL_RESOURCE_CLASS_UAV, 0);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[3], 0, nir_type_uint);
   if (!value || !mask || !handle || !offset)
      return false;

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = {
      offset, int32_undef, int32_undef
   };

   return
      emit_atomic_binop(ctx, handle, DXIL_ATOMIC_AND, coord, mask) != NULL &&
      emit_atomic_binop(ctx, handle, DXIL_ATOMIC_OR, coord, value) != NULL;
}
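
/* The masked store is emulated with two atomics: the AND clears the bits
 * being replaced, then the OR merges in the new bits, i.e.
 * new = (old & mask) | value. For example, overwriting only the low 16 bits
 * of a dword uses mask = 0xFFFF0000 with value carrying the new low half.
 * Each operation is atomic on its own, but the pair is not atomic together. */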

static bool
emit_store_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *zero, *index;

   /* All shared mem accesses should have been lowered to scalar 32bit
    * accesses.
    */
   assert(nir_src_bit_size(intr->src[0]) == 32);
   assert(nir_src_num_components(intr->src[0]) == 1);

   zero = dxil_module_get_int32_const(&ctx->mod, 0);
   if (!zero)
      return false;

   if (intr->intrinsic == nir_intrinsic_store_shared_dxil)
      index = get_src(ctx, &intr->src[1], 0, nir_type_uint);
   else
      index = get_src(ctx, &intr->src[2], 0, nir_type_uint);
   if (!index)
      return false;

   const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
   const struct dxil_value *ptr, *value;

   ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
   if (!ptr)
      return false;

   value = get_src(ctx, &intr->src[0], 0, nir_type_uint);
   if (!value)
      return false;

   if (intr->intrinsic == nir_intrinsic_store_shared_dxil)
      return dxil_emit_store(&ctx->mod, value, ptr, 4, false);

   const struct dxil_value *mask = get_src(ctx, &intr->src[1], 0, nir_type_uint);
   if (!mask)
      return false;

   if (!dxil_emit_atomicrmw(&ctx->mod, mask, ptr, DXIL_RMWOP_AND, false,
                            DXIL_ATOMIC_ORDERING_ACQREL,
                            DXIL_SYNC_SCOPE_CROSSTHREAD))
      return false;

   if (!dxil_emit_atomicrmw(&ctx->mod, value, ptr, DXIL_RMWOP_OR, false,
                            DXIL_ATOMIC_ORDERING_ACQREL,
                            DXIL_SYNC_SCOPE_CROSSTHREAD))
      return false;

   return true;
}

static bool
emit_store_scratch(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *zero, *index;

   /* All scratch mem accesses should have been lowered to scalar 32bit
    * accesses.
    */
   assert(nir_src_bit_size(intr->src[0]) == 32);
   assert(nir_src_num_components(intr->src[0]) == 1);

   zero = dxil_module_get_int32_const(&ctx->mod, 0);
   if (!zero)
      return false;

   index = get_src(ctx, &intr->src[1], 0, nir_type_uint);
   if (!index)
      return false;

   const struct dxil_value *ops[] = { ctx->scratchvars, zero, index };
   const struct dxil_value *ptr, *value;

   ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
   if (!ptr)
      return false;

   value = get_src(ctx, &intr->src[0], 0, nir_type_uint);
   if (!value)
      return false;

   return dxil_emit_store(&ctx->mod, value, ptr, 4, false);
}

static bool
emit_load_ubo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = get_ubo_ssbo_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, 0);
   if (!handle)
      return false;

   const struct dxil_value *offset;
   nir_const_value *const_offset = nir_src_as_const_value(intr->src[1]);
   if (const_offset) {
      offset = dxil_module_get_int32_const(&ctx->mod, const_offset->i32 >> 4);
   } else {
      const struct dxil_value *offset_src = get_src(ctx, &intr->src[1], 0, nir_type_uint);
      const struct dxil_value *c4 = dxil_module_get_int32_const(&ctx->mod, 4);
      if (!offset_src || !c4)
         return false;

      offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ASHR, offset_src, c4, 0);
   }

   const struct dxil_value *agg = load_ubo(ctx, handle, offset, DXIL_F32);

   if (!agg)
      return false;

   for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, agg, i);
      store_dest(ctx, &intr->dest, i, retval,
                 nir_dest_bit_size(intr->dest) > 1 ? nir_type_float : nir_type_bool);
   }
   return true;
}
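
/* cbufferLoadLegacy addresses the constant buffer in 16-byte registers, so
 * the byte offset is divided by 16 (the ">> 4" above) before the call. For
 * instance, a byte offset of 36 selects register 2 (bytes 32..47); the
 * extractval loop then pulls the needed 4-byte components from that row. */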

static bool
emit_load_ubo_dxil(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   assert(nir_dest_num_components(intr->dest) <= 4);
   assert(nir_dest_bit_size(intr->dest) == 32);

   const struct dxil_value *handle = get_ubo_ssbo_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, 0);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);

   if (!handle || !offset)
      return false;

   const struct dxil_value *agg = load_ubo(ctx, handle, offset, DXIL_I32);
   if (!agg)
      return false;

   for (unsigned i = 0; i < nir_dest_num_components(intr->dest); i++)
      store_dest_value(ctx, &intr->dest, i,
                       dxil_emit_extractval(&ctx->mod, agg, i));

   return true;
}

static bool
emit_store_output(struct ntd_context *ctx, nir_intrinsic_instr *intr,
                  nir_variable *output)
{
   nir_alu_type out_type = nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(output->type));
   enum overload_type overload = DXIL_F32;
   if (output->data.compact)
      out_type = nir_type_float;
   else
      overload = get_overload(out_type, glsl_get_bit_size(output->type));
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.storeOutput", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_STORE_OUTPUT);
   const struct dxil_value *output_id = dxil_module_get_int32_const(&ctx->mod, (int)output->data.driver_location);
   const struct dxil_value *row = dxil_module_get_int32_const(&ctx->mod, 0);

   bool success = true;
   if (output->data.compact) {
      nir_deref_instr *array_deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr);
      unsigned array_index = nir_src_as_uint(array_deref->arr.index);

      const struct dxil_value *col = dxil_module_get_int8_const(&ctx->mod, array_index);
      const struct dxil_value *value = get_src(ctx, &intr->src[1], 0, out_type);
      if (!col || !value)
         return false;

      const struct dxil_value *args[] = {
         opcode, output_id, row, col, value
      };
      success = dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
   } else {
      uint32_t writemask = nir_intrinsic_write_mask(intr);
      for (unsigned i = 0; i < nir_src_num_components(intr->src[1]) && success; ++i) {
         if (writemask & (1 << i)) {
            const struct dxil_value *col = dxil_module_get_int8_const(&ctx->mod, i);
            const struct dxil_value *value = get_src(ctx, &intr->src[1], i, out_type);
            if (!col || !value)
               return false;

            const struct dxil_value *args[] = {
               opcode, output_id, row, col, value
            };
            success &= dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
         }
      }
   }
   return success;
}

static bool
emit_store_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   switch (var->data.mode) {
   case nir_var_shader_out:
      return emit_store_output(ctx, intr, var);

   default:
      unreachable("unsupported nir_variable_mode");
   }
}

static bool
emit_load_input_array(struct ntd_context *ctx, nir_intrinsic_instr *intr, nir_variable *var, nir_src *index)
{
   assert(var);
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LOAD_INPUT);
   const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, var->data.driver_location);
   const struct dxil_value *vertex_id;
   const struct dxil_value *row;

   if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) {
      vertex_id = get_src(ctx, index, 0, nir_type_int);
      row = dxil_module_get_int32_const(&ctx->mod, 0);
   } else {
      const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
      vertex_id = dxil_module_get_undef(&ctx->mod, int32_type);
      row = get_src(ctx, index, 0, nir_type_int);
   }

   if (!opcode || !input_id || !vertex_id || !row)
      return false;

   nir_alu_type out_type = nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(glsl_get_array_element(var->type)));
   enum overload_type overload = get_overload(out_type, glsl_get_bit_size(glsl_get_array_element(var->type)));

   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.loadInput", overload);

   if (!func)
      return false;

   for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
      const struct dxil_value *comp = dxil_module_get_int8_const(&ctx->mod, i);
      if (!comp)
         return false;

      const struct dxil_value *args[] = {
         opcode, input_id, row, comp, vertex_id
      };

      const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
      if (!retval)
         return false;
      store_dest(ctx, &intr->dest, i, retval, out_type);
   }
   return true;
}

static bool
emit_load_compact_input_array(struct ntd_context *ctx, nir_intrinsic_instr *intr, nir_variable *var, nir_deref_instr *deref)
{
   assert(var);
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LOAD_INPUT);
   const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, var->data.driver_location);
   const struct dxil_value *row = dxil_module_get_int32_const(&ctx->mod, 0);
   const struct dxil_value *vertex_id;

   nir_src *col = &deref->arr.index;
   assert(nir_src_is_const(*col));

   if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) {
      nir_deref_instr *deref_parent = nir_deref_instr_parent(deref);
      assert(deref_parent->deref_type == nir_deref_type_array);

      vertex_id = get_src(ctx, &deref_parent->arr.index, 0, nir_type_int);
   } else {
      const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
      vertex_id = dxil_module_get_undef(&ctx->mod, int32_type);
   }

   if (!opcode || !input_id || !row || !vertex_id)
      return false;

   nir_alu_type out_type = nir_type_float;
   enum overload_type overload = get_overload(out_type, 32);

   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.loadInput", overload);

   if (!func)
      return false;

   const struct dxil_value *comp = dxil_module_get_int8_const(&ctx->mod, nir_src_as_int(*col));
   if (!comp)
      return false;

   const struct dxil_value *args[] = {
      opcode, input_id, row, comp, vertex_id
   };

   const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!retval)
      return false;
   store_dest(ctx, &intr->dest, 0, retval, out_type);
   return true;
}
2859
2860 static bool
emit_load_input_interpolated(struct ntd_context * ctx,nir_intrinsic_instr * intr,nir_variable * var)2861 emit_load_input_interpolated(struct ntd_context *ctx, nir_intrinsic_instr *intr, nir_variable *var)
2862 {
2863 assert(var);
2864 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LOAD_INPUT);
2865 const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, var->data.driver_location);
2866 const struct dxil_value *row = dxil_module_get_int32_const(&ctx->mod, 0);
2867 const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
2868 const struct dxil_value *vertex_id = dxil_module_get_undef(&ctx->mod, int32_type);
2869
2870 if (!opcode || !input_id || !row || !int32_type || !vertex_id)
2871 return false;
2872
2873 nir_alu_type out_type = nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(var->type));
2874 enum overload_type overload = get_overload(out_type, glsl_get_bit_size(var->type));
2875
2876 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.loadInput", overload);
2877
2878 if (!func)
2879 return false;
2880
2881 for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
2882 const struct dxil_value *comp = dxil_module_get_int8_const(&ctx->mod, i);
2883
2884 const struct dxil_value *args[] = {
2885 opcode, input_id, row, comp, vertex_id
2886 };
2887
2888 const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2889 if (!retval)
2890 return false;
2891 store_dest(ctx, &intr->dest, i, retval, out_type);
2892 }
2893 return true;
2894 }
2895
2896 static bool
emit_load_input_flat(struct ntd_context * ctx,nir_intrinsic_instr * intr,nir_variable * var)2897 emit_load_input_flat(struct ntd_context *ctx, nir_intrinsic_instr *intr, nir_variable* var)
2898 {
2899 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATTRIBUTE_AT_VERTEX);
2900 const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, (int)var->data.driver_location);
2901 const struct dxil_value *row = dxil_module_get_int32_const(&ctx->mod, 0);
2902 const struct dxil_value *vertex_id = dxil_module_get_int8_const(&ctx->mod, ctx->opts->provoking_vertex);
2903
2904 nir_alu_type out_type = nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(var->type));
2905 enum overload_type overload = get_overload(out_type, glsl_get_bit_size(var->type));
2906
2907 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.attributeAtVertex", overload);
2908 if (!func)
2909 return false;
2910
2911 for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
2912 const struct dxil_value *comp = dxil_module_get_int8_const(&ctx->mod, i);
2913 const struct dxil_value *args[] = {
2914 opcode, input_id, row, comp, vertex_id
2915 };
2916
2917 const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
2918 if (!retval)
2919 return false;
2920
2921 store_dest(ctx, &intr->dest, i, retval, out_type);
2922 }
2923 return true;
2924 }
2925
2926 static bool
emit_load_input(struct ntd_context * ctx,nir_intrinsic_instr * intr,nir_variable * input)2927 emit_load_input(struct ntd_context *ctx, nir_intrinsic_instr *intr,
2928 nir_variable *input)
2929 {
2930 if (ctx->mod.shader_kind != DXIL_PIXEL_SHADER ||
2931 input->data.interpolation != INTERP_MODE_FLAT ||
2932 !ctx->opts->interpolate_at_vertex ||
2933 ctx->opts->provoking_vertex == 0 ||
2934 glsl_type_is_integer(input->type))
2935 return emit_load_input_interpolated(ctx, intr, input);
2936 else
2937 return emit_load_input_flat(ctx, intr, input);
2938 }
2939
2940 static bool
emit_load_ptr(struct ntd_context * ctx,nir_intrinsic_instr * intr)2941 emit_load_ptr(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2942 {
2943 struct nir_variable *var =
2944 nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
2945
2946 const struct dxil_value *index =
2947 get_src(ctx, &intr->src[1], 0, nir_type_uint);
2948 if (!index)
2949 return false;
2950
2951 const struct dxil_value *ptr = emit_gep_for_index(ctx, var, index);
2952 if (!ptr)
2953 return false;
2954
2955 const struct dxil_value *retval =
2956 dxil_emit_load(&ctx->mod, ptr, 4, false);
2957 if (!retval)
2958 return false;
2959
2960 store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
2961 return true;
2962 }
2963
2964 static bool
emit_load_shared(struct ntd_context * ctx,nir_intrinsic_instr * intr)2965 emit_load_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr)
2966 {
2967 const struct dxil_value *zero, *index;
2968 unsigned bit_size = nir_dest_bit_size(intr->dest);
2969 unsigned align = bit_size / 8;
2970
2971 /* All shared mem accesses should have been lowered to scalar 32bit
2972 * accesses.
2973 */
2974 assert(bit_size == 32);
2975 assert(nir_dest_num_components(intr->dest) == 1);
2976
2977 zero = dxil_module_get_int32_const(&ctx->mod, 0);
2978 if (!zero)
2979 return false;
2980
2981 index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
2982 if (!index)
2983 return false;
2984
2985 const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
2986 const struct dxil_value *ptr, *retval;
2987
2988 ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
2989 if (!ptr)
2990 return false;
2991
2992 retval = dxil_emit_load(&ctx->mod, ptr, align, false);
2993 if (!retval)
2994 return false;
2995
2996 store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
2997 return true;
2998 }
2999
3000 static bool
emit_load_scratch(struct ntd_context * ctx,nir_intrinsic_instr * intr)3001 emit_load_scratch(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3002 {
3003 const struct dxil_value *zero, *index;
3004 unsigned bit_size = nir_dest_bit_size(intr->dest);
3005 unsigned align = bit_size / 8;
3006
3007 /* All scratch mem accesses should have been lowered to scalar 32bit
3008 * accesses.
3009 */
3010 assert(bit_size == 32);
3011 assert(nir_dest_num_components(intr->dest) == 1);
3012
3013 zero = dxil_module_get_int32_const(&ctx->mod, 0);
3014 if (!zero)
3015 return false;
3016
3017 index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
3018 if (!index)
3019 return false;
3020
3021 const struct dxil_value *ops[] = { ctx->scratchvars, zero, index };
3022 const struct dxil_value *ptr, *retval;
3023
3024 ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
3025 if (!ptr)
3026 return false;
3027
3028 retval = dxil_emit_load(&ctx->mod, ptr, align, false);
3029 if (!retval)
3030 return false;
3031
3032 store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
3033 return true;
3034 }
3035
3036 static bool
emit_load_deref(struct ntd_context * ctx,nir_intrinsic_instr * intr)3037 emit_load_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3038 {
3039 assert(intr->src[0].is_ssa);
3040 nir_deref_instr *deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr);
3041 nir_variable *var = nir_deref_instr_get_variable(deref);
3042
3043 switch (var->data.mode) {
3044 case nir_var_shader_in:
3045 if (glsl_type_is_array(var->type)) {
3046 if (var->data.compact)
3047 return emit_load_compact_input_array(ctx, intr, var, deref);
3048 else
3049 return emit_load_input_array(ctx, intr, var, &deref->arr.index);
3050 }
3051 return emit_load_input(ctx, intr, var);
3052
3053 default:
3054 unreachable("unsupported nir_variable_mode");
3055 }
3056 }
3057
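/* dx.op.discard takes an i1 condition, so an unconditional discard is
 * emitted as a conditional discard of the constant 'true'.
 */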
3058 static bool
3059 emit_discard_if_with_value(struct ntd_context *ctx, const struct dxil_value *value)
3060 {
3061 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DISCARD);
3062 if (!opcode)
3063 return false;
3064
3065 const struct dxil_value *args[] = {
3066 opcode,
3067 value
3068 };
3069
3070 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.discard", DXIL_NONE);
3071 if (!func)
3072 return false;
3073
3074 return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
3075 }
3076
3077 static bool
3078 emit_discard_if(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3079 {
3080 const struct dxil_value *value = get_src(ctx, &intr->src[0], 0, nir_type_bool);
3081 if (!value)
3082 return false;
3083
3084 return emit_discard_if_with_value(ctx, value);
3085 }
3086
3087 static bool
3088 emit_discard(struct ntd_context *ctx)
3089 {
3090 const struct dxil_value *value = dxil_module_get_int1_const(&ctx->mod, true);
3091 return emit_discard_if_with_value(ctx, value);
3092 }
3093
3094 static bool
3095 emit_emit_vertex(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3096 {
3097 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_EMIT_STREAM);
3098 const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
3099 if (!opcode || !stream_id)
3100 return false;
3101
3102 const struct dxil_value *args[] = {
3103 opcode,
3104 stream_id
3105 };
3106
3107 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.emitStream", DXIL_NONE);
3108 if (!func)
3109 return false;
3110
3111 return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
3112 }
3113
3114 static bool
3115 emit_end_primitive(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3116 {
3117 const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CUT_STREAM);
3118 const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
3119 if (!opcode || !stream_id)
3120 return false;
3121
3122 const struct dxil_value *args[] = {
3123 opcode,
3124 stream_id
3125 };
3126
3127 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cutStream", DXIL_NONE);
3128 if (!func)
3129 return false;
3130
3131 return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
3132 }
3133
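/* Store to a typed UAV. The handle comes from different places depending on
 * the environment: in the Vulkan path it is the value produced while walking
 * the deref chain, in the GL path it is looked up by binding. Missing
 * coordinate and trailing value slots are padded with undef, and the write
 * mask covers exactly the components supplied by the source.
 */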
3134 static bool
3135 emit_image_store(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3136 {
3137 const struct dxil_value *handle;
3138 bool is_array = false;
3139 if (ctx->opts->vulkan_environment) {
3140 assert(intr->intrinsic == nir_intrinsic_image_deref_store);
3141 handle = get_src_ssa(ctx, intr->src[0].ssa, 0);
3142 is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
3143 } else {
3144 assert(intr->intrinsic == nir_intrinsic_image_store);
3145 int binding = nir_src_as_int(intr->src[0]);
3146 is_array = nir_intrinsic_image_array(intr);
3147 handle = ctx->uav_handles[binding];
3148 }
3149 if (!handle)
3150 return false;
3151
3152 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
3153 if (!int32_undef)
3154 return false;
3155
3156 const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
3157 enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_store ?
3158 nir_intrinsic_image_dim(intr) :
3159 glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type);
3160 unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
3161 if (is_array)
3162 ++num_coords;
3163
3164 assert(num_coords <= nir_src_num_components(intr->src[1]));
3165 for (unsigned i = 0; i < num_coords; ++i) {
3166 coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
3167 if (!coord[i])
3168 return false;
3169 }
3170
3171 nir_alu_type in_type = nir_intrinsic_src_type(intr);
3172 enum overload_type overload = get_overload(in_type, 32);
3173
3174 assert(nir_src_bit_size(intr->src[3]) == 32);
3175 unsigned num_components = nir_src_num_components(intr->src[3]);
3176 assert(num_components <= 4);
3177 const struct dxil_value *value[4];
3178 for (unsigned i = 0; i < num_components; ++i) {
3179 value[i] = get_src(ctx, &intr->src[3], i, in_type);
3180 if (!value[i])
3181 return false;
3182 }
3183
3184 for (int i = num_components; i < 4; ++i)
3185 value[i] = int32_undef;
3186
3187 const struct dxil_value *write_mask =
3188 dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
3189 if (!write_mask)
3190 return false;
3191
3192 if (image_dim == GLSL_SAMPLER_DIM_BUF) {
3193 coord[1] = int32_undef;
3194 return emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload);
3195 } else
3196 return emit_texturestore_call(ctx, handle, coord, value, write_mask, overload);
3197 }
3198
3199 static bool
3200 emit_image_load(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3201 {
3202 const struct dxil_value *handle;
3203 bool is_array = false;
3204 if (ctx->opts->vulkan_environment) {
3205 assert(intr->intrinsic == nir_intrinsic_image_deref_load);
3206 handle = get_src_ssa(ctx, intr->src[0].ssa, 0);
3207 is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
3208 } else {
3209 assert(intr->intrinsic == nir_intrinsic_image_load);
3210 int binding = nir_src_as_int(intr->src[0]);
3211 is_array = nir_intrinsic_image_array(intr);
3212 handle = ctx->uav_handles[binding];
3213 }
3214 if (!handle)
3215 return false;
3216
3217 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
3218 if (!int32_undef)
3219 return false;
3220
3221 const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
3222 enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_load ?
3223 nir_intrinsic_image_dim(intr) :
3224 glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type);
3225 unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
3226 if (is_array)
3227 ++num_coords;
3228
3229 assert(num_coords <= nir_src_num_components(intr->src[1]));
3230 for (unsigned i = 0; i < num_coords; ++i) {
3231 coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
3232 if (!coord[i])
3233 return false;
3234 }
3235
3236 nir_alu_type out_type = nir_intrinsic_dest_type(intr);
3237 enum overload_type overload = get_overload(out_type, 32);
3238
3239 const struct dxil_value *load_result;
3240 if (image_dim == GLSL_SAMPLER_DIM_BUF) {
3241 coord[1] = int32_undef;
3242 load_result = emit_bufferload_call(ctx, handle, coord, overload);
3243 } else
3244 load_result = emit_textureload_call(ctx, handle, coord, overload);
3245
3246 if (!load_result)
3247 return false;
3248
3249 assert(nir_dest_bit_size(intr->dest) == 32);
3250 unsigned num_components = nir_dest_num_components(intr->dest);
3251 assert(num_components <= 4);
3252 for (unsigned i = 0; i < num_components; ++i) {
3253 const struct dxil_value *component = dxil_emit_extractval(&ctx->mod, load_result, i);
3254 if (!component)
3255 return false;
3256 store_dest(ctx, &intr->dest, i, component, out_type);
3257 }
3258
3259 if (num_components > 1)
3260 ctx->mod.feats.typed_uav_load_additional_formats = true;
3261
3262 return true;
3263 }
3264
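/* Gathers every operand the various dx.op sampling and fetch calls can
 * take. Each texture op fills in only what it needs; the remaining slots
 * are padded with undef values before the call is emitted.
 */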
3265 struct texop_parameters {
3266 const struct dxil_value *tex;
3267 const struct dxil_value *sampler;
3268 const struct dxil_value *bias, *lod_or_sample, *min_lod;
3269 const struct dxil_value *coord[4], *offset[3], *dx[3], *dy[3];
3270 const struct dxil_value *cmp;
3271 enum overload_type overload;
3272 };
3273
3274 static const struct dxil_value *
3275 emit_texture_size(struct ntd_context *ctx, struct texop_parameters *params)
3276 {
3277 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.getDimensions", DXIL_NONE);
3278 if (!func)
3279 return NULL;
3280
3281 const struct dxil_value *args[] = {
3282 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_SIZE),
3283 params->tex,
3284 params->lod_or_sample
3285 };
3286
3287 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3288 }
3289
3290 static bool
3291 emit_image_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3292 {
3293 const struct dxil_value *handle;
3294 if (ctx->opts->vulkan_environment) {
3295 assert(intr->intrinsic == nir_intrinsic_image_deref_size);
3296 handle = get_src_ssa(ctx, intr->src[0].ssa, 0);
3297 }
3298 else {
3299 assert(intr->intrinsic == nir_intrinsic_image_size);
3300 int binding = nir_src_as_int(intr->src[0]);
3301 handle = ctx->uav_handles[binding];
3302 }
3303 if (!handle)
3304 return false;
3305
3306 const struct dxil_value *lod = get_src(ctx, &intr->src[1], 0, nir_type_uint);
3307 if (!lod)
3308 return false;
3309
3310 struct texop_parameters params = {
3311 .tex = handle,
3312 .lod_or_sample = lod
3313 };
3314 const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
3315 if (!dimensions)
3316 return false;
3317
3318 for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
3319 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, i);
3320 store_dest(ctx, &intr->dest, i, retval, nir_type_uint);
3321 }
3322
3323 return true;
3324 }
3325
3326 static bool
3327 emit_get_ssbo_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3328 {
3329 const struct dxil_value* handle = NULL;
3330 if (ctx->opts->vulkan_environment) {
3331 handle = get_src_ssa(ctx, intr->src[0].ssa, 0);
3332 } else {
3333 int binding = nir_src_as_int(intr->src[0]);
3334 handle = ctx->uav_handles[binding];
3335 }
3336
3337 if (!handle)
3338 return false;
3339
3340 struct texop_parameters params = {
3341 .tex = handle,
3342 .lod_or_sample = dxil_module_get_undef(
3343 &ctx->mod, dxil_module_get_int_type(&ctx->mod, 32))
3344 };
3345
3346 const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
3347 if (!dimensions)
3348 return false;
3349
3350 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, 0);
3351 store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
3352
3353 return true;
3354 }
3355
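/* Lower an SSBO atomic to a UAV atomic binop: the byte offset occupies the
 * first coordinate slot and the remaining coordinates are undef, matching
 * the raw-buffer addressing used by the load/store paths.
 */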
3356 static bool
3357 emit_ssbo_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr,
3358 enum dxil_atomic_op op, nir_alu_type type)
3359 {
3360 const struct dxil_value* handle = get_ubo_ssbo_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, 0);
3361 const struct dxil_value *offset =
3362 get_src(ctx, &intr->src[1], 0, nir_type_uint);
3363 const struct dxil_value *value =
3364 get_src(ctx, &intr->src[2], 0, type);
3365
3366 if (!value || !handle || !offset)
3367 return false;
3368
3369 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
3370 if (!int32_undef)
3371 return false;
3372
3373 const struct dxil_value *coord[3] = {
3374 offset, int32_undef, int32_undef
3375 };
3376
3377 const struct dxil_value *retval =
3378 emit_atomic_binop(ctx, handle, op, coord, value);
3379
3380 if (!retval)
3381 return false;
3382
3383 store_dest(ctx, &intr->dest, 0, retval, type);
3384 return true;
3385 }
3386
3387 static bool
3388 emit_ssbo_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3389 {
3390 const struct dxil_value* handle = get_ubo_ssbo_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, 0);
3391 const struct dxil_value *offset =
3392 get_src(ctx, &intr->src[1], 0, nir_type_uint);
3393 const struct dxil_value *cmpval =
3394 get_src(ctx, &intr->src[2], 0, nir_type_int);
3395 const struct dxil_value *newval =
3396 get_src(ctx, &intr->src[3], 0, nir_type_int);
3397
3398 if (!cmpval || !newval || !handle || !offset)
3399 return false;
3400
3401 const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
3402 if (!int32_undef)
3403 return false;
3404
3405 const struct dxil_value *coord[3] = {
3406 offset, int32_undef, int32_undef
3407 };
3408
3409 const struct dxil_value *retval =
3410 emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval);
3411
3412 if (!retval)
3413 return false;
3414
3415 store_dest(ctx, &intr->dest, 0, retval, nir_type_int);
3416 return true;
3417 }
3418
3419 static bool
3420 emit_shared_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr,
3421 enum dxil_rmw_op op, nir_alu_type type)
3422 {
3423 const struct dxil_value *zero, *index;
3424
3425 assert(nir_src_bit_size(intr->src[1]) == 32);
3426
3427 zero = dxil_module_get_int32_const(&ctx->mod, 0);
3428 if (!zero)
3429 return false;
3430
3431 index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
3432 if (!index)
3433 return false;
3434
3435 const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
3436 const struct dxil_value *ptr, *value, *retval;
3437
3438 ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
3439 if (!ptr)
3440 return false;
3441
3442 value = get_src(ctx, &intr->src[1], 0, type);
3443 if (!value)
3444 return false;
3445
3446 retval = dxil_emit_atomicrmw(&ctx->mod, value, ptr, op, false,
3447 DXIL_ATOMIC_ORDERING_ACQREL,
3448 DXIL_SYNC_SCOPE_CROSSTHREAD);
3449 if (!retval)
3450 return false;
3451
3452 store_dest(ctx, &intr->dest, 0, retval, type);
3453 return true;
3454 }
3455
3456 static bool
3457 emit_shared_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3458 {
3459 const struct dxil_value *zero, *index;
3460
3461 assert(nir_src_bit_size(intr->src[1]) == 32);
3462
3463 zero = dxil_module_get_int32_const(&ctx->mod, 0);
3464 if (!zero)
3465 return false;
3466
3467 index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
3468 if (!index)
3469 return false;
3470
3471 const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
3472 const struct dxil_value *ptr, *cmpval, *newval, *retval;
3473
3474 ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
3475 if (!ptr)
3476 return false;
3477
3478 cmpval = get_src(ctx, &intr->src[1], 0, nir_type_uint);
3479 newval = get_src(ctx, &intr->src[2], 0, nir_type_uint);
3480 if (!cmpval || !newval)
3481 return false;
3482
3483 retval = dxil_emit_cmpxchg(&ctx->mod, cmpval, newval, ptr, false,
3484 DXIL_ATOMIC_ORDERING_ACQREL,
3485 DXIL_SYNC_SCOPE_CROSSTHREAD);
3486 if (!retval)
3487 return false;
3488
3489 store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
3490 return true;
3491 }
3492
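/* vulkan_resource_index produces a vec2 (index, 0): the first component is
 * the binding plus the dynamic array index, the second exists only to
 * satisfy the intrinsic's destination layout.
 */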
3493 static bool
3494 emit_vulkan_resource_index(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3495 {
3496 unsigned int binding = nir_intrinsic_binding(intr);
3497
3498 bool const_index = nir_src_is_const(intr->src[0]);
3499 if (const_index) {
3500 binding += nir_src_as_const_value(intr->src[0])->u32;
3501 }
3502
3503 const struct dxil_value *index_value = dxil_module_get_int32_const(&ctx->mod, binding);
3504 if (!index_value)
3505 return false;
3506
3507 if (!const_index) {
3508 const struct dxil_value *offset = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
3509 if (!offset)
3510 return false;
3511
3512 index_value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, index_value, offset, 0);
3513 if (!index_value)
3514 return false;
3515 }
3516
3517 store_dest(ctx, &intr->dest, 0, index_value, nir_type_uint32);
3518 store_dest(ctx, &intr->dest, 1, dxil_module_get_int32_const(&ctx->mod, 0), nir_type_uint32);
3519 return true;
3520 }
3521
3522 static bool
3523 emit_load_vulkan_descriptor(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3524 {
3525 nir_intrinsic_instr* index = nir_src_as_intrinsic(intr->src[0]);
3526 /* We currently do not support reindexing (vulkan_resource_reindex) */
3527 assert(index && index->intrinsic == nir_intrinsic_vulkan_resource_index);
3528
3529 unsigned binding = nir_intrinsic_binding(index);
3530 unsigned space = nir_intrinsic_desc_set(index);
3531
3532 /* The descriptor_set field for variables is only 5 bits. We shouldn't have intrinsics trying to go beyond that. */
3533 assert(space < 32);
3534
3535 nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
3536
3537 const struct dxil_value *handle = NULL;
3538 enum dxil_resource_class resource_class;
3539
3540 switch (nir_intrinsic_desc_type(intr)) {
3541 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
3542 resource_class = DXIL_RESOURCE_CLASS_CBV;
3543 break;
3544 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
3545 if (var->data.access & ACCESS_NON_WRITEABLE)
3546 resource_class = DXIL_RESOURCE_CLASS_SRV;
3547 else
3548 resource_class = DXIL_RESOURCE_CLASS_UAV;
3549 break;
3550 default:
3551 unreachable("unknown descriptor type");
3552 return false;
3553 }
3554
3555 const struct dxil_value *index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
3556 if (!index_value)
3557 return false;
3558
3559 handle = emit_createhandle_call(ctx, resource_class,
3560 get_resource_id(ctx, resource_class, space, binding),
3561 index_value, false);
3562
3563 store_dest_value(ctx, &intr->dest, 0, handle);
3564 store_dest(ctx, &intr->dest, 1, get_src(ctx, &intr->src[0], 1, nir_type_uint32), nir_type_uint32);
3565
3566 return true;
3567 }
3568
3569 static bool
3570 emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3571 {
3572 switch (intr->intrinsic) {
3573 case nir_intrinsic_load_global_invocation_id:
3574 case nir_intrinsic_load_global_invocation_id_zero_base:
3575 return emit_load_global_invocation_id(ctx, intr);
3576 case nir_intrinsic_load_local_invocation_id:
3577 return emit_load_local_invocation_id(ctx, intr);
3578 case nir_intrinsic_load_local_invocation_index:
3579 return emit_load_local_invocation_index(ctx, intr);
3580 case nir_intrinsic_load_workgroup_id:
3581 case nir_intrinsic_load_workgroup_id_zero_base:
3582 return emit_load_local_workgroup_id(ctx, intr);
3583 case nir_intrinsic_load_ssbo:
3584 return emit_load_ssbo(ctx, intr);
3585 case nir_intrinsic_store_ssbo:
3586 return emit_store_ssbo(ctx, intr);
3587 case nir_intrinsic_store_ssbo_masked_dxil:
3588 return emit_store_ssbo_masked(ctx, intr);
3589 case nir_intrinsic_store_deref:
3590 return emit_store_deref(ctx, intr);
3591 case nir_intrinsic_store_shared_dxil:
3592 case nir_intrinsic_store_shared_masked_dxil:
3593 return emit_store_shared(ctx, intr);
3594 case nir_intrinsic_store_scratch_dxil:
3595 return emit_store_scratch(ctx, intr);
3596 case nir_intrinsic_load_deref:
3597 return emit_load_deref(ctx, intr);
3598 case nir_intrinsic_load_ptr_dxil:
3599 return emit_load_ptr(ctx, intr);
3600 case nir_intrinsic_load_ubo:
3601 return emit_load_ubo(ctx, intr);
3602 case nir_intrinsic_load_ubo_dxil:
3603 return emit_load_ubo_dxil(ctx, intr);
3604 case nir_intrinsic_load_front_face:
3605 return emit_load_input_interpolated(ctx, intr,
3606 ctx->system_value[SYSTEM_VALUE_FRONT_FACE]);
3607 case nir_intrinsic_load_vertex_id_zero_base:
3608 return emit_load_input_interpolated(ctx, intr,
3609 ctx->system_value[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE]);
3610 case nir_intrinsic_load_instance_id:
3611 return emit_load_input_interpolated(ctx, intr,
3612 ctx->system_value[SYSTEM_VALUE_INSTANCE_ID]);
3613 case nir_intrinsic_load_primitive_id:
3614 return emit_load_unary_external_function(ctx, intr, "dx.op.primitiveID",
3615 DXIL_INTR_PRIMITIVE_ID);
3616 case nir_intrinsic_load_sample_id:
3617 return emit_load_unary_external_function(ctx, intr, "dx.op.sampleIndex",
3618 DXIL_INTR_SAMPLE_INDEX);
3619 case nir_intrinsic_load_shared_dxil:
3620 return emit_load_shared(ctx, intr);
3621 case nir_intrinsic_load_scratch_dxil:
3622 return emit_load_scratch(ctx, intr);
3623 case nir_intrinsic_discard_if:
3624 return emit_discard_if(ctx, intr);
3625 case nir_intrinsic_discard:
3626 return emit_discard(ctx);
3627 case nir_intrinsic_emit_vertex:
3628 return emit_emit_vertex(ctx, intr);
3629 case nir_intrinsic_end_primitive:
3630 return emit_end_primitive(ctx, intr);
3631 case nir_intrinsic_scoped_barrier:
3632 return emit_barrier(ctx, intr);
3633 case nir_intrinsic_ssbo_atomic_add:
3634 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_ADD, nir_type_int);
3635 case nir_intrinsic_ssbo_atomic_imin:
3636 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_IMIN, nir_type_int);
3637 case nir_intrinsic_ssbo_atomic_umin:
3638 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_UMIN, nir_type_uint);
3639 case nir_intrinsic_ssbo_atomic_imax:
3640 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_IMAX, nir_type_int);
3641 case nir_intrinsic_ssbo_atomic_umax:
3642 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_UMAX, nir_type_uint);
3643 case nir_intrinsic_ssbo_atomic_and:
3644 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_AND, nir_type_uint);
3645 case nir_intrinsic_ssbo_atomic_or:
3646 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_OR, nir_type_uint);
3647 case nir_intrinsic_ssbo_atomic_xor:
3648 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_XOR, nir_type_uint);
3649 case nir_intrinsic_ssbo_atomic_exchange:
3650 return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_EXCHANGE, nir_type_int);
3651 case nir_intrinsic_ssbo_atomic_comp_swap:
3652 return emit_ssbo_atomic_comp_swap(ctx, intr);
3653 case nir_intrinsic_shared_atomic_add_dxil:
3654 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_ADD, nir_type_int);
3655 case nir_intrinsic_shared_atomic_imin_dxil:
3656 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_MIN, nir_type_int);
3657 case nir_intrinsic_shared_atomic_umin_dxil:
3658 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_UMIN, nir_type_uint);
3659 case nir_intrinsic_shared_atomic_imax_dxil:
3660 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_MAX, nir_type_int);
3661 case nir_intrinsic_shared_atomic_umax_dxil:
3662 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_UMAX, nir_type_uint);
3663 case nir_intrinsic_shared_atomic_and_dxil:
3664 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_AND, nir_type_uint);
3665 case nir_intrinsic_shared_atomic_or_dxil:
3666 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_OR, nir_type_uint);
3667 case nir_intrinsic_shared_atomic_xor_dxil:
3668 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_XOR, nir_type_uint);
3669 case nir_intrinsic_shared_atomic_exchange_dxil:
3670 return emit_shared_atomic(ctx, intr, DXIL_RMWOP_XCHG, nir_type_int);
3671 case nir_intrinsic_shared_atomic_comp_swap_dxil:
3672 return emit_shared_atomic_comp_swap(ctx, intr);
3673 case nir_intrinsic_image_store:
3674 case nir_intrinsic_image_deref_store:
3675 return emit_image_store(ctx, intr);
3676 case nir_intrinsic_image_load:
3677 case nir_intrinsic_image_deref_load:
3678 return emit_image_load(ctx, intr);
3679 case nir_intrinsic_image_size:
3680 case nir_intrinsic_image_deref_size:
3681 return emit_image_size(ctx, intr);
3682 case nir_intrinsic_get_ssbo_size:
3683 return emit_get_ssbo_size(ctx, intr);
3684
3685 case nir_intrinsic_vulkan_resource_index:
3686 return emit_vulkan_resource_index(ctx, intr);
3687 case nir_intrinsic_load_vulkan_descriptor:
3688 return emit_load_vulkan_descriptor(ctx, intr);
3689
3690 case nir_intrinsic_load_num_workgroups:
3691 case nir_intrinsic_load_workgroup_size:
3692 default:
3693 NIR_INSTR_UNSUPPORTED(&intr->instr);
3694 assert(!"Unimplemented intrinsic instruction");
3695 return false;
3696 }
3697 }
3698
3699 static bool
3700 emit_load_const(struct ntd_context *ctx, nir_load_const_instr *load_const)
3701 {
3702 for (int i = 0; i < load_const->def.num_components; ++i) {
3703 const struct dxil_value *value;
3704 switch (load_const->def.bit_size) {
3705 case 1:
3706 value = dxil_module_get_int1_const(&ctx->mod,
3707 load_const->value[i].b);
3708 break;
3709 case 16:
3710 ctx->mod.feats.native_low_precision = true;
3711 value = dxil_module_get_int16_const(&ctx->mod,
3712 load_const->value[i].u16);
3713 break;
3714 case 32:
3715 value = dxil_module_get_int32_const(&ctx->mod,
3716 load_const->value[i].u32);
3717 break;
3718 case 64:
3719 ctx->mod.feats.int64_ops = true;
3720 value = dxil_module_get_int64_const(&ctx->mod,
3721 load_const->value[i].u64);
3722 break;
3723 default:
3724 unreachable("unexpected bit_size");
3725 }
3726 if (!value)
3727 return false;
3728
3729 store_ssa_def(ctx, &load_const->def, i, value);
3730 }
3731 return true;
3732 }
3733
3734 static bool
3735 emit_deref(struct ntd_context *ctx, nir_deref_instr *instr)
3736 {
3737 assert(instr->deref_type == nir_deref_type_var ||
3738 instr->deref_type == nir_deref_type_array);
3739
3740 /* In the non-Vulkan environment, there's nothing to emit; users of the
3741 * deref emit the necessary scratch/shared GEP addressing logic themselves.
3742 */
3743 if (!ctx->opts->vulkan_environment)
3744 return true;
3745
3746 /* In the Vulkan environment, we don't have cached handles for textures or
3747 * samplers, so let's use the opportunity of walking through the derefs to
3748 * emit those.
3749 */
3750 nir_variable *var = nir_deref_instr_get_variable(instr);
3751 assert(var);
3752
3753 if (!glsl_type_is_sampler(glsl_without_array(var->type)) &&
3754 !glsl_type_is_image(glsl_without_array(var->type)))
3755 return true;
3756
3757 const struct glsl_type *type = instr->type;
3758 const struct dxil_value *binding;
3759
3760 if (instr->deref_type == nir_deref_type_var) {
3761 binding = dxil_module_get_int32_const(&ctx->mod, var->data.binding);
3762 } else {
3763 const struct dxil_value *base = get_src(ctx, &instr->parent, 0, nir_type_uint32);
3764 const struct dxil_value *offset = get_src(ctx, &instr->arr.index, 0, nir_type_uint32);
3765 if (!base || !offset)
3766 return false;
3767
3768 binding = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, base, offset, 0);
3769 }
3770
3771 if (!binding)
3772 return false;
3773
3774 /* Haven't finished chasing the deref chain yet, just store the value */
3775 if (glsl_type_is_array(type)) {
3776 store_dest(ctx, &instr->dest, 0, binding, nir_type_uint32);
3777 return true;
3778 }
3779
3780 assert(glsl_type_is_sampler(type) || glsl_type_is_image(type));
3781 enum dxil_resource_class res_class;
3782 if (glsl_type_is_image(type))
3783 res_class = DXIL_RESOURCE_CLASS_UAV;
3784 else if (glsl_get_sampler_result_type(type) == GLSL_TYPE_VOID)
3785 res_class = DXIL_RESOURCE_CLASS_SAMPLER;
3786 else
3787 res_class = DXIL_RESOURCE_CLASS_SRV;
3788
3789 const struct dxil_value *handle = emit_createhandle_call(ctx, res_class,
3790 get_resource_id(ctx, res_class, var->data.descriptor_set, var->data.binding), binding, false);
3791 if (!handle)
3792 return false;
3793
3794 store_dest_value(ctx, &instr->dest, 0, handle);
3795 return true;
3796 }
3797
3798 static bool
3799 emit_cond_branch(struct ntd_context *ctx, const struct dxil_value *cond,
3800 int true_block, int false_block)
3801 {
3802 assert(cond);
3803 assert(true_block >= 0);
3804 assert(false_block >= 0);
3805 return dxil_emit_branch(&ctx->mod, cond, true_block, false_block);
3806 }
3807
3808 static bool
3809 emit_branch(struct ntd_context *ctx, int block)
3810 {
3811 assert(block >= 0);
3812 return dxil_emit_branch(&ctx->mod, NULL, block, -1);
3813 }
3814
3815 static bool
3816 emit_jump(struct ntd_context *ctx, nir_jump_instr *instr)
3817 {
3818 switch (instr->type) {
3819 case nir_jump_break:
3820 case nir_jump_continue:
3821 assert(instr->instr.block->successors[0]);
3822 assert(!instr->instr.block->successors[1]);
3823 return emit_branch(ctx, instr->instr.block->successors[0]->index);
3824
3825 default:
3826 unreachable("Unsupported jump type\n");
3827 }
3828 }
3829
3830 struct phi_block {
3831 unsigned num_components;
3832 struct dxil_instr *comp[NIR_MAX_VEC_COMPONENTS];
3833 };
3834
3835 static bool
3836 emit_phi(struct ntd_context *ctx, nir_phi_instr *instr)
3837 {
3838 unsigned bit_size = nir_dest_bit_size(instr->dest);
3839 const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod,
3840 bit_size);
3841
3842 struct phi_block *vphi = ralloc(ctx->phis, struct phi_block);
3843 vphi->num_components = nir_dest_num_components(instr->dest);
3844
3845 for (unsigned i = 0; i < vphi->num_components; ++i) {
3846 struct dxil_instr *phi = vphi->comp[i] = dxil_emit_phi(&ctx->mod, type);
3847 if (!phi)
3848 return false;
3849 store_dest_value(ctx, &instr->dest, i, dxil_instr_get_return_value(phi));
3850 }
3851 _mesa_hash_table_insert(ctx->phis, instr, vphi);
3852 return true;
3853 }
3854
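/* Phi handling is split in two passes: emit_phi creates the instruction and
 * registers its result so later code can refer to it, and fixup_phi runs
 * once all blocks have been emitted, when every incoming value and
 * predecessor block id is finally known.
 */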
3855 static void
3856 fixup_phi(struct ntd_context *ctx, nir_phi_instr *instr,
3857 struct phi_block *vphi)
3858 {
3859 const struct dxil_value *values[128];
3860 unsigned blocks[128];
3861 for (unsigned i = 0; i < vphi->num_components; ++i) {
3862 size_t num_incoming = 0;
3863 nir_foreach_phi_src(src, instr) {
3864 assert(src->src.is_ssa);
3865 const struct dxil_value *val = get_src_ssa(ctx, src->src.ssa, i);
3866 assert(num_incoming < ARRAY_SIZE(values));
3867 values[num_incoming] = val;
3868 assert(num_incoming < ARRAY_SIZE(blocks));
3869 blocks[num_incoming] = src->pred->index;
3870 ++num_incoming;
3871 }
3872 dxil_phi_set_incoming(vphi->comp[i], values, blocks, num_incoming);
3873 }
3874 }
3875
3876 static unsigned
3877 get_n_src(struct ntd_context *ctx, const struct dxil_value **values,
3878 unsigned max_components, nir_tex_src *src, nir_alu_type type)
3879 {
3880 unsigned num_components = nir_src_num_components(src->src);
3881 unsigned i = 0;
3882
3883 assert(num_components <= max_components);
3884
3885 for (i = 0; i < num_components; ++i) {
3886 values[i] = get_src(ctx, &src->src, i, type);
3887 if (!values[i])
3888 return 0;
3889 }
3890
3891 return num_components;
3892 }
3893
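/* Fill the unused tail of a fixed-size operand array with the given undef
 * value so the full array can always be passed along.
 */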
3894 #define PAD_SRC(ctx, array, components, undef) \
3895 for (unsigned i = components; i < ARRAY_SIZE(array); ++i) { \
3896 array[i] = undef; \
3897 }
3898
3899 static const struct dxil_value *
3900 emit_sample(struct ntd_context *ctx, struct texop_parameters *params)
3901 {
3902 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sample", params->overload);
3903 if (!func)
3904 return NULL;
3905
3906 const struct dxil_value *args[11] = {
3907 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE),
3908 params->tex, params->sampler,
3909 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
3910 params->offset[0], params->offset[1], params->offset[2],
3911 params->min_lod
3912 };
3913
3914 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3915 }
3916
3917 static const struct dxil_value *
3918 emit_sample_bias(struct ntd_context *ctx, struct texop_parameters *params)
3919 {
3920 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleBias", params->overload);
3921 if (!func)
3922 return NULL;
3923
3924 assert(params->bias != NULL);
3925
3926 const struct dxil_value *args[12] = {
3927 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_BIAS),
3928 params->tex, params->sampler,
3929 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
3930 params->offset[0], params->offset[1], params->offset[2],
3931 params->bias, params->min_lod
3932 };
3933
3934 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3935 }
3936
3937 static const struct dxil_value *
3938 emit_sample_level(struct ntd_context *ctx, struct texop_parameters *params)
3939 {
3940 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleLevel", params->overload);
3941 if (!func)
3942 return NULL;
3943
3944 assert(params->lod_or_sample != NULL);
3945
3946 const struct dxil_value *args[11] = {
3947 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_LEVEL),
3948 params->tex, params->sampler,
3949 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
3950 params->offset[0], params->offset[1], params->offset[2],
3951 params->lod_or_sample
3952 };
3953
3954 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
3955 }
3956
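/* dx.op.sampleCmp relies on implicit derivatives, which are only available
 * in pixel shaders, so other stages fall back to dx.op.sampleCmpLevelZero.
 * The latter takes no min-lod clamp, hence the explicit argument count.
 */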
3957 static const struct dxil_value *
3958 emit_sample_cmp(struct ntd_context *ctx, struct texop_parameters *params)
3959 {
3960 const struct dxil_func *func;
3961 enum dxil_intr opcode;
3962 int numparam;
3963
3964 if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) {
3965 func = dxil_get_function(&ctx->mod, "dx.op.sampleCmp", DXIL_F32);
3966 opcode = DXIL_INTR_SAMPLE_CMP;
3967 numparam = 12;
3968 } else {
3969 func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevelZero", DXIL_F32);
3970 opcode = DXIL_INTR_SAMPLE_CMP_LVL_ZERO;
3971 numparam = 11;
3972 }
3973
3974 if (!func)
3975 return NULL;
3976
3977 const struct dxil_value *args[12] = {
3978 dxil_module_get_int32_const(&ctx->mod, opcode),
3979 params->tex, params->sampler,
3980 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
3981 params->offset[0], params->offset[1], params->offset[2],
3982 params->cmp, params->min_lod
3983 };
3984
3985 return dxil_emit_call(&ctx->mod, func, args, numparam);
3986 }
3987
3988 static const struct dxil_value *
3989 emit_sample_grad(struct ntd_context *ctx, struct texop_parameters *params)
3990 {
3991 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleGrad", params->overload);
3992 if (!func)
3993 return NULL;
3994
3995 const struct dxil_value *args[17] = {
3996 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_GRAD),
3997 params->tex, params->sampler,
3998 params->coord[0], params->coord[1], params->coord[2], params->coord[3],
3999 params->offset[0], params->offset[1], params->offset[2],
4000 params->dx[0], params->dx[1], params->dx[2],
4001 params->dy[0], params->dy[1], params->dy[2],
4002 params->min_lod
4003 };
4004
4005 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4006 }
4007
4008 static const struct dxil_value *
4009 emit_texel_fetch(struct ntd_context *ctx, struct texop_parameters *params)
4010 {
4011 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", params->overload);
4012 if (!func)
4013 return NULL;
4014
4015 if (!params->lod_or_sample)
4016 params->lod_or_sample = dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32));
4017
4018 const struct dxil_value *args[] = {
4019 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOAD),
4020 params->tex,
4021 params->lod_or_sample, params->coord[0], params->coord[1], params->coord[2],
4022 params->offset[0], params->offset[1], params->offset[2]
4023 };
4024
4025 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4026 }
4027
4028 static const struct dxil_value *
4029 emit_texture_lod(struct ntd_context *ctx, struct texop_parameters *params)
4030 {
4031 const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.calculateLOD", DXIL_F32);
4032 if (!func)
4033 return NULL;
4034
4035 const struct dxil_value *args[] = {
4036 dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOD),
4037 params->tex,
4038 params->sampler,
4039 params->coord[0],
4040 params->coord[1],
4041 params->coord[2],
4042 dxil_module_get_int1_const(&ctx->mod, 1)
4043 };
4044
4045 return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
4046 }
4047
4048 static bool
4049 emit_tex(struct ntd_context *ctx, nir_tex_instr *instr)
4050 {
4051 struct texop_parameters params;
4052 memset(&params, 0, sizeof(struct texop_parameters));
4053 if (!ctx->opts->vulkan_environment) {
4054 params.tex = ctx->srv_handles[instr->texture_index];
4055 params.sampler = ctx->sampler_handles[instr->sampler_index];
4056 }
4057
4058 const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
4059 const struct dxil_type *float_type = dxil_module_get_float_type(&ctx->mod, 32);
4060 const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);
4061 const struct dxil_value *float_undef = dxil_module_get_undef(&ctx->mod, float_type);
4062
4063 unsigned coord_components = 0, offset_components = 0, dx_components = 0, dy_components = 0;
4064 params.overload = get_overload(instr->dest_type, 32);
4065
4066 for (unsigned i = 0; i < instr->num_srcs; i++) {
4067 nir_alu_type type = nir_tex_instr_src_type(instr, i);
4068
4069 switch (instr->src[i].src_type) {
4070 case nir_tex_src_coord:
4071 coord_components = get_n_src(ctx, params.coord, ARRAY_SIZE(params.coord),
4072 &instr->src[i], type);
4073 if (!coord_components)
4074 return false;
4075 break;
4076
4077 case nir_tex_src_offset:
4078 offset_components = get_n_src(ctx, params.offset, ARRAY_SIZE(params.offset),
4079 &instr->src[i], nir_type_int);
4080 if (!offset_components)
4081 return false;
4082 break;
4083
4084 case nir_tex_src_bias:
4085 assert(instr->op == nir_texop_txb);
4086 assert(nir_src_num_components(instr->src[i].src) == 1);
4087 params.bias = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
4088 if (!params.bias)
4089 return false;
4090 break;
4091
4092 case nir_tex_src_lod:
4093 assert(nir_src_num_components(instr->src[i].src) == 1);
4094 /* Buffers don't have a LOD */
4095 if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF)
4096 params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, type);
4097 else
4098 params.lod_or_sample = int_undef;
4099 if (!params.lod_or_sample)
4100 return false;
4101 break;
4102
4103 case nir_tex_src_min_lod:
4104 assert(nir_src_num_components(instr->src[i].src) == 1);
4105 params.min_lod = get_src(ctx, &instr->src[i].src, 0, type);
4106 if (!params.min_lod)
4107 return false;
4108 break;
4109
4110 case nir_tex_src_comparator:
4111 assert(nir_src_num_components(instr->src[i].src) == 1);
4112 params.cmp = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
4113 if (!params.cmp)
4114 return false;
4115 break;
4116
4117 case nir_tex_src_ddx:
4118 dx_components = get_n_src(ctx, params.dx, ARRAY_SIZE(params.dx),
4119 &instr->src[i], nir_type_float);
4120 if (!dx_components)
4121 return false;
4122 break;
4123
4124 case nir_tex_src_ddy:
4125 dy_components = get_n_src(ctx, params.dy, ARRAY_SIZE(params.dy),
4126 &instr->src[i], nir_type_float);
4127 if (!dy_components)
4128 return false;
4129 break;
4130
4131 case nir_tex_src_ms_index:
4132 params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, nir_type_int);
4133 if (!params.lod_or_sample)
4134 return false;
4135 break;
4136
4137 case nir_tex_src_texture_deref:
4138 assert(ctx->opts->vulkan_environment);
4139 params.tex = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
4140 break;
4141
4142 case nir_tex_src_sampler_deref:
4143 assert(ctx->opts->vulkan_environment);
4144 params.sampler = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
4145 break;
4146
4147 case nir_tex_src_projector:
4148 unreachable("Texture projector should have been lowered");
4149
4150 default:
4151 fprintf(stderr, "texture source: %d\n", instr->src[i].src_type);
4152 unreachable("unknown texture source");
4153 }
4154 }
4155
4156 assert(params.tex != NULL);
4157 assert(instr->op == nir_texop_txf ||
4158 instr->op == nir_texop_txf_ms ||
4159 nir_tex_instr_is_query(instr) ||
4160 params.sampler != NULL);
4161
4162 PAD_SRC(ctx, params.coord, coord_components, float_undef);
4163 PAD_SRC(ctx, params.offset, offset_components, int_undef);
4164 if (!params.min_lod) params.min_lod = float_undef;
4165
4166 const struct dxil_value *sample = NULL;
4167 switch (instr->op) {
4168 case nir_texop_txb:
4169 sample = emit_sample_bias(ctx, &params);
4170 break;
4171
4172 case nir_texop_tex:
4173 if (params.cmp != NULL) {
4174 sample = emit_sample_cmp(ctx, &params);
4175 break;
4176 } else if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) {
4177 sample = emit_sample(ctx, &params);
4178 break;
4179 }
4180 params.lod_or_sample = dxil_module_get_float_const(&ctx->mod, 0);
4181 FALLTHROUGH;
4182 case nir_texop_txl:
4183 sample = emit_sample_level(ctx, &params);
4184 break;
4185
4186 case nir_texop_txd:
4187 PAD_SRC(ctx, params.dx, dx_components, float_undef);
4188 PAD_SRC(ctx, params.dy, dy_components, float_undef);
4189 sample = emit_sample_grad(ctx, &params);
4190 break;
4191
4192 case nir_texop_txf:
4193 case nir_texop_txf_ms:
4194 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
4195 params.coord[1] = int_undef;
4196 sample = emit_bufferload_call(ctx, params.tex, params.coord, params.overload);
4197 } else {
4198 PAD_SRC(ctx, params.coord, coord_components, int_undef);
4199 sample = emit_texel_fetch(ctx, &params);
4200 }
4201 break;
4202
4203 case nir_texop_txs:
4204 sample = emit_texture_size(ctx, &params);
4205 break;
4206
4207 case nir_texop_lod:
4208 sample = emit_texture_lod(ctx, &params);
4209 store_dest(ctx, &instr->dest, 0, sample, nir_alu_type_get_base_type(instr->dest_type));
4210 return true;
4211
4212 case nir_texop_query_levels:
4213 params.lod_or_sample = dxil_module_get_int_const(&ctx->mod, 0, 32);
4214 sample = emit_texture_size(ctx, &params);
4215 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3);
4216 store_dest(ctx, &instr->dest, 0, retval, nir_alu_type_get_base_type(instr->dest_type));
4217 return true;
4218
4219 default:
4220 fprintf(stderr, "texture op: %d\n", instr->op);
4221 unreachable("unknown texture op");
4222 }
4223
4224 if (!sample)
4225 return false;
4226
4227 for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
4228 const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, i);
4229 store_dest(ctx, &instr->dest, i, retval, nir_alu_type_get_base_type(instr->dest_type));
4230 }
4231
4232 return true;
4233 }
4234
4235 static bool
4236 emit_undefined(struct ntd_context *ctx, nir_ssa_undef_instr *undef)
4237 {
4238 for (unsigned i = 0; i < undef->def.num_components; ++i)
4239 store_ssa_def(ctx, &undef->def, i, dxil_module_get_int32_const(&ctx->mod, 0));
4240 return true;
4241 }
4242
4243 static bool emit_instr(struct ntd_context *ctx, struct nir_instr *instr)
4244 {
4245 switch (instr->type) {
4246 case nir_instr_type_alu:
4247 return emit_alu(ctx, nir_instr_as_alu(instr));
4248 case nir_instr_type_intrinsic:
4249 return emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
4250 case nir_instr_type_load_const:
4251 return emit_load_const(ctx, nir_instr_as_load_const(instr));
4252 case nir_instr_type_deref:
4253 return emit_deref(ctx, nir_instr_as_deref(instr));
4254 case nir_instr_type_jump:
4255 return emit_jump(ctx, nir_instr_as_jump(instr));
4256 case nir_instr_type_phi:
4257 return emit_phi(ctx, nir_instr_as_phi(instr));
4258 case nir_instr_type_tex:
4259 return emit_tex(ctx, nir_instr_as_tex(instr));
4260 case nir_instr_type_ssa_undef:
4261 return emit_undefined(ctx, nir_instr_as_ssa_undef(instr));
4262 default:
4263 NIR_INSTR_UNSUPPORTED(instr);
4264 unreachable("Unimplemented instruction type");
4265 return false;
4266 }
4267 }
4268
4269
4270 static bool
4271 emit_block(struct ntd_context *ctx, struct nir_block *block)
4272 {
4273 assert(block->index < ctx->mod.num_basic_block_ids);
4274 ctx->mod.basic_block_ids[block->index] = ctx->mod.curr_block;
4275
4276 nir_foreach_instr(instr, block) {
4277 TRACE_CONVERSION(instr);
4278
4279 if (!emit_instr(ctx, instr)) {
4280 return false;
4281 }
4282 }
4283 return true;
4284 }
4285
4286 static bool
4287 emit_cf_list(struct ntd_context *ctx, struct exec_list *list);
4288
4289 static bool
4290 emit_if(struct ntd_context *ctx, struct nir_if *if_stmt)
4291 {
4292 assert(nir_src_num_components(if_stmt->condition) == 1);
4293 const struct dxil_value *cond = get_src(ctx, &if_stmt->condition, 0,
4294 nir_type_bool);
4295 if (!cond)
4296 return false;
4297
4298 /* prepare blocks */
4299 nir_block *then_block = nir_if_first_then_block(if_stmt);
4300 assert(nir_if_last_then_block(if_stmt)->successors[0]);
4301 assert(!nir_if_last_then_block(if_stmt)->successors[1]);
4302 int then_succ = nir_if_last_then_block(if_stmt)->successors[0]->index;
4303
4304 nir_block *else_block = NULL;
4305 int else_succ = -1;
4306 if (!exec_list_is_empty(&if_stmt->else_list)) {
4307 else_block = nir_if_first_else_block(if_stmt);
4308 assert(nir_if_last_else_block(if_stmt)->successors[0]);
4309 assert(!nir_if_last_else_block(if_stmt)->successors[1]);
4310 else_succ = nir_if_last_else_block(if_stmt)->successors[0]->index;
4311 }
4312
4313 if (!emit_cond_branch(ctx, cond, then_block->index,
4314 else_block ? else_block->index : then_succ))
4315 return false;
4316
4317 /* handle then-block */
4318 if (!emit_cf_list(ctx, &if_stmt->then_list) ||
4319 (!nir_block_ends_in_jump(nir_if_last_then_block(if_stmt)) &&
4320 !emit_branch(ctx, then_succ)))
4321 return false;
4322
4323 if (else_block) {
4324 /* handle else-block */
4325 if (!emit_cf_list(ctx, &if_stmt->else_list) ||
4326 (!nir_block_ends_in_jump(nir_if_last_else_block(if_stmt)) &&
4327 !emit_branch(ctx, else_succ)))
4328 return false;
4329 }
4330
4331 return true;
4332 }
4333
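/* NIR loops have no explicit exit condition; breaks are jump instructions
 * inside the body. The loop is therefore emitted as an unconditional branch
 * into the first block plus an unconditional back edge.
 */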
4334 static bool
4335 emit_loop(struct ntd_context *ctx, nir_loop *loop)
4336 {
4337 nir_block *first_block = nir_loop_first_block(loop);
4338
4339 assert(nir_loop_last_block(loop)->successors[0]);
4340 assert(!nir_loop_last_block(loop)->successors[1]);
4341
4342 if (!emit_branch(ctx, first_block->index))
4343 return false;
4344
4345 if (!emit_cf_list(ctx, &loop->body))
4346 return false;
4347
4348 if (!emit_branch(ctx, first_block->index))
4349 return false;
4350
4351 return true;
4352 }
4353
4354 static bool
4355 emit_cf_list(struct ntd_context *ctx, struct exec_list *list)
4356 {
4357 foreach_list_typed(nir_cf_node, node, node, list) {
4358 switch (node->type) {
4359 case nir_cf_node_block:
4360 if (!emit_block(ctx, nir_cf_node_as_block(node)))
4361 return false;
4362 break;
4363
4364 case nir_cf_node_if:
4365 if (!emit_if(ctx, nir_cf_node_as_if(node)))
4366 return false;
4367 break;
4368
4369 case nir_cf_node_loop:
4370 if (!emit_loop(ctx, nir_cf_node_as_loop(node)))
4371 return false;
4372 break;
4373
4374 default:
4375 unreachable("unsupported cf-list node");
4376 break;
4377 }
4378 }
4379 return true;
4380 }
4381
4382 static void
4383 insert_sorted_by_binding(struct exec_list *var_list, nir_variable *new_var)
4384 {
4385 nir_foreach_variable_in_list(var, var_list) {
4386 if (var->data.binding > new_var->data.binding) {
4387 exec_node_insert_node_before(&var->node, &new_var->node);
4388 return;
4389 }
4390 }
4391 exec_list_push_tail(var_list, &new_var->node);
4392 }
4393
4394
4395 static void
4396 sort_uniforms_by_binding_and_remove_structs(nir_shader *s)
4397 {
4398 struct exec_list new_list;
4399 exec_list_make_empty(&new_list);
4400
4401 nir_foreach_variable_with_modes_safe(var, s, nir_var_uniform) {
4402 exec_node_remove(&var->node);
4403 const struct glsl_type *type = glsl_without_array(var->type);
4404 if (!glsl_type_is_struct(type))
4405 insert_sorted_by_binding(&new_list, var);
4406 }
4407 exec_list_append(&s->variables, &new_list);
4408 }
4409
4410 static void
4411 prepare_phi_values(struct ntd_context *ctx)
4412 {
4413 /* PHI nodes are difficult to get right when tracking the types:
4414 * Since the incoming sources are linked to blocks, we can't bitcast
4415 * on the fly while loading. So scan the shader and insert a typed dummy
4416 * value for each phi source, and when storing we convert if the incoming
4417 * value has a different type than the one expected by the phi node.
4418 * We choose int as default, because it supports more bit sizes.
4419 */
4420 nir_foreach_function(function, ctx->shader) {
4421 if (function->impl) {
4422 nir_foreach_block(block, function->impl) {
4423 nir_foreach_instr(instr, block) {
4424 if (instr->type == nir_instr_type_phi) {
4425 nir_phi_instr *ir = nir_instr_as_phi(instr);
4426 unsigned bitsize = nir_dest_bit_size(ir->dest);
4427 const struct dxil_value *dummy = dxil_module_get_int_const(&ctx->mod, 0, bitsize);
4428 nir_foreach_phi_src(src, ir) {
4429 for (unsigned int i = 0; i < ir->dest.ssa.num_components; ++i)
4430 store_ssa_def(ctx, src->src.ssa, i, dummy);
4431 }
4432 }
4433 }
4434 }
4435 }
4436 }
4437 }
4438
4439 static bool
4440 emit_cbvs(struct ntd_context *ctx)
4441 {
4442 if (ctx->shader->info.stage == MESA_SHADER_KERNEL || ctx->opts->vulkan_environment) {
4443 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ubo) {
4444 if (!emit_ubo_var(ctx, var))
4445 return false;
4446 }
4447 } else {
4448 for (int i = ctx->opts->ubo_binding_offset; i < ctx->shader->info.num_ubos; ++i) {
4449 char name[64];
4450 snprintf(name, sizeof(name), "__ubo%d", i);
4451 if (!emit_cbv(ctx, i, 0, 16384 /*4096 vec4's*/, 1, name))
4452 return false;
4453 }
4454 }
4455
4456 return true;
4457 }
4458
4459 static bool
4460 emit_scratch(struct ntd_context *ctx)
4461 {
4462 if (ctx->shader->scratch_size) {
4463 /*
4464 * We always allocate a u32 array, no matter the actual variable types.
4465 * According to the DXIL spec, the minimum load/store granularity is
4466 * 32-bit, anything smaller requires using a read-extract/read-write-modify
4467 * approach.
4468 */
4469 unsigned size = ALIGN_POT(ctx->shader->scratch_size, sizeof(uint32_t));
4470 const struct dxil_type *int32 = dxil_module_get_int_type(&ctx->mod, 32);
4471 const struct dxil_value *array_length = dxil_module_get_int32_const(&ctx->mod, size / sizeof(uint32_t));
4472 if (!int32 || !array_length)
4473 return false;
4474
4475 const struct dxil_type *type = dxil_module_get_array_type(
4476 &ctx->mod, int32, size / sizeof(uint32_t));
4477 if (!type)
4478 return false;
4479
4480 ctx->scratchvars = dxil_emit_alloca(&ctx->mod, type, int32, array_length, 4);
4481 if (!ctx->scratchvars)
4482 return false;
4483 }
4484
4485 return true;
4486 }
4487
4488 /* The validator rejects a groupshared global variable that no op references, so it is only emitted when such ops exist. */
4489 static bool
4490 shader_has_shared_ops(struct nir_shader *s)
4491 {
4492 nir_foreach_function(func, s) {
4493 if (!func->impl)
4494 continue;
4495 nir_foreach_block(block, func->impl) {
4496 nir_foreach_instr(instr, block) {
4497 if (instr->type != nir_instr_type_intrinsic)
4498 continue;
4499 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
4500 switch (intrin->intrinsic) {
4501 case nir_intrinsic_load_shared_dxil:
4502 case nir_intrinsic_store_shared_dxil:
4503 case nir_intrinsic_shared_atomic_add_dxil:
4504 case nir_intrinsic_shared_atomic_and_dxil:
4505 case nir_intrinsic_shared_atomic_comp_swap_dxil:
4506 case nir_intrinsic_shared_atomic_exchange_dxil:
4507 case nir_intrinsic_shared_atomic_imax_dxil:
4508 case nir_intrinsic_shared_atomic_imin_dxil:
4509 case nir_intrinsic_shared_atomic_or_dxil:
4510 case nir_intrinsic_shared_atomic_umax_dxil:
4511 case nir_intrinsic_shared_atomic_umin_dxil:
4512 case nir_intrinsic_shared_atomic_xor_dxil:
4513 return true;
4514 default: break;
4515 }
4516 }
4517 }
4518 }
4519 return false;
4520 }
4521
4522 static bool
4523 emit_module(struct ntd_context *ctx, const struct nir_to_dxil_options *opts)
4524 {
4525 /* The validator forces us to emit resources in a specific order:
4526 * CBVs, Samplers, SRVs, UAVs. While we are at it, also remove
4527 * stale struct uniforms; they were lowered but might not have been removed. */
4528 sort_uniforms_by_binding_and_remove_structs(ctx->shader);
4529
4530 /* CBVs */
4531 if (!emit_cbvs(ctx))
4532 return false;
4533
4534 /* Samplers */
4535 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
4536 unsigned count = glsl_type_get_sampler_count(var->type);
4537 const struct glsl_type *without_array = glsl_without_array(var->type);
4538 if (var->data.mode == nir_var_uniform && glsl_type_is_sampler(without_array) &&
4539 glsl_get_sampler_result_type(without_array) == GLSL_TYPE_VOID) {
4540 if (!emit_sampler(ctx, var, count))
4541 return false;
4542 }
4543 }
4544
4545 /* SRVs */
4546 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
4547 unsigned count = glsl_type_get_sampler_count(var->type);
4548 const struct glsl_type *without_array = glsl_without_array(var->type);
4549 if (var->data.mode == nir_var_uniform && glsl_type_is_sampler(without_array) &&
4550 glsl_get_sampler_result_type(without_array) != GLSL_TYPE_VOID) {
4551 if (!emit_srv(ctx, var, count))
4552 return false;
4553 }
4554 }
4555 /* Handle read-only SSBOs as SRVs */
4556 nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
4557 if ((var->data.access & ACCESS_NON_WRITEABLE) != 0) {
4558 unsigned count = 1;
4559 if (glsl_type_is_array(var->type))
4560 count = glsl_get_length(var->type);
4561 if (!emit_srv(ctx, var, count))
4562 return false;
4563 }
4564 }
4565
4566 if (ctx->shader->info.shared_size && shader_has_shared_ops(ctx->shader)) {
4567 const struct dxil_type *type;
4568 unsigned size;
4569
4570 /*
4571 * We always allocate a u32 array, no matter the actual variable types.
4572 * According to the DXIL spec, the minimum load/store granularity is
4573 * 32-bit, anything smaller requires using a read-extract/read-write-modify
4574 * approach. Non-atomic 64-bit accesses are allowed, but the
4575 * GEP(cast(gvar, u64[] *), offset) and cast(GEP(gvar, offset), u64 *)
4576 * sequences don't seem to be accepted by the DXIL validator when the
4577 * pointer is in the groupshared address space, making the 32-bit -> 64-bit
4578 * pointer cast impossible.
4579 */
4580 size = ALIGN_POT(ctx->shader->info.shared_size, sizeof(uint32_t));
4581 type = dxil_module_get_array_type(&ctx->mod,
4582 dxil_module_get_int_type(&ctx->mod, 32),
4583 size / sizeof(uint32_t));
4584 ctx->sharedvars = dxil_add_global_ptr_var(&ctx->mod, "shared", type,
4585 DXIL_AS_GROUPSHARED,
4586 ffs(sizeof(uint64_t)),
4587 NULL);
4588 }

   if (!emit_scratch(ctx))
      return false;

   /* UAVs */
   if (ctx->shader->info.stage == MESA_SHADER_KERNEL) {
      if (!emit_globals(ctx, opts->num_kernel_globals))
         return false;

      ctx->consts = _mesa_pointer_hash_table_create(ctx->ralloc_ctx);
      if (!ctx->consts)
         return false;
      if (!emit_global_consts(ctx))
         return false;
   } else {
      /* Handle read/write SSBOs as UAVs */
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
         if ((var->data.access & ACCESS_NON_WRITEABLE) == 0) {
            unsigned count = 1;
            if (glsl_type_is_array(var->type))
               count = glsl_get_length(var->type);
            if (!emit_uav(ctx, var->data.binding, var->data.descriptor_set,
                          count, DXIL_COMP_TYPE_INVALID,
                          DXIL_RESOURCE_KIND_RAW_BUFFER, var->name))
               return false;
         }
      }
   }

   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
      if (var->data.mode == nir_var_uniform && glsl_type_is_image(glsl_without_array(var->type))) {
         if (!emit_uav_var(ctx, var, glsl_type_get_image_count(var->type)))
            return false;
      }
   }

   nir_function_impl *entry = nir_shader_get_entrypoint(ctx->shader);
   nir_metadata_require(entry, nir_metadata_block_index);

   assert(entry->num_blocks > 0);
   ctx->mod.basic_block_ids = rzalloc_array(ctx->ralloc_ctx, int,
                                            entry->num_blocks);
   if (!ctx->mod.basic_block_ids)
      return false;

   for (unsigned i = 0; i < entry->num_blocks; ++i)
      ctx->mod.basic_block_ids[i] = -1;
   ctx->mod.num_basic_block_ids = entry->num_blocks;

   ctx->defs = rzalloc_array(ctx->ralloc_ctx, struct dxil_def,
                             entry->ssa_alloc);
   if (!ctx->defs)
      return false;
   ctx->num_defs = entry->ssa_alloc;

   ctx->phis = _mesa_pointer_hash_table_create(ctx->ralloc_ctx);
   if (!ctx->phis)
      return false;

   prepare_phi_values(ctx);

   if (!emit_cf_list(ctx, &entry->body))
      return false;

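   /* Phi sources may refer to values defined in blocks that had not been
    * emitted yet when the phi itself was emitted, so the incoming values
    * recorded via prepare_phi_values() are patched in here, once every
    * block has been translated. */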
   hash_table_foreach(ctx->phis, entry) {
      fixup_phi(ctx, (nir_phi_instr *)entry->key,
                (struct phi_block *)entry->data);
   }

   if (!dxil_emit_ret_void(&ctx->mod))
      return false;

   if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_out) {
         if (var->data.location == FRAG_RESULT_STENCIL) {
            ctx->mod.feats.stencil_ref = true;
         }
      }
   }

   if (ctx->mod.feats.native_low_precision)
      ctx->mod.minor_version = MAX2(ctx->mod.minor_version, 2);

   return emit_metadata(ctx) &&
          dxil_emit_module(&ctx->mod);
}

static unsigned int
get_dxil_shader_kind(struct nir_shader *s)
{
   switch (s->info.stage) {
   case MESA_SHADER_VERTEX:
      return DXIL_VERTEX_SHADER;
   case MESA_SHADER_GEOMETRY:
      return DXIL_GEOMETRY_SHADER;
   case MESA_SHADER_FRAGMENT:
      return DXIL_PIXEL_SHADER;
   case MESA_SHADER_KERNEL:
   case MESA_SHADER_COMPUTE:
      return DXIL_COMPUTE_SHADER;
   default:
      unreachable("unknown shader stage in nir_to_dxil");
      return DXIL_COMPUTE_SHADER;
   }
}

static unsigned
lower_bit_size_callback(const nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_alu)
      return 0;
   const nir_alu_instr *alu = nir_instr_as_alu(instr);

   if (nir_op_infos[alu->op].is_conversion)
      return 0;

   unsigned num_inputs = nir_op_infos[alu->op].num_inputs;
   const struct nir_to_dxil_options *opts = (const struct nir_to_dxil_options *)data;
   unsigned min_bit_size = opts->lower_int16 ? 32 : 16;
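   /* nir_lower_bit_size treats a return value of 0 as "leave this
    * instruction alone" and any other value as the bit size to widen the
    * operation to. 1-bit booleans are exempt; everything else narrower
    * than min_bit_size gets promoted. */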

   unsigned ret = 0;
   for (unsigned i = 0; i < num_inputs; i++) {
      unsigned bit_size = nir_src_bit_size(alu->src[i].src);
      if (bit_size != 1 && bit_size < min_bit_size)
         ret = min_bit_size;
   }

   return ret;
}

static void
optimize_nir(struct nir_shader *s, const struct nir_to_dxil_options *opts)
{
   bool progress;
   do {
      progress = false;
      NIR_PASS_V(s, nir_lower_vars_to_ssa);
      NIR_PASS(progress, s, nir_lower_indirect_derefs, nir_var_function_temp, UINT32_MAX);
      NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, nir_opt_copy_prop_vars);
      NIR_PASS(progress, s, nir_lower_bit_size, lower_bit_size_callback, (void *)opts);
      NIR_PASS(progress, s, dxil_nir_lower_8bit_conv);
      if (opts->lower_int16)
         NIR_PASS(progress, s, dxil_nir_lower_16bit_conv);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_if, true);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, dxil_nir_lower_x2b);
      if (s->options->lower_int64_options)
         NIR_PASS(progress, s, nir_lower_int64);
      NIR_PASS(progress, s, nir_lower_alu);
      NIR_PASS(progress, s, dxil_nir_lower_inot);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_undef);
      NIR_PASS(progress, s, nir_lower_undef_to_zero);
      NIR_PASS(progress, s, nir_opt_deref);
      NIR_PASS(progress, s, dxil_nir_lower_upcast_phis, opts->lower_int16 ? 32 : 16);
      NIR_PASS(progress, s, nir_lower_64bit_phis);
      NIR_PASS_V(s, nir_lower_system_values);
   } while (progress);

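   /* Late algebraic lowerings are meant to survive to code generation and
    * could be rewritten again by the passes above, so they run in their
    * own fixed-point loop once the main loop has settled. */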
   do {
      progress = false;
      NIR_PASS(progress, s, nir_opt_algebraic_late);
   } while (progress);
}

static void
dxil_fill_validation_state(struct ntd_context *ctx,
                           struct dxil_validation_state *state)
{
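   /* This fills the PSV (pipeline state validation) record that is later
    * serialized into the container; it lets the runtime validate the
    * shader against the pipeline state without parsing the DXIL itself. */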
   state->num_resources = util_dynarray_num_elements(&ctx->resources, struct dxil_resource);
   state->resources = (struct dxil_resource *)ctx->resources.data;
   state->state.psv0.max_expected_wave_lane_count = UINT_MAX;
   state->state.shader_stage = (uint8_t)ctx->mod.shader_kind;
   state->state.sig_input_elements = (uint8_t)ctx->mod.num_sig_inputs;
   state->state.sig_output_elements = (uint8_t)ctx->mod.num_sig_outputs;
   //state->state.sig_patch_const_or_prim_elements = 0;

   switch (ctx->mod.shader_kind) {
   case DXIL_VERTEX_SHADER:
      state->state.psv0.vs.output_position_present = ctx->mod.info.has_out_position;
      break;
   case DXIL_PIXEL_SHADER:
      /* TODO: handle depth outputs */
      state->state.psv0.ps.depth_output = ctx->mod.info.has_out_depth;
      state->state.psv0.ps.sample_frequency =
         ctx->mod.info.has_per_sample_input;
      break;
   case DXIL_COMPUTE_SHADER:
      break;
   case DXIL_GEOMETRY_SHADER:
      state->state.max_vertex_count = ctx->shader->info.gs.vertices_out;
      state->state.psv0.gs.input_primitive = dxil_get_input_primitive(ctx->shader->info.gs.input_primitive);
      state->state.psv0.gs.output_toplology = dxil_get_primitive_topology(ctx->shader->info.gs.output_primitive);
      state->state.psv0.gs.output_stream_mask = ctx->shader->info.gs.active_stream_mask;
      state->state.psv0.gs.output_position_present = ctx->mod.info.has_out_position;
      break;
   default:
      assert(0 && "Shader type not (yet) supported");
   }
}

static nir_variable *
add_sysvalue(struct ntd_context *ctx,
             uint8_t value, char *name,
             int driver_location)
{
   nir_variable *var = rzalloc(ctx->shader, nir_variable);
   if (!var)
      return NULL;
   var->data.driver_location = driver_location;
   var->data.location = value;
   var->type = glsl_uint_type();
   var->name = name;
   var->data.mode = nir_var_system_value;
   var->data.interpolation = INTERP_MODE_FLAT;
   return var;
}

static bool
append_input_or_sysvalue(struct ntd_context *ctx,
                         int input_loc, int sv_slot,
                         char *name, int driver_location)
{
   if (input_loc >= 0) {
      /* Check whether a shader input variable is available that
       * corresponds to the sysvalue */
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
         if (var->data.location == input_loc) {
            ctx->system_value[sv_slot] = var;
            return true;
         }
      }
   }

   ctx->system_value[sv_slot] = add_sysvalue(ctx, sv_slot, name, driver_location);
   if (!ctx->system_value[sv_slot])
      return false;

   nir_shader_add_variable(ctx->shader, ctx->system_value[sv_slot]);
   return true;
}

struct sysvalue_name {
   gl_system_value value;
   int slot;
   char *name;
} possible_sysvalues[] = {
   {SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, -1, "SV_VertexID"},
   {SYSTEM_VALUE_INSTANCE_ID, -1, "SV_InstanceID"},
   {SYSTEM_VALUE_FRONT_FACE, VARYING_SLOT_FACE, "SV_IsFrontFace"},
   {SYSTEM_VALUE_PRIMITIVE_ID, VARYING_SLOT_PRIMITIVE_ID, "SV_PrimitiveID"},
   {SYSTEM_VALUE_SAMPLE_ID, -1, "SV_SampleIndex"},
};
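/* A slot of -1 means there is no varying slot to scan for, so
 * append_input_or_sysvalue() skips the input search and always
 * materializes a fresh system-value variable instead of reusing an
 * existing shader input. */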

static bool
allocate_sysvalues(struct ntd_context *ctx)
{
   unsigned driver_location = 0;
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in)
      driver_location++;
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_system_value)
      driver_location++;

   for (unsigned i = 0; i < ARRAY_SIZE(possible_sysvalues); ++i) {
      struct sysvalue_name *info = &possible_sysvalues[i];
      if (BITSET_TEST(ctx->shader->info.system_values_read, info->value)) {
         if (!append_input_or_sysvalue(ctx, info->slot,
                                       info->value, info->name,
                                       driver_location++))
            return false;
      }
   }
   return true;
}

bool
nir_to_dxil(struct nir_shader *s, const struct nir_to_dxil_options *opts,
            struct blob *blob)
{
   assert(opts);
   bool retval = true;
   debug_dxil = (int)debug_get_option_debug_dxil();
   blob_init(blob);

   struct ntd_context *ctx = calloc(1, sizeof(*ctx));
   if (!ctx)
      return false;

   ctx->opts = opts;
   ctx->shader = s;

   ctx->ralloc_ctx = ralloc_context(NULL);
   if (!ctx->ralloc_ctx) {
      retval = false;
      goto out;
   }

   util_dynarray_init(&ctx->srv_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->uav_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->cbv_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->sampler_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->resources, ctx->ralloc_ctx);
   dxil_module_init(&ctx->mod, ctx->ralloc_ctx);
   ctx->mod.shader_kind = get_dxil_shader_kind(s);
   ctx->mod.major_version = 6;
   ctx->mod.minor_version = 1;
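   /* Target shader model 6.1 by default; emit_module() bumps the minor
    * version to 2 when native 16-bit (low-precision) types end up being
    * used. */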

   NIR_PASS_V(s, nir_lower_pack);
   NIR_PASS_V(s, nir_lower_frexp);
   NIR_PASS_V(s, nir_lower_flrp, 16 | 32 | 64, true);

   optimize_nir(s, opts);

   NIR_PASS_V(s, nir_remove_dead_variables,
              nir_var_function_temp | nir_var_shader_temp, NULL);

   if (!allocate_sysvalues(ctx)) {
      /* Don't leak ctx and its ralloc context on failure. */
      retval = false;
      goto out;
   }

   if (debug_dxil & DXIL_DEBUG_VERBOSE)
      nir_print_shader(s, stderr);

   if (!emit_module(ctx, opts)) {
      debug_printf("D3D12: emit_module failed\n");
      retval = false;
      goto out;
   }

   if (debug_dxil & DXIL_DEBUG_DUMP_MODULE) {
      struct dxil_dumper *dumper = dxil_dump_create();
      dxil_dump_module(dumper, &ctx->mod);
      fprintf(stderr, "\n");
      dxil_dump_buf_to_file(dumper, stderr);
      fprintf(stderr, "\n\n");
      dxil_dump_free(dumper);
   }

   struct dxil_container container;
   dxil_container_init(&container);
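   /* Assemble the container parts in order: feature flags, input and
    * output signatures, the state-validation (PSV) record, and finally
    * the DXIL module itself. */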
   if (!dxil_container_add_features(&container, &ctx->mod.feats)) {
      debug_printf("D3D12: dxil_container_add_features failed\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_add_io_signature(&container,
                                        DXIL_ISG1,
                                        ctx->mod.num_sig_inputs,
                                        ctx->mod.inputs)) {
      debug_printf("D3D12: failed to write input signature\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_add_io_signature(&container,
                                        DXIL_OSG1,
                                        ctx->mod.num_sig_outputs,
                                        ctx->mod.outputs)) {
      debug_printf("D3D12: failed to write output signature\n");
      retval = false;
      goto out;
   }

   struct dxil_validation_state validation_state;
   memset(&validation_state, 0, sizeof(validation_state));
   dxil_fill_validation_state(ctx, &validation_state);

   if (!dxil_container_add_state_validation(&container, &ctx->mod,
                                            &validation_state)) {
      debug_printf("D3D12: failed to write state-validation\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_add_module(&container, &ctx->mod)) {
      debug_printf("D3D12: failed to write module\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_write(&container, blob)) {
      debug_printf("D3D12: dxil_container_write failed\n");
      retval = false;
      goto out;
   }
   dxil_container_finish(&container);

   if (debug_dxil & DXIL_DEBUG_DUMP_BLOB) {
      static int shader_id = 0;
      char buffer[64];
      snprintf(buffer, sizeof(buffer), "shader_%s_%d.blob",
               get_shader_kind_str(ctx->mod.shader_kind), shader_id++);
      debug_printf("Trying to write blob to %s\n", buffer);
      FILE *f = fopen(buffer, "wb");
      if (f) {
         fwrite(blob->data, 1, blob->size, f);
         fclose(f);
      }
   }

out:
   dxil_module_release(&ctx->mod);
   ralloc_free(ctx->ralloc_ctx);
   free(ctx);
   return retval;
}

enum dxil_sysvalue_type
nir_var_to_dxil_sysvalue_type(nir_variable *var, uint64_t other_stage_mask)
{
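   /* FACE is always re-generated on the DXIL side; POS, PRIMITIVE_ID,
    * CLIP_DIST0/1 and PSIZ only count as DXIL system values when the
    * other stage does not also use them as ordinary varyings; everything
    * else is a plain varying. */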
   switch (var->data.location) {
   case VARYING_SLOT_FACE:
      return DXIL_GENERATED_SYSVALUE;
   case VARYING_SLOT_POS:
   case VARYING_SLOT_PRIMITIVE_ID:
   case VARYING_SLOT_CLIP_DIST0:
   case VARYING_SLOT_CLIP_DIST1:
   case VARYING_SLOT_PSIZ:
      if (!((1ull << var->data.location) & other_stage_mask))
         return DXIL_SYSVALUE;
      FALLTHROUGH;
   default:
      return DXIL_NO_SYSVALUE;
   }
}
