/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_to_dxil.h"

#include "dxil_container.h"
#include "dxil_dump.h"
#include "dxil_enums.h"
#include "dxil_function.h"
#include "dxil_module.h"
#include "dxil_nir.h"
#include "dxil_signature.h"

#include "nir/nir_builder.h"
#include "util/u_debug.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"

#include "git_sha1.h"

#include "vulkan/vulkan_core.h"

#include <stdint.h>

int debug_dxil = 0;

static const struct debug_named_value
dxil_debug_options[] = {
   { "verbose", DXIL_DEBUG_VERBOSE, NULL },
   { "dump_blob", DXIL_DEBUG_DUMP_BLOB, "Write shader blobs" },
   { "trace", DXIL_DEBUG_TRACE, "Trace instruction conversion" },
   { "dump_module", DXIL_DEBUG_DUMP_MODULE, "Dump module tree to stderr" },
   DEBUG_NAMED_VALUE_END
};

DEBUG_GET_ONCE_FLAGS_OPTION(debug_dxil, "DXIL_DEBUG", dxil_debug_options, 0)

/* Wrapped in do/while so each macro expands to a single statement, even as
 * the body of an unbraced if/else.
 */
#define NIR_INSTR_UNSUPPORTED(instr) \
   do { \
      if (debug_dxil & DXIL_DEBUG_VERBOSE) { \
         fprintf(stderr, "Unsupported instruction:"); \
         nir_print_instr(instr, stderr); \
         fprintf(stderr, "\n"); \
      } \
   } while (0)

#define TRACE_CONVERSION(instr) \
   do { \
      if (debug_dxil & DXIL_DEBUG_TRACE) { \
         fprintf(stderr, "Convert '"); \
         nir_print_instr(instr, stderr); \
         fprintf(stderr, "'\n"); \
      } \
   } while (0)

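/* NIR compiler options for this backend: everything the DXIL emitter does
 * not handle directly (ffma, fpow, the bitfield and pack/unpack opcodes,
 * saturating math, most double ops, etc.) is requested to be lowered by NIR
 * before conversion, so the emitter only sees operations with a direct DXIL
 * mapping.
 */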
static const nir_shader_compiler_options
nir_options = {
   .lower_ineg = true,
   .lower_fneg = true,
   .lower_ffma16 = true,
   .lower_ffma32 = true,
   .lower_isign = true,
   .lower_fsign = true,
   .lower_iabs = true,
   .lower_fmod = true,
   .lower_fpow = true,
   .lower_scmp = true,
   .lower_ldexp = true,
   .lower_flrp16 = true,
   .lower_flrp32 = true,
   .lower_flrp64 = true,
   .lower_bitfield_extract = true,
   .lower_find_msb_to_reverse = true,
   .lower_extract_word = true,
   .lower_extract_byte = true,
   .lower_insert_word = true,
   .lower_insert_byte = true,
   .lower_all_io_to_elements = true,
   .lower_all_io_to_temps = true,
   .lower_hadd = true,
   .lower_uadd_sat = true,
   .lower_usub_sat = true,
   .lower_iadd_sat = true,
   .lower_uadd_carry = true,
   .lower_mul_high = true,
   .lower_rotate = true,
   .lower_pack_64_2x32_split = true,
   .lower_pack_32_2x16_split = true,
   .lower_unpack_64_2x32_split = true,
   .lower_unpack_32_2x16_split = true,
   .lower_unpack_half_2x16 = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_interpolate_at = true,
   .has_fsub = true,
   .has_isub = true,
   .use_scoped_barrier = true,
   .vertex_id_zero_based = true,
   .lower_base_vertex = true,
   .lower_helper_invocation = true,
   .has_cs_global_id = true,
   .has_txs = true,
   .lower_mul_2x32_64 = true,
   .lower_doubles_options =
      nir_lower_drcp |
      nir_lower_dsqrt |
      nir_lower_drsq |
      nir_lower_dfract |
      nir_lower_dtrunc |
      nir_lower_dfloor |
      nir_lower_dceil |
      nir_lower_dround_even,
   .max_unroll_iterations = 32, /* arbitrary */
   .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
};

const nir_shader_compiler_options *
dxil_get_nir_compiler_options(void)
{
   return &nir_options;
}

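/* The helpers below emit module-level metadata: the llvm.ident compiler tag,
 * the "dx.version"/"dx.valver" version pairs, and the dx.shaderModel triple
 * (shader kind plus shader-model major/minor).
 */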
static bool
emit_llvm_ident(struct dxil_module *m)
{
   const struct dxil_mdnode *compiler = dxil_get_metadata_string(m, "Mesa version " PACKAGE_VERSION MESA_GIT_SHA1);
   if (!compiler)
      return false;

   const struct dxil_mdnode *llvm_ident = dxil_get_metadata_node(m, &compiler, 1);
   return llvm_ident &&
          dxil_add_metadata_named_node(m, "llvm.ident", &llvm_ident, 1);
}

static bool
emit_named_version(struct dxil_module *m, const char *name,
                   int major, int minor)
{
   const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, major);
   const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, minor);
   const struct dxil_mdnode *version_nodes[] = { major_node, minor_node };
   const struct dxil_mdnode *version = dxil_get_metadata_node(m, version_nodes,
                                                              ARRAY_SIZE(version_nodes));
   return dxil_add_metadata_named_node(m, name, &version, 1);
}

static const char *
get_shader_kind_str(enum dxil_shader_kind kind)
{
   switch (kind) {
   case DXIL_PIXEL_SHADER:
      return "ps";
   case DXIL_VERTEX_SHADER:
      return "vs";
   case DXIL_GEOMETRY_SHADER:
      return "gs";
   case DXIL_HULL_SHADER:
      return "hs";
   case DXIL_DOMAIN_SHADER:
      return "ds";
   case DXIL_COMPUTE_SHADER:
      return "cs";
   default:
      unreachable("invalid shader kind");
   }
}

static bool
emit_dx_shader_model(struct dxil_module *m)
{
   const struct dxil_mdnode *type_node = dxil_get_metadata_string(m, get_shader_kind_str(m->shader_kind));
   const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, m->major_version);
   const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, m->minor_version);
   const struct dxil_mdnode *shader_model[] = { type_node, major_node,
                                                minor_node };
   const struct dxil_mdnode *dx_shader_model = dxil_get_metadata_node(m, shader_model, ARRAY_SIZE(shader_model));

   return dxil_add_metadata_named_node(m, "dx.shaderModel",
                                       &dx_shader_model, 1);
}

enum {
   DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG = 0,
   DXIL_STRUCTURED_BUFFER_ELEMENT_STRIDE_TAG = 1
};

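/* DXIL intrinsic opcode numbers, as defined by the DXIL specification
 * (DXIL.rst in the DirectXShaderCompiler repository). Each dx.op.* call
 * emitted below passes one of these as its leading i32 argument.
 */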
enum dxil_intr {
   DXIL_INTR_LOAD_INPUT = 4,
   DXIL_INTR_STORE_OUTPUT = 5,
   DXIL_INTR_FABS = 6,
   DXIL_INTR_SATURATE = 7,

   DXIL_INTR_ISFINITE = 10,
   DXIL_INTR_ISNORMAL = 11,

   DXIL_INTR_FCOS = 12,
   DXIL_INTR_FSIN = 13,

   DXIL_INTR_FEXP2 = 21,
   DXIL_INTR_FRC = 22,
   DXIL_INTR_FLOG2 = 23,

   DXIL_INTR_SQRT = 24,
   DXIL_INTR_RSQRT = 25,
   DXIL_INTR_ROUND_NE = 26,
   DXIL_INTR_ROUND_NI = 27,
   DXIL_INTR_ROUND_PI = 28,
   DXIL_INTR_ROUND_Z = 29,

   DXIL_INTR_BFREV = 30,
   DXIL_INTR_COUNTBITS = 31,
   DXIL_INTR_FIRSTBIT_LO = 32,
   DXIL_INTR_FIRSTBIT_HI = 33,
   DXIL_INTR_FIRSTBIT_SHI = 34,

   DXIL_INTR_FMAX = 35,
   DXIL_INTR_FMIN = 36,
   DXIL_INTR_IMAX = 37,
   DXIL_INTR_IMIN = 38,
   DXIL_INTR_UMAX = 39,
   DXIL_INTR_UMIN = 40,

   DXIL_INTR_FMA = 47,

   DXIL_INTR_IBFE = 51,
   DXIL_INTR_UBFE = 52,
   DXIL_INTR_BFI = 53,

   DXIL_INTR_CREATE_HANDLE = 57,
   DXIL_INTR_CBUFFER_LOAD_LEGACY = 59,

   DXIL_INTR_SAMPLE = 60,
   DXIL_INTR_SAMPLE_BIAS = 61,
   DXIL_INTR_SAMPLE_LEVEL = 62,
   DXIL_INTR_SAMPLE_GRAD = 63,
   DXIL_INTR_SAMPLE_CMP = 64,
   DXIL_INTR_SAMPLE_CMP_LVL_ZERO = 65,

   DXIL_INTR_TEXTURE_LOAD = 66,
   DXIL_INTR_TEXTURE_STORE = 67,

   DXIL_INTR_BUFFER_LOAD = 68,
   DXIL_INTR_BUFFER_STORE = 69,

   DXIL_INTR_TEXTURE_SIZE = 72,
   DXIL_INTR_TEXTURE_GATHER = 73,
   DXIL_INTR_TEXTURE_GATHER_CMP = 74,

   DXIL_INTR_TEXTURE2DMS_GET_SAMPLE_POSITION = 75,
   DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION = 76,
   DXIL_INTR_RENDER_TARGET_GET_SAMPLE_COUNT = 77,

   DXIL_INTR_ATOMIC_BINOP = 78,
   DXIL_INTR_ATOMIC_CMPXCHG = 79,
   DXIL_INTR_BARRIER = 80,
   DXIL_INTR_TEXTURE_LOD = 81,

   DXIL_INTR_DISCARD = 82,
   DXIL_INTR_DDX_COARSE = 83,
   DXIL_INTR_DDY_COARSE = 84,
   DXIL_INTR_DDX_FINE = 85,
   DXIL_INTR_DDY_FINE = 86,

   DXIL_INTR_EVAL_SNAPPED = 87,
   DXIL_INTR_EVAL_SAMPLE_INDEX = 88,
   DXIL_INTR_EVAL_CENTROID = 89,

   DXIL_INTR_SAMPLE_INDEX = 90,
   DXIL_INTR_COVERAGE = 91,

   DXIL_INTR_THREAD_ID = 93,
   DXIL_INTR_GROUP_ID = 94,
   DXIL_INTR_THREAD_ID_IN_GROUP = 95,
   DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP = 96,

   DXIL_INTR_EMIT_STREAM = 97,
   DXIL_INTR_CUT_STREAM = 98,

   DXIL_INTR_GS_INSTANCE_ID = 100,

   DXIL_INTR_MAKE_DOUBLE = 101,
   DXIL_INTR_SPLIT_DOUBLE = 102,

   DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT = 103,
   DXIL_INTR_LOAD_PATCH_CONSTANT = 104,
   DXIL_INTR_DOMAIN_LOCATION = 105,
   DXIL_INTR_STORE_PATCH_CONSTANT = 106,
   DXIL_INTR_OUTPUT_CONTROL_POINT_ID = 107,
   DXIL_INTR_PRIMITIVE_ID = 108,

   DXIL_INTR_LEGACY_F32TOF16 = 130,
   DXIL_INTR_LEGACY_F16TOF32 = 131,

   DXIL_INTR_ATTRIBUTE_AT_VERTEX = 137,
};

enum dxil_atomic_op {
   DXIL_ATOMIC_ADD = 0,
   DXIL_ATOMIC_AND = 1,
   DXIL_ATOMIC_OR = 2,
   DXIL_ATOMIC_XOR = 3,
   DXIL_ATOMIC_IMIN = 4,
   DXIL_ATOMIC_IMAX = 5,
   DXIL_ATOMIC_UMIN = 6,
   DXIL_ATOMIC_UMAX = 7,
   DXIL_ATOMIC_EXCHANGE = 8,
};

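/* Describes one contiguous range of a resource class: the metadata node id,
 * the base binding (lower bound), the number of bindings in the range, and
 * the register space the range lives in.
 */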
typedef struct {
   unsigned id;
   unsigned binding;
   unsigned size;
   unsigned space;
} resource_array_layout;

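/* The metadata records for all resource classes share the same six leading
 * fields, filled in here; the emit_*_metadata() helpers below append the
 * class-specific fields after them.
 */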
static void
fill_resource_metadata(struct dxil_module *m, const struct dxil_mdnode **fields,
                       const struct dxil_type *struct_type,
                       const char *name, const resource_array_layout *layout)
{
   const struct dxil_type *pointer_type = dxil_module_get_pointer_type(m, struct_type);
   const struct dxil_value *pointer_undef = dxil_module_get_undef(m, pointer_type);

   fields[0] = dxil_get_metadata_int32(m, layout->id); // resource ID
   fields[1] = dxil_get_metadata_value(m, pointer_type, pointer_undef); // global constant symbol
   fields[2] = dxil_get_metadata_string(m, name ? name : ""); // name
   fields[3] = dxil_get_metadata_int32(m, layout->space); // space ID
   fields[4] = dxil_get_metadata_int32(m, layout->binding); // lower bound
   fields[5] = dxil_get_metadata_int32(m, layout->size); // range size
}

static const struct dxil_mdnode *
emit_srv_metadata(struct dxil_module *m, const struct dxil_type *elem_type,
                  const char *name, const resource_array_layout *layout,
                  enum dxil_component_type comp_type,
                  enum dxil_resource_kind res_kind)
{
   const struct dxil_mdnode *fields[9];

   const struct dxil_mdnode *metadata_tag_nodes[2];

   fill_resource_metadata(m, fields, elem_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
   fields[7] = dxil_get_metadata_int1(m, 0); // sample count
   if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
       res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
      metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
      metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
      fields[8] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
   } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      fields[8] = NULL;
   else
      unreachable("Structured buffers not supported yet");

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_uav_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                  const char *name, const resource_array_layout *layout,
                  enum dxil_component_type comp_type,
                  enum dxil_resource_kind res_kind)
{
   const struct dxil_mdnode *fields[11];

   const struct dxil_mdnode *metadata_tag_nodes[2];

   fill_resource_metadata(m, fields, struct_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape
   fields[7] = dxil_get_metadata_int1(m, false); // globally-coherent
   fields[8] = dxil_get_metadata_int1(m, false); // has counter
   fields[9] = dxil_get_metadata_int1(m, false); // is ROV
   if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&
       res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {
      metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);
      metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);
      fields[10] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata
   } else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      fields[10] = NULL;
   else
      unreachable("Structured buffers not supported yet");

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_cbv_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                  const char *name, const resource_array_layout *layout,
                  unsigned size)
{
   const struct dxil_mdnode *fields[8];

   fill_resource_metadata(m, fields, struct_type, name, layout);
   fields[6] = dxil_get_metadata_int32(m, size); // constant buffer size
   fields[7] = NULL; // metadata

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}

static const struct dxil_mdnode *
emit_sampler_metadata(struct dxil_module *m, const struct dxil_type *struct_type,
                      nir_variable *var, const resource_array_layout *layout)
{
   const struct dxil_mdnode *fields[8];
   const struct glsl_type *type = glsl_without_array(var->type);

   fill_resource_metadata(m, fields, struct_type, var->name, layout);
   enum dxil_sampler_kind sampler_kind = glsl_sampler_type_is_shadow(type) ?
      DXIL_SAMPLER_KIND_COMPARISON : DXIL_SAMPLER_KIND_DEFAULT;
   fields[6] = dxil_get_metadata_int32(m, sampler_kind); // sampler kind
   fields[7] = NULL; // metadata

   return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));
}


#define MAX_SRVS 128
#define MAX_UAVS 64
#define MAX_CBVS 64 // ??
#define MAX_SAMPLERS 64 // ??

struct dxil_def {
   const struct dxil_value *chans[NIR_MAX_VEC_COMPONENTS];
};

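/* All state for a single NIR-to-DXIL conversion: the module being built,
 * per-class resource metadata and cached resource handles, the SSA-def to
 * dxil_value mapping, and assorted per-stage bookkeeping.
 */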
struct ntd_context {
   void *ralloc_ctx;
   const struct nir_to_dxil_options *opts;
   struct nir_shader *shader;

   struct dxil_module mod;

   struct util_dynarray srv_metadata_nodes;
   const struct dxil_value *srv_handles[MAX_SRVS];

   struct util_dynarray uav_metadata_nodes;
   const struct dxil_value *ssbo_handles[MAX_UAVS];
   const struct dxil_value *image_handles[MAX_UAVS];
   uint32_t num_uavs;

   struct util_dynarray cbv_metadata_nodes;
   const struct dxil_value *cbv_handles[MAX_CBVS];

   struct util_dynarray sampler_metadata_nodes;
   const struct dxil_value *sampler_handles[MAX_SAMPLERS];

   struct util_dynarray resources;

   const struct dxil_mdnode *shader_property_nodes[6];
   size_t num_shader_property_nodes;

   struct dxil_def *defs;
   unsigned num_defs;
   struct hash_table *phis;

   const struct dxil_value *sharedvars;
   const struct dxil_value *scratchvars;
   struct hash_table *consts;

   nir_variable *ps_front_face;
   nir_variable *system_value[SYSTEM_VALUE_MAX];

   nir_function *tess_ctrl_patch_constant_func;
   unsigned tess_input_control_point_count;

   struct dxil_func_def *main_func_def;
   struct dxil_func_def *tess_ctrl_patch_constant_func_def;
   unsigned unnamed_ubo_count;
};

static const char *
unary_func_name(enum dxil_intr intr)
{
   switch (intr) {
   case DXIL_INTR_COUNTBITS:
   case DXIL_INTR_FIRSTBIT_HI:
   case DXIL_INTR_FIRSTBIT_SHI:
   case DXIL_INTR_FIRSTBIT_LO:
      return "dx.op.unaryBits";
   case DXIL_INTR_ISFINITE:
   case DXIL_INTR_ISNORMAL:
      return "dx.op.isSpecialFloat";
   default:
      return "dx.op.unary";
   }
}

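/* Every dx.op.* intrinsic call takes its DXIL opcode as a leading i32
 * argument, followed by the actual operands, and the callee is looked up by
 * name plus overload type. As an illustrative example,
 * emit_unary_call(ctx, DXIL_F32, DXIL_INTR_FCOS, op) would emit roughly:
 *
 *    call float @dx.op.unary.f32(i32 12, float %op)
 */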
static const struct dxil_value *
emit_unary_call(struct ntd_context *ctx, enum overload_type overload,
                enum dxil_intr intr,
                const struct dxil_value *op0)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod,
                                                    unary_func_name(intr),
                                                    overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_binary_call(struct ntd_context *ctx, enum overload_type overload,
                 enum dxil_intr intr,
                 const struct dxil_value *op0, const struct dxil_value *op1)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.binary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0,
      op1
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_tertiary_call(struct ntd_context *ctx, enum overload_type overload,
                   enum dxil_intr intr,
                   const struct dxil_value *op0,
                   const struct dxil_value *op1,
                   const struct dxil_value *op2)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.tertiary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0,
      op1,
      op2
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_quaternary_call(struct ntd_context *ctx, enum overload_type overload,
                     enum dxil_intr intr,
                     const struct dxil_value *op0,
                     const struct dxil_value *op1,
                     const struct dxil_value *op2,
                     const struct dxil_value *op3)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.quaternary", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      op0,
      op1,
      op2,
      op3
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

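/* Compute system values: threadId, threadIdInGroup, flattenedThreadIdInGroup
 * and groupId each map to one dx.op intrinsic, taking the component index as
 * the only operand (flattenedThreadIdInGroup takes none).
 */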
static const struct dxil_value *
emit_threadid_call(struct ntd_context *ctx, const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadId", DXIL_I32);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_THREAD_ID);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_threadidingroup_call(struct ntd_context *ctx,
                          const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadIdInGroup", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_THREAD_ID_IN_GROUP);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_flattenedthreadidingroup_call(struct ntd_context *ctx)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.flattenedThreadIdInGroup", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_FLATTENED_THREAD_ID_IN_GROUP);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_groupid_call(struct ntd_context *ctx, const struct dxil_value *comp)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.groupId", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_GROUP_ID);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      comp
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

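/* Raw/typed buffer and texture access. bufferLoad/bufferStore address the
 * resource with an index/offset pair, while textureLoad/textureStore take up
 * to three coordinates; unused LOD and offset slots are passed as i32 undef,
 * and the stores take four values plus a write mask.
 */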
static const struct dxil_value *
emit_bufferload_call(struct ntd_context *ctx,
                     const struct dxil_value *handle,
                     const struct dxil_value *coord[2],
                     enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferLoad", overload);
   if (!func)
      return NULL;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_BUFFER_LOAD);
   const struct dxil_value *args[] = { opcode, handle, coord[0], coord[1] };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_bufferstore_call(struct ntd_context *ctx,
                      const struct dxil_value *handle,
                      const struct dxil_value *coord[2],
                      const struct dxil_value *value[4],
                      const struct dxil_value *write_mask,
                      enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferStore", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_BUFFER_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1],
      value[0], value[1], value[2], value[3],
      write_mask
   };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_textureload_call(struct ntd_context *ctx,
                      const struct dxil_value *handle,
                      const struct dxil_value *coord[3],
                      enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", overload);
   if (!func)
      return NULL;
   const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
   const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_TEXTURE_LOAD);
   const struct dxil_value *args[] = { opcode, handle,
                                       /*lod_or_sample*/ int_undef,
                                       coord[0], coord[1], coord[2],
                                       /* offsets */ int_undef, int_undef, int_undef };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_texturestore_call(struct ntd_context *ctx,
                       const struct dxil_value *handle,
                       const struct dxil_value *coord[3],
                       const struct dxil_value *value[4],
                       const struct dxil_value *write_mask,
                       enum overload_type overload)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureStore", overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,
                                                                 DXIL_INTR_TEXTURE_STORE);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1], coord[2],
      value[0], value[1], value[2], value[3],
      write_mask
   };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_atomic_binop(struct ntd_context *ctx,
                  const struct dxil_value *handle,
                  enum dxil_atomic_op atomic_op,
                  const struct dxil_value *coord[3],
                  const struct dxil_value *value)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.atomicBinOp", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_BINOP);
   const struct dxil_value *atomic_op_value =
      dxil_module_get_int32_const(&ctx->mod, atomic_op);
   const struct dxil_value *args[] = {
      opcode, handle, atomic_op_value,
      coord[0], coord[1], coord[2], value
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_atomic_cmpxchg(struct ntd_context *ctx,
                    const struct dxil_value *handle,
                    const struct dxil_value *coord[3],
                    const struct dxil_value *cmpval,
                    const struct dxil_value *newval)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.atomicCompareExchange", DXIL_I32);

   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_CMPXCHG);
   const struct dxil_value *args[] = {
      opcode, handle, coord[0], coord[1], coord[2], cmpval, newval
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

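/* Resources are referenced through handles returned by dx.op.createHandle,
 * which identifies a resource by class (SRV/UAV/CBV/sampler), the range id
 * assigned when its metadata was emitted, and an index into that range.
 */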
static const struct dxil_value *
emit_createhandle_call(struct ntd_context *ctx,
                       enum dxil_resource_class resource_class,
                       unsigned resource_range_id,
                       const struct dxil_value *resource_range_index,
                       bool non_uniform_resource_index)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE);
   const struct dxil_value *resource_class_value = dxil_module_get_int8_const(&ctx->mod, resource_class);
   const struct dxil_value *resource_range_id_value = dxil_module_get_int32_const(&ctx->mod, resource_range_id);
   const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);
   if (!opcode || !resource_class_value || !resource_range_id_value ||
       !non_uniform_resource_index_value)
      return NULL;

   const struct dxil_value *args[] = {
      opcode,
      resource_class_value,
      resource_range_id_value,
      resource_range_index,
      non_uniform_resource_index_value
   };

   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.createHandle", DXIL_NONE);

   if (!func)
      return NULL;

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_createhandle_call_const_index(struct ntd_context *ctx,
                                   enum dxil_resource_class resource_class,
                                   unsigned resource_range_id,
                                   unsigned resource_range_index,
                                   bool non_uniform_resource_index)
{
   const struct dxil_value *resource_range_index_value = dxil_module_get_int32_const(&ctx->mod, resource_range_index);
   if (!resource_range_index_value)
      return NULL;

   return emit_createhandle_call(ctx, resource_class, resource_range_id,
                                 resource_range_index_value,
                                 non_uniform_resource_index);
}

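/* Record a declared resource range in ctx->resources, using the v0 or v1
 * record layout depending on the targeted validator version, and keep the
 * running UAV count up to date for the 64-UAV feature flag.
 */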
static void
add_resource(struct ntd_context *ctx, enum dxil_resource_type type,
             enum dxil_resource_kind kind,
             const resource_array_layout *layout)
{
   struct dxil_resource_v0 *resource_v0 = NULL;
   struct dxil_resource_v1 *resource_v1 = NULL;
   if (ctx->mod.minor_validator >= 6) {
      resource_v1 = util_dynarray_grow(&ctx->resources, struct dxil_resource_v1, 1);
      resource_v0 = &resource_v1->v0;
   } else {
      resource_v0 = util_dynarray_grow(&ctx->resources, struct dxil_resource_v0, 1);
   }
   resource_v0->resource_type = type;
   resource_v0->space = layout->space;
   resource_v0->lower_bound = layout->binding;
   if (layout->size == 0 || (uint64_t)layout->size + layout->binding >= UINT_MAX)
      resource_v0->upper_bound = UINT_MAX;
   else
      resource_v0->upper_bound = layout->binding + layout->size - 1;
   if (type == DXIL_RES_UAV_TYPED ||
       type == DXIL_RES_UAV_RAW ||
       type == DXIL_RES_UAV_STRUCTURED) {
      uint32_t new_uav_count = ctx->num_uavs + layout->size;
      if (layout->size == 0 || new_uav_count < ctx->num_uavs)
         ctx->num_uavs = UINT_MAX;
      else
         ctx->num_uavs = new_uav_count;
      if (ctx->mod.minor_validator >= 6 && ctx->num_uavs > 8)
         ctx->mod.feats.use_64uavs = 1;
   }

   if (resource_v1) {
      resource_v1->resource_kind = kind;
      /* No flags supported yet */
      resource_v1->resource_flags = 0;
   }
}

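/* Map (class, space, binding) back to the 0-based id of the declared range
 * that covers it. The ctx->resources array is laid out in declaration order
 * as CBVs, then samplers, then SRVs, then UAVs, so each class starts at a
 * fixed offset computed from the metadata node counts.
 */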
static unsigned
get_resource_id(struct ntd_context *ctx, enum dxil_resource_class class,
                unsigned space, unsigned binding)
{
   unsigned offset = 0;
   unsigned count = 0;

   unsigned num_srvs = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_uavs = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_cbvs = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);
   unsigned num_samplers = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);

   switch (class) {
   case DXIL_RESOURCE_CLASS_UAV:
      offset = num_srvs + num_samplers + num_cbvs;
      count = num_uavs;
      break;
   case DXIL_RESOURCE_CLASS_SRV:
      offset = num_samplers + num_cbvs;
      count = num_srvs;
      break;
   case DXIL_RESOURCE_CLASS_SAMPLER:
      offset = num_cbvs;
      count = num_samplers;
      break;
   case DXIL_RESOURCE_CLASS_CBV:
      offset = 0;
      count = num_cbvs;
      break;
   }

   unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
      sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
   assert(offset + count <= ctx->resources.size / resource_element_size);
   for (unsigned i = offset; i < offset + count; ++i) {
      const struct dxil_resource_v0 *resource = (const struct dxil_resource_v0 *)((const char *)ctx->resources.data + resource_element_size * i);
      if (resource->space == space &&
          resource->lower_bound <= binding &&
          resource->upper_bound >= binding) {
         return i - offset;
      }
   }

   unreachable("Resource access for undeclared range");
   return 0;
}

static bool
emit_srv(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   unsigned id = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);
   unsigned binding = var->data.binding;
   resource_array_layout layout = {id, binding, count, var->data.descriptor_set};

   enum dxil_component_type comp_type;
   enum dxil_resource_kind res_kind;
   enum dxil_resource_type res_type;
   if (var->data.mode == nir_var_mem_ssbo) {
      comp_type = DXIL_COMP_TYPE_INVALID;
      res_kind = DXIL_RESOURCE_KIND_RAW_BUFFER;
      res_type = DXIL_RES_SRV_RAW;
   } else {
      comp_type = dxil_get_comp_type(var->type);
      res_kind = dxil_get_resource_kind(var->type);
      res_type = DXIL_RES_SRV_TYPED;
   }
   const struct dxil_type *res_type_as_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, false /* readwrite */);

   if (glsl_type_is_array(var->type))
      res_type_as_type = dxil_module_get_array_type(&ctx->mod, res_type_as_type, count);

   const struct dxil_mdnode *srv_meta = emit_srv_metadata(&ctx->mod, res_type_as_type, var->name,
                                                          &layout, comp_type, res_kind);

   if (!srv_meta)
      return false;

   util_dynarray_append(&ctx->srv_metadata_nodes, const struct dxil_mdnode *, srv_meta);
   add_resource(ctx, res_type, res_kind, &layout);
   if (res_type == DXIL_RES_SRV_RAW)
      ctx->mod.raw_and_structured_buffers = true;

   return true;
}

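/* All SSBO variables, plus an optional number of extra bindings, are exposed
 * as a single UAV array of raw buffers named "globals"; handles into it are
 * created on demand rather than in emit_static_indexing_handles().
 */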
static bool
emit_globals(struct ntd_context *ctx, unsigned size)
{
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo)
      size++;

   if (!size)
      return true;

   const struct dxil_type *struct_type = dxil_module_get_res_type(&ctx->mod,
      DXIL_RESOURCE_KIND_RAW_BUFFER, DXIL_COMP_TYPE_INVALID, true /* readwrite */);
   if (!struct_type)
      return false;

   const struct dxil_type *array_type =
      dxil_module_get_array_type(&ctx->mod, struct_type, size);
   if (!array_type)
      return false;

   resource_array_layout layout = {0, 0, size, 0};
   const struct dxil_mdnode *uav_meta =
      emit_uav_metadata(&ctx->mod, array_type,
                        "globals", &layout,
                        DXIL_COMP_TYPE_INVALID,
                        DXIL_RESOURCE_KIND_RAW_BUFFER);
   if (!uav_meta)
      return false;

   util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta);
   if (ctx->mod.minor_validator < 6 &&
       util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8)
      ctx->mod.feats.use_64uavs = 1;
   /* Handles to UAVs used for kernel globals are created on-demand */
   add_resource(ctx, DXIL_RES_UAV_RAW, DXIL_RESOURCE_KIND_RAW_BUFFER, &layout);
   ctx->mod.raw_and_structured_buffers = true;
   return true;
}

static bool
emit_uav(struct ntd_context *ctx, unsigned binding, unsigned space, unsigned count,
         enum dxil_component_type comp_type, enum dxil_resource_kind res_kind, const char *name)
{
   unsigned id = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);
   resource_array_layout layout = { id, binding, count, space };

   const struct dxil_type *res_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, true /* readwrite */);
   res_type = dxil_module_get_array_type(&ctx->mod, res_type, count);
   const struct dxil_mdnode *uav_meta = emit_uav_metadata(&ctx->mod, res_type, name,
                                                          &layout, comp_type, res_kind);

   if (!uav_meta)
      return false;

   util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta);
   if (ctx->mod.minor_validator < 6 &&
       util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8)
      ctx->mod.feats.use_64uavs = 1;

   add_resource(ctx, res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER ? DXIL_RES_UAV_RAW : DXIL_RES_UAV_TYPED, res_kind, &layout);
   if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
      ctx->mod.raw_and_structured_buffers = true;
   if (ctx->mod.shader_kind != DXIL_PIXEL_SHADER &&
       ctx->mod.shader_kind != DXIL_COMPUTE_SHADER)
      ctx->mod.feats.uavs_at_every_stage = true;

   return true;
}

static bool
emit_uav_var(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   unsigned binding, space;
   if (ctx->opts->environment == DXIL_ENVIRONMENT_GL) {
      /* For GL, the image intrinsics are already lowered, using driver_location
       * as the 0-based image index. Use space 1 so that we can keep using these
       * NIR constants without having to remap them, and so they don't overlap
       * SSBOs, which are also 0-based UAV bindings.
       */
      binding = var->data.driver_location;
      space = 1;
   } else {
      binding = var->data.binding;
      space = var->data.descriptor_set;
   }
   enum dxil_component_type comp_type = dxil_get_comp_type(var->type);
   enum dxil_resource_kind res_kind = dxil_get_resource_kind(var->type);
   const char *name = var->name;

   return emit_uav(ctx, binding, space, count, comp_type, res_kind, name);
}

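/* Serialize a nir_constant into a flat byte array, recursing through arrays
 * and structs using the type's explicit CL layout (array strides and struct
 * field offsets).
 */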
static void
var_fill_const_array_with_vector_or_scalar(struct ntd_context *ctx,
                                           const struct nir_constant *c,
                                           const struct glsl_type *type,
                                           void *const_vals,
                                           unsigned int offset)
{
   assert(glsl_type_is_vector_or_scalar(type));
   unsigned int components = glsl_get_vector_elements(type);
   unsigned bit_size = glsl_get_bit_size(type);
   unsigned int increment = bit_size / 8;

   for (unsigned int comp = 0; comp < components; comp++) {
      uint8_t *dst = (uint8_t *)const_vals + offset;

      switch (bit_size) {
      case 64:
         memcpy(dst, &c->values[comp].u64, sizeof(c->values[0].u64));
         break;
      case 32:
         memcpy(dst, &c->values[comp].u32, sizeof(c->values[0].u32));
         break;
      case 16:
         memcpy(dst, &c->values[comp].u16, sizeof(c->values[0].u16));
         break;
      case 8:
         assert(glsl_base_type_is_integer(glsl_get_base_type(type)));
         memcpy(dst, &c->values[comp].u8, sizeof(c->values[0].u8));
         break;
      default:
         unreachable("unexpected bit-size");
      }

      offset += increment;
   }
}

static void
var_fill_const_array(struct ntd_context *ctx, const struct nir_constant *c,
                     const struct glsl_type *type, void *const_vals,
                     unsigned int offset)
{
   assert(!glsl_type_is_interface(type));

   if (glsl_type_is_vector_or_scalar(type)) {
      var_fill_const_array_with_vector_or_scalar(ctx, c, type,
                                                 const_vals,
                                                 offset);
   } else if (glsl_type_is_array(type)) {
      assert(!glsl_type_is_unsized_array(type));
      const struct glsl_type *without = glsl_without_array(type);
      unsigned stride = glsl_get_explicit_stride(without);

      for (unsigned elt = 0; elt < glsl_get_length(type); elt++) {
         var_fill_const_array(ctx, c->elements[elt], without,
                              const_vals, offset + (elt * stride));
         offset += glsl_get_cl_size(without);
      }
   } else if (glsl_type_is_struct(type)) {
      for (unsigned int elt = 0; elt < glsl_get_length(type); elt++) {
         const struct glsl_type *elt_type = glsl_get_struct_field(type, elt);
         unsigned field_offset = glsl_get_struct_field_offset(type, elt);

         var_fill_const_array(ctx, c->elements[elt],
                              elt_type, const_vals,
                              offset + field_offset);
      }
   } else
      unreachable("unknown GLSL type in var_fill_const_array");
}

static bool
emit_global_consts(struct ntd_context *ctx)
{
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_temp) {
      assert(var->constant_initializer);

      unsigned int num_members = DIV_ROUND_UP(glsl_get_cl_size(var->type), 4);
      uint32_t *const_ints = ralloc_array(ctx->ralloc_ctx, uint32_t, num_members);
      if (!const_ints)
         return false;
      var_fill_const_array(ctx, var->constant_initializer, var->type,
                           const_ints, 0);
      const struct dxil_value **const_vals =
         ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, num_members);
      if (!const_vals)
         return false;
      for (unsigned int i = 0; i < num_members; i++)
         const_vals[i] = dxil_module_get_int32_const(&ctx->mod, const_ints[i]);

      const struct dxil_type *elt_type = dxil_module_get_int_type(&ctx->mod, 32);
      if (!elt_type)
         return false;
      const struct dxil_type *type =
         dxil_module_get_array_type(&ctx->mod, elt_type, num_members);
      if (!type)
         return false;
      const struct dxil_value *agg_vals =
         dxil_module_get_array_const(&ctx->mod, type, const_vals);
      if (!agg_vals)
         return false;

      const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name, type,
                                                              DXIL_AS_DEFAULT, 4,
                                                              agg_vals);
      if (!gvar)
         return false;

      if (!_mesa_hash_table_insert(ctx->consts, var, (void *)gvar))
         return false;
   }

   return true;
}

static bool
emit_cbv(struct ntd_context *ctx, unsigned binding, unsigned space,
         unsigned size, unsigned count, char *name)
{
   assert(count != 0);

   unsigned idx = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);

   const struct dxil_type *float32 = dxil_module_get_float_type(&ctx->mod, 32);
   const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, float32, size);
   const struct dxil_type *buffer_type = dxil_module_get_struct_type(&ctx->mod, name,
                                                                     &array_type, 1);
   // All ubo[1]s should have been lowered to ubo with static indexing
   const struct dxil_type *final_type = count != 1 ? dxil_module_get_array_type(&ctx->mod, buffer_type, count) : buffer_type;
   resource_array_layout layout = {idx, binding, count, space};
   const struct dxil_mdnode *cbv_meta = emit_cbv_metadata(&ctx->mod, final_type,
                                                          name, &layout, 4 * size);

   if (!cbv_meta)
      return false;

   util_dynarray_append(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *, cbv_meta);
   add_resource(ctx, DXIL_RES_CBV, DXIL_RESOURCE_KIND_CBUFFER, &layout);

   return true;
}

static bool
emit_ubo_var(struct ntd_context *ctx, nir_variable *var)
{
   unsigned count = 1;
   if (glsl_type_is_array(var->type))
      count = glsl_get_length(var->type);

   char *name = var->name;
   char temp_name[30];
   if (name && strlen(name) == 0) {
      snprintf(temp_name, sizeof(temp_name), "__unnamed_ubo_%d",
               ctx->unnamed_ubo_count++);
      name = temp_name;
   }

   const struct glsl_type *type = glsl_without_array(var->type);
   assert(glsl_type_is_struct(type) || glsl_type_is_interface(type));
   unsigned dwords = ALIGN_POT(glsl_get_explicit_size(type, false), 16) / 4;

   return emit_cbv(ctx, var->data.binding, var->data.descriptor_set,
                   dwords, count, name);
}

static bool
emit_sampler(struct ntd_context *ctx, nir_variable *var, unsigned count)
{
   unsigned id = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);
   unsigned binding = var->data.binding;
   resource_array_layout layout = {id, binding, count, var->data.descriptor_set};
   const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
   const struct dxil_type *sampler_type = dxil_module_get_struct_type(&ctx->mod, "struct.SamplerState", &int32_type, 1);

   if (glsl_type_is_array(var->type))
      sampler_type = dxil_module_get_array_type(&ctx->mod, sampler_type, count);

   const struct dxil_mdnode *sampler_meta = emit_sampler_metadata(&ctx->mod, sampler_type, var, &layout);

   if (!sampler_meta)
      return false;

   util_dynarray_append(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *, sampler_meta);
   add_resource(ctx, DXIL_RES_SAMPLER, DXIL_RESOURCE_KIND_SAMPLER, &layout);

   return true;
}

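/* For GL and CL, resources addressed with constant indices get their handles
 * created once, up front, by walking the declared ranges; Vulkan (and the CL
 * "globals" UAV array) instead uses dynamic handles emitted from the NIR
 * instructions that reference the resource.
 */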
static bool
emit_static_indexing_handles(struct ntd_context *ctx)
{
   /* Vulkan always uses dynamic handles, from instructions in the NIR */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN)
      return true;

   unsigned last_res_class = -1;
   unsigned id = 0;

   unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
      sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
   for (struct dxil_resource_v0 *res = (struct dxil_resource_v0 *)ctx->resources.data;
        res < (struct dxil_resource_v0 *)((char *)ctx->resources.data + ctx->resources.size);
        res = (struct dxil_resource_v0 *)((char *)res + resource_element_size)) {
      enum dxil_resource_class res_class;
      const struct dxil_value **handle_array;
      switch (res->resource_type) {
      case DXIL_RES_SRV_TYPED:
      case DXIL_RES_SRV_RAW:
      case DXIL_RES_SRV_STRUCTURED:
         res_class = DXIL_RESOURCE_CLASS_SRV;
         handle_array = ctx->srv_handles;
         break;
      case DXIL_RES_CBV:
         res_class = DXIL_RESOURCE_CLASS_CBV;
         handle_array = ctx->cbv_handles;
         break;
      case DXIL_RES_SAMPLER:
         res_class = DXIL_RESOURCE_CLASS_SAMPLER;
         handle_array = ctx->sampler_handles;
         break;
      case DXIL_RES_UAV_RAW:
         res_class = DXIL_RESOURCE_CLASS_UAV;
         handle_array = ctx->ssbo_handles;
         break;
      case DXIL_RES_UAV_TYPED:
      case DXIL_RES_UAV_STRUCTURED:
      case DXIL_RES_UAV_STRUCTURED_WITH_COUNTER:
         res_class = DXIL_RESOURCE_CLASS_UAV;
         handle_array = ctx->image_handles;
         break;
      default:
         unreachable("Unexpected resource type");
      }

      if (last_res_class != res_class)
         id = 0;
      else
         id++;
      last_res_class = res_class;

      if (res->space > 1)
         continue;
      assert(res->space == 0 ||
             (res->space == 1 &&
              res->resource_type != DXIL_RES_UAV_RAW &&
              ctx->opts->environment == DXIL_ENVIRONMENT_GL));

      /* CL uses dynamic handles for the "globals" UAV array, but uses static
       * handles for UBOs, textures, and samplers.
       */
      if (ctx->opts->environment == DXIL_ENVIRONMENT_CL &&
          res->resource_type == DXIL_RES_UAV_RAW)
         continue;

      for (unsigned i = res->lower_bound; i <= res->upper_bound; ++i) {
         handle_array[i] = emit_createhandle_call_const_index(ctx, res_class, id, i, false);
         if (!handle_array[i])
            return false;
      }
   }
   return true;
}

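/* Geometry-shader state metadata: input primitive, max output vertex count,
 * active stream mask, output topology, and invocation (instance) count.
 */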
static const struct dxil_mdnode *
emit_gs_state(struct ntd_context *ctx)
{
   const struct dxil_mdnode *gs_state_nodes[5];
   const nir_shader *s = ctx->shader;

   gs_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, dxil_get_input_primitive(s->info.gs.input_primitive));
   gs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.vertices_out);
   gs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.gs.active_stream_mask, 1));
   gs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, dxil_get_primitive_topology(s->info.gs.output_primitive));
   gs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.invocations);

   for (unsigned i = 0; i < ARRAY_SIZE(gs_state_nodes); ++i) {
      if (!gs_state_nodes[i])
         return NULL;
   }

   return dxil_get_metadata_node(&ctx->mod, gs_state_nodes, ARRAY_SIZE(gs_state_nodes));
}

static enum dxil_tessellator_domain
get_tessellator_domain(enum tess_primitive_mode primitive_mode)
{
   switch (primitive_mode) {
   case TESS_PRIMITIVE_QUADS: return DXIL_TESSELLATOR_DOMAIN_QUAD;
   case TESS_PRIMITIVE_TRIANGLES: return DXIL_TESSELLATOR_DOMAIN_TRI;
   case TESS_PRIMITIVE_ISOLINES: return DXIL_TESSELLATOR_DOMAIN_ISOLINE;
   default:
      unreachable("Invalid tessellator primitive mode");
   }
}

static enum dxil_tessellator_partitioning
get_tessellator_partitioning(enum gl_tess_spacing spacing)
{
   switch (spacing) {
   default:
   case TESS_SPACING_EQUAL:
      return DXIL_TESSELLATOR_PARTITIONING_INTEGER;
   case TESS_SPACING_FRACTIONAL_EVEN:
      return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN;
   case TESS_SPACING_FRACTIONAL_ODD:
      return DXIL_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD;
   }
}

static enum dxil_tessellator_output_primitive
get_tessellator_output_primitive(const struct shader_info *info)
{
   if (info->tess.point_mode)
      return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_POINT;
   if (info->tess._primitive_mode == TESS_PRIMITIVE_ISOLINES)
      return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_LINE;
   /* Note: GL tessellation domain is inverted from D3D, which means triangle
    * winding needs to be inverted.
    */
   if (info->tess.ccw)
      return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CW;
   return DXIL_TESSELLATOR_OUTPUT_PRIMITIVE_TRIANGLE_CCW;
}

static const struct dxil_mdnode *
emit_hs_state(struct ntd_context *ctx)
{
   const struct dxil_mdnode *hs_state_nodes[7];

   hs_state_nodes[0] = dxil_get_metadata_func(&ctx->mod, ctx->tess_ctrl_patch_constant_func_def->func);
   hs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->tess_input_control_point_count);
   hs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out);
   hs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode));
   hs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_partitioning(ctx->shader->info.tess.spacing));
   hs_state_nodes[5] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_output_primitive(&ctx->shader->info));
   hs_state_nodes[6] = dxil_get_metadata_float32(&ctx->mod, 64.0f);

   return dxil_get_metadata_node(&ctx->mod, hs_state_nodes, ARRAY_SIZE(hs_state_nodes));
}

static const struct dxil_mdnode *
emit_ds_state(struct ntd_context *ctx)
{
   const struct dxil_mdnode *ds_state_nodes[2];

   ds_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, get_tessellator_domain(ctx->shader->info.tess._primitive_mode));
   ds_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, ctx->shader->info.tess.tcs_vertices_out);

   return dxil_get_metadata_node(&ctx->mod, ds_state_nodes, ARRAY_SIZE(ds_state_nodes));
}

static const struct dxil_mdnode *
emit_threads(struct ntd_context *ctx)
{
   const nir_shader *s = ctx->shader;
   const struct dxil_mdnode *threads_x = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[0], 1));
   const struct dxil_mdnode *threads_y = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[1], 1));
   const struct dxil_mdnode *threads_z = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[2], 1));
   if (!threads_x || !threads_y || !threads_z)
      return NULL;

   const struct dxil_mdnode *threads_nodes[] = { threads_x, threads_y, threads_z };
   return dxil_get_metadata_node(&ctx->mod, threads_nodes, ARRAY_SIZE(threads_nodes));
}

static int64_t
get_module_flags(struct ntd_context *ctx)
{
   /* See the DXIL documentation for the definition of these flags:
    *
    * https://github.com/Microsoft/DirectXShaderCompiler/blob/master/docs/DXIL.rst#shader-flags
    */

   uint64_t flags = 0;
   if (ctx->mod.feats.doubles)
      flags |= (1 << 2);
   if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT &&
       ctx->shader->info.fs.early_fragment_tests)
      flags |= (1 << 3);
   if (ctx->mod.raw_and_structured_buffers)
      flags |= (1 << 4);
   if (ctx->mod.feats.min_precision)
      flags |= (1 << 5);
   if (ctx->mod.feats.dx11_1_double_extensions)
      flags |= (1 << 6);
   if (ctx->mod.feats.array_layer_from_vs_or_ds)
      flags |= (1 << 9);
   if (ctx->mod.feats.inner_coverage)
      flags |= (1 << 10);
   if (ctx->mod.feats.typed_uav_load_additional_formats)
      flags |= (1 << 13);
   if (ctx->mod.feats.use_64uavs)
      flags |= (1 << 15);
   if (ctx->mod.feats.uavs_at_every_stage)
      flags |= (1 << 16);
   if (ctx->mod.feats.cs_4x_raw_sb)
      flags |= (1 << 17);
   if (ctx->mod.feats.wave_ops)
      flags |= (1 << 19);
   if (ctx->mod.feats.int64_ops)
      flags |= (1 << 20);
   if (ctx->mod.feats.barycentrics)
      flags |= (1 << 22);
   if (ctx->mod.feats.stencil_ref)
      flags |= (1 << 11);
   if (ctx->mod.feats.native_low_precision)
      flags |= (1 << 23) | (1 << 5);

   if (ctx->opts->disable_math_refactoring)
      flags |= (1 << 1);

   return flags;
}

static const struct dxil_mdnode *
emit_entrypoint(struct ntd_context *ctx,
                const struct dxil_func *func, const char *name,
                const struct dxil_mdnode *signatures,
                const struct dxil_mdnode *resources,
                const struct dxil_mdnode *shader_props)
{
   char truncated_name[254] = { 0 };
   strncpy(truncated_name, name, ARRAY_SIZE(truncated_name) - 1);

   const struct dxil_mdnode *func_md = dxil_get_metadata_func(&ctx->mod, func);
   const struct dxil_mdnode *name_md = dxil_get_metadata_string(&ctx->mod, truncated_name);
   const struct dxil_mdnode *nodes[] = {
      func_md,
      name_md,
      signatures,
      resources,
      shader_props
   };
   return dxil_get_metadata_node(&ctx->mod, nodes,
                                 ARRAY_SIZE(nodes));
}

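/* Builds the dx.resources metadata node. It has four fixed slots - SRVs,
 * UAVs, CBVs and samplers - each left NULL when the shader binds nothing
 * of that class; returns NULL if all four are empty.
 */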
static const struct dxil_mdnode *
emit_resources(struct ntd_context *ctx)
{
   bool emit_resources = false;
   const struct dxil_mdnode *resources_nodes[] = {
      NULL, NULL, NULL, NULL
   };

#define ARRAY_AND_SIZE(arr) arr.data, util_dynarray_num_elements(&arr, const struct dxil_mdnode *)

   if (ctx->srv_metadata_nodes.size) {
      resources_nodes[0] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->srv_metadata_nodes));
      emit_resources = true;
   }

   if (ctx->uav_metadata_nodes.size) {
      resources_nodes[1] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->uav_metadata_nodes));
      emit_resources = true;
   }

   if (ctx->cbv_metadata_nodes.size) {
      resources_nodes[2] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->cbv_metadata_nodes));
      emit_resources = true;
   }

   if (ctx->sampler_metadata_nodes.size) {
      resources_nodes[3] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->sampler_metadata_nodes));
      emit_resources = true;
   }

#undef ARRAY_AND_SIZE

   return emit_resources ?
      dxil_get_metadata_node(&ctx->mod, resources_nodes, ARRAY_SIZE(resources_nodes)) : NULL;
}

static bool
emit_tag(struct ntd_context *ctx, enum dxil_shader_tag tag,
         const struct dxil_mdnode *value_node)
{
   const struct dxil_mdnode *tag_node = dxil_get_metadata_int32(&ctx->mod, tag);
   if (!tag_node || !value_node)
      return false;
   assert(ctx->num_shader_property_nodes <= ARRAY_SIZE(ctx->shader_property_nodes) - 2);
   ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = tag_node;
   ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = value_node;

   return true;
}

static bool
emit_metadata(struct ntd_context *ctx)
{
   /* DXIL versions are 1.x for shader model 6.x */
   assert(ctx->mod.major_version == 6);
   unsigned dxilMajor = 1;
   unsigned dxilMinor = ctx->mod.minor_version;
   unsigned valMajor = ctx->mod.major_validator;
   unsigned valMinor = ctx->mod.minor_validator;
   if (!emit_llvm_ident(&ctx->mod) ||
       !emit_named_version(&ctx->mod, "dx.version", dxilMajor, dxilMinor) ||
       !emit_named_version(&ctx->mod, "dx.valver", valMajor, valMinor) ||
       !emit_dx_shader_model(&ctx->mod))
      return false;

   const struct dxil_func_def *main_func_def = ctx->main_func_def;
   if (!main_func_def)
      return false;
   const struct dxil_func *main_func = main_func_def->func;

   const struct dxil_mdnode *resources_node = emit_resources(ctx);

   const struct dxil_mdnode *main_entrypoint = dxil_get_metadata_func(&ctx->mod, main_func);
   const struct dxil_mdnode *node27 = dxil_get_metadata_node(&ctx->mod, NULL, 0);

   const struct dxil_mdnode *node4 = dxil_get_metadata_int32(&ctx->mod, 0);
   const struct dxil_mdnode *nodes_4_27_27[] = {
      node4, node27, node27
   };
   const struct dxil_mdnode *node28 = dxil_get_metadata_node(&ctx->mod, nodes_4_27_27,
                                                             ARRAY_SIZE(nodes_4_27_27));

   const struct dxil_mdnode *node29 = dxil_get_metadata_node(&ctx->mod, &node28, 1);

   const struct dxil_mdnode *node3 = dxil_get_metadata_int32(&ctx->mod, 1);
   const struct dxil_mdnode *main_type_annotation_nodes[] = {
      node3, main_entrypoint, node29
   };
   const struct dxil_mdnode *main_type_annotation = dxil_get_metadata_node(&ctx->mod, main_type_annotation_nodes,
                                                                           ARRAY_SIZE(main_type_annotation_nodes));

   if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) {
      if (!emit_tag(ctx, DXIL_SHADER_TAG_GS_STATE, emit_gs_state(ctx)))
         return false;
   } else if (ctx->mod.shader_kind == DXIL_HULL_SHADER) {
      ctx->tess_input_control_point_count = 32;
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
         if (nir_is_arrayed_io(var, MESA_SHADER_TESS_CTRL)) {
            ctx->tess_input_control_point_count = glsl_array_size(var->type);
            break;
         }
      }

      if (!emit_tag(ctx, DXIL_SHADER_TAG_HS_STATE, emit_hs_state(ctx)))
         return false;
   } else if (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) {
      if (!emit_tag(ctx, DXIL_SHADER_TAG_DS_STATE, emit_ds_state(ctx)))
         return false;
   } else if (ctx->mod.shader_kind == DXIL_COMPUTE_SHADER) {
      if (!emit_tag(ctx, DXIL_SHADER_TAG_NUM_THREADS, emit_threads(ctx)))
         return false;
   }

   uint64_t flags = get_module_flags(ctx);
   if (flags != 0) {
      if (!emit_tag(ctx, DXIL_SHADER_TAG_FLAGS, dxil_get_metadata_int64(&ctx->mod, flags)))
         return false;
   }
   const struct dxil_mdnode *shader_properties = NULL;
   if (ctx->num_shader_property_nodes > 0) {
      shader_properties = dxil_get_metadata_node(&ctx->mod, ctx->shader_property_nodes,
                                                 ctx->num_shader_property_nodes);
      if (!shader_properties)
         return false;
   }

   nir_function_impl *entry_func_impl = nir_shader_get_entrypoint(ctx->shader);
   const struct dxil_mdnode *dx_entry_point = emit_entrypoint(ctx, main_func,
      entry_func_impl->function->name, get_signatures(&ctx->mod), resources_node, shader_properties);
   if (!dx_entry_point)
      return false;

   if (resources_node) {
      const struct dxil_mdnode *dx_resources = resources_node;
      dxil_add_metadata_named_node(&ctx->mod, "dx.resources",
                                   &dx_resources, 1);
   }

   const struct dxil_mdnode *dx_type_annotations[] = { main_type_annotation };
   return dxil_add_metadata_named_node(&ctx->mod, "dx.typeAnnotations",
                                       dx_type_annotations,
                                       ARRAY_SIZE(dx_type_annotations)) &&
          dxil_add_metadata_named_node(&ctx->mod, "dx.entryPoints",
                                       &dx_entry_point, 1);
}

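/* Bitcast helpers: DXIL values are typed, so when NIR reuses the same bits
 * as both int and float we reinterpret them with a same-width bitcast
 * instead of a numeric conversion.
 */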
static const struct dxil_value *
bitcast_to_int(struct ntd_context *ctx, unsigned bit_size,
               const struct dxil_value *value)
{
   const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod, bit_size);
   if (!type)
      return NULL;

   return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
}

static const struct dxil_value *
bitcast_to_float(struct ntd_context *ctx, unsigned bit_size,
                 const struct dxil_value *value)
{
   const struct dxil_type *type = dxil_module_get_float_type(&ctx->mod, bit_size);
   if (!type)
      return NULL;

   return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);
}

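/* ctx->defs maps each NIR SSA def to one dxil_value per component; the
 * helpers below read and write it.
 */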
static void
store_ssa_def(struct ntd_context *ctx, nir_ssa_def *ssa, unsigned chan,
              const struct dxil_value *value)
{
   assert(ssa->index < ctx->num_defs);
   assert(chan < ssa->num_components);
   /* We pre-defined the dest value because of a phi node, so bitcast while
    * storing if the base type differs */
   if (ctx->defs[ssa->index].chans[chan]) {
      const struct dxil_type *expect_type = dxil_value_get_type(ctx->defs[ssa->index].chans[chan]);
      const struct dxil_type *value_type = dxil_value_get_type(value);
      if (dxil_type_to_nir_type(expect_type) != dxil_type_to_nir_type(value_type))
         value = dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, expect_type, value);
   }
   ctx->defs[ssa->index].chans[chan] = value;
}

static void
store_dest_value(struct ntd_context *ctx, nir_dest *dest, unsigned chan,
                 const struct dxil_value *value)
{
   assert(dest->is_ssa);
   assert(value);
   store_ssa_def(ctx, &dest->ssa, chan, value);
}

static void
store_dest(struct ntd_context *ctx, nir_dest *dest, unsigned chan,
           const struct dxil_value *value, nir_alu_type type)
{
   switch (nir_alu_type_get_base_type(type)) {
   case nir_type_float:
      if (nir_dest_bit_size(*dest) == 64)
         ctx->mod.feats.doubles = true;
      store_dest_value(ctx, dest, chan, value);
      break;
   case nir_type_uint:
   case nir_type_int:
      if (nir_dest_bit_size(*dest) == 16)
         ctx->mod.feats.native_low_precision = true;
      if (nir_dest_bit_size(*dest) == 64)
         ctx->mod.feats.int64_ops = true;
      FALLTHROUGH;
   case nir_type_bool:
      store_dest_value(ctx, dest, chan, value);
      break;
   default:
      unreachable("unexpected nir_alu_type");
   }
}

static void
store_alu_dest(struct ntd_context *ctx, nir_alu_instr *alu, unsigned chan,
               const struct dxil_value *value)
{
   assert(!alu->dest.saturate);
   store_dest(ctx, &alu->dest.dest, chan, value,
              nir_op_infos[alu->op].output_type);
}

static const struct dxil_value *
get_src_ssa(struct ntd_context *ctx, const nir_ssa_def *ssa, unsigned chan)
{
   assert(ssa->index < ctx->num_defs);
   assert(chan < ssa->num_components);
   assert(ctx->defs[ssa->index].chans[chan]);
   return ctx->defs[ssa->index].chans[chan];
}

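/* Fetches one scalar component of a source and coerces it to the DXIL type
 * implied by the NIR ALU type: ints and floats of the right width are
 * bitcast if only the base type differs, and booleans are truncated to i1.
 */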
static const struct dxil_value *
get_src(struct ntd_context *ctx, nir_src *src, unsigned chan,
        nir_alu_type type)
{
   assert(src->is_ssa);
   const struct dxil_value *value = get_src_ssa(ctx, src->ssa, chan);

   const int bit_size = nir_src_bit_size(*src);

   switch (nir_alu_type_get_base_type(type)) {
   case nir_type_int:
   case nir_type_uint: {
      assert(bit_size != 64 || ctx->mod.feats.int64_ops);
      const struct dxil_type *expect_type = dxil_module_get_int_type(&ctx->mod, bit_size);
      /* nothing to do */
      if (dxil_value_type_equal_to(value, expect_type))
         return value;
      assert(dxil_value_type_bitsize_equal_to(value, bit_size));
      return bitcast_to_int(ctx, bit_size, value);
   }

   case nir_type_float:
      assert(nir_src_bit_size(*src) >= 16);
      assert(nir_src_bit_size(*src) != 64 || ctx->mod.feats.doubles);
      if (dxil_value_type_equal_to(value, dxil_module_get_float_type(&ctx->mod, bit_size)))
         return value;
      assert(dxil_value_type_bitsize_equal_to(value, bit_size));
      return bitcast_to_float(ctx, bit_size, value);

   case nir_type_bool:
      if (!dxil_value_type_bitsize_equal_to(value, 1)) {
         return dxil_emit_cast(&ctx->mod, DXIL_CAST_TRUNC,
                               dxil_module_get_int_type(&ctx->mod, 1), value);
      }
      return value;

   default:
      unreachable("unexpected nir_alu_type");
   }
}

static const struct dxil_type *
get_alu_src_type(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src)
{
   assert(!alu->src[src].abs);
   assert(!alu->src[src].negate);
   nir_ssa_def *ssa_src = alu->src[src].src.ssa;
   unsigned chan = alu->src[src].swizzle[0];
   const struct dxil_value *value = get_src_ssa(ctx, ssa_src, chan);
   return dxil_value_get_type(value);
}

static const struct dxil_value *
get_alu_src(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src)
{
   assert(!alu->src[src].abs);
   assert(!alu->src[src].negate);

   unsigned chan = alu->src[src].swizzle[0];
   return get_src(ctx, &alu->src[src].src, chan,
                  nir_op_infos[alu->op].input_types[src]);
}

static bool
emit_binop(struct ntd_context *ctx, nir_alu_instr *alu,
           enum dxil_bin_opcode opcode,
           const struct dxil_value *op0, const struct dxil_value *op1)
{
   bool is_float_op = nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) == nir_type_float;

   enum dxil_opt_flags flags = 0;
   if (is_float_op && !alu->exact)
      flags |= DXIL_UNSAFE_ALGEBRA;

   const struct dxil_value *v = dxil_emit_binop(&ctx->mod, opcode, op0, op1, flags);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_shift(struct ntd_context *ctx, nir_alu_instr *alu,
           enum dxil_bin_opcode opcode,
           const struct dxil_value *op0, const struct dxil_value *op1)
{
   unsigned op0_bit_size = nir_src_bit_size(alu->src[0].src);
   unsigned op1_bit_size = nir_src_bit_size(alu->src[1].src);
   if (op0_bit_size != op1_bit_size) {
      const struct dxil_type *type =
         dxil_module_get_int_type(&ctx->mod, op0_bit_size);
      enum dxil_cast_opcode cast_op =
         op1_bit_size < op0_bit_size ? DXIL_CAST_ZEXT : DXIL_CAST_TRUNC;
      op1 = dxil_emit_cast(&ctx->mod, cast_op, type, op1);
   }

   const struct dxil_value *v =
      dxil_emit_binop(&ctx->mod, opcode, op0, op1, 0);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_cmp(struct ntd_context *ctx, nir_alu_instr *alu,
         enum dxil_cmp_pred pred,
         const struct dxil_value *op0, const struct dxil_value *op1)
{
   const struct dxil_value *v = dxil_emit_cmp(&ctx->mod, pred, op0, op1);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

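/* Maps a NIR conversion opcode to the corresponding LLVM/DXIL cast opcode;
 * for same-base-type conversions the trunc-vs-ext direction follows from
 * the source and destination bit sizes.
 */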
static enum dxil_cast_opcode
get_cast_op(nir_alu_instr *alu)
{
   unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
   unsigned src_bits = nir_src_bit_size(alu->src[0].src);

   switch (alu->op) {
   /* bool -> int */
   case nir_op_b2i16:
   case nir_op_b2i32:
   case nir_op_b2i64:
      return DXIL_CAST_ZEXT;

   /* float -> float */
   case nir_op_f2f16_rtz:
   case nir_op_f2f32:
   case nir_op_f2f64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_FPTRUNC;
      else
         return DXIL_CAST_FPEXT;

   /* int -> int */
   case nir_op_i2i16:
   case nir_op_i2i32:
   case nir_op_i2i64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_TRUNC;
      else
         return DXIL_CAST_SEXT;

   /* uint -> uint */
   case nir_op_u2u16:
   case nir_op_u2u32:
   case nir_op_u2u64:
      assert(dst_bits != src_bits);
      if (dst_bits < src_bits)
         return DXIL_CAST_TRUNC;
      else
         return DXIL_CAST_ZEXT;

   /* float -> int */
   case nir_op_f2i16:
   case nir_op_f2i32:
   case nir_op_f2i64:
      return DXIL_CAST_FPTOSI;

   /* float -> uint */
   case nir_op_f2u16:
   case nir_op_f2u32:
   case nir_op_f2u64:
      return DXIL_CAST_FPTOUI;

   /* int -> float */
   case nir_op_i2f16:
   case nir_op_i2f32:
   case nir_op_i2f64:
      return DXIL_CAST_SITOFP;

   /* uint -> float */
   case nir_op_u2f16:
   case nir_op_u2f32:
   case nir_op_u2f64:
      return DXIL_CAST_UITOFP;

   default:
      unreachable("unexpected cast op");
   }
}

static const struct dxil_type *
get_cast_dest_type(struct ntd_context *ctx, nir_alu_instr *alu)
{
   unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
   switch (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type)) {
   case nir_type_bool:
      assert(dst_bits == 1);
      FALLTHROUGH;
   case nir_type_int:
   case nir_type_uint:
      return dxil_module_get_int_type(&ctx->mod, dst_bits);

   case nir_type_float:
      return dxil_module_get_float_type(&ctx->mod, dst_bits);

   default:
      unreachable("unknown nir_alu_type");
   }
}

static bool
is_double(nir_alu_type alu_type, unsigned bit_size)
{
   return nir_alu_type_get_base_type(alu_type) == nir_type_float &&
          bit_size == 64;
}

static bool
emit_cast(struct ntd_context *ctx, nir_alu_instr *alu,
          const struct dxil_value *value)
{
   enum dxil_cast_opcode opcode = get_cast_op(alu);
   const struct dxil_type *type = get_cast_dest_type(ctx, alu);
   if (!type)
      return false;

   const nir_op_info *info = &nir_op_infos[alu->op];
   switch (opcode) {
   case DXIL_CAST_UITOFP:
   case DXIL_CAST_SITOFP:
      if (is_double(info->output_type, nir_dest_bit_size(alu->dest.dest)))
         ctx->mod.feats.dx11_1_double_extensions = true;
      break;
   case DXIL_CAST_FPTOUI:
   case DXIL_CAST_FPTOSI:
      if (is_double(info->input_types[0], nir_src_bit_size(alu->src[0].src)))
         ctx->mod.feats.dx11_1_double_extensions = true;
      break;
   default:
      break;
   }

   const struct dxil_value *v = dxil_emit_cast(&ctx->mod, opcode, type,
                                               value);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

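/* DXIL intrinsics are overloaded on the operand type; select the overload
 * (i16/i32/i64 or f16/f32/f64) from the NIR base type and bit size.
 */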
static enum overload_type
get_overload(nir_alu_type alu_type, unsigned bit_size)
{
   switch (nir_alu_type_get_base_type(alu_type)) {
   case nir_type_int:
   case nir_type_uint:
      switch (bit_size) {
      case 16: return DXIL_I16;
      case 32: return DXIL_I32;
      case 64: return DXIL_I64;
      default:
         unreachable("unexpected bit_size");
      }
   case nir_type_float:
      switch (bit_size) {
      case 16: return DXIL_F16;
      case 32: return DXIL_F32;
      case 64: return DXIL_F64;
      default:
         unreachable("unexpected bit_size");
      }
   default:
      unreachable("unexpected output type");
   }
}

static bool
emit_unary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
                 enum dxil_intr intr, const struct dxil_value *op)
{
   const nir_op_info *info = &nir_op_infos[alu->op];
   unsigned src_bits = nir_src_bit_size(alu->src[0].src);
   enum overload_type overload = get_overload(info->input_types[0], src_bits);

   const struct dxil_value *v = emit_unary_call(ctx, overload, intr, op);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_binary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
                  enum dxil_intr intr,
                  const struct dxil_value *op0, const struct dxil_value *op1)
{
   const nir_op_info *info = &nir_op_infos[alu->op];
   assert(info->output_type == info->input_types[0]);
   assert(info->output_type == info->input_types[1]);
   unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
   assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
   assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
   enum overload_type overload = get_overload(info->output_type, dst_bits);

   const struct dxil_value *v = emit_binary_call(ctx, overload, intr,
                                                 op0, op1);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_tertiary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
                    enum dxil_intr intr,
                    const struct dxil_value *op0,
                    const struct dxil_value *op1,
                    const struct dxil_value *op2)
{
   const nir_op_info *info = &nir_op_infos[alu->op];
   unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);
   assert(nir_src_bit_size(alu->src[0].src) == dst_bits);
   assert(nir_src_bit_size(alu->src[1].src) == dst_bits);
   assert(nir_src_bit_size(alu->src[2].src) == dst_bits);

   assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[0], dst_bits));
   assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[1], dst_bits));
   assert(get_overload(info->output_type, dst_bits) == get_overload(info->input_types[2], dst_bits));

   enum overload_type overload = get_overload(info->output_type, dst_bits);

   const struct dxil_value *v = emit_tertiary_call(ctx, overload, intr,
                                                   op0, op1, op2);
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_bitfield_insert(struct ntd_context *ctx, nir_alu_instr *alu,
                     const struct dxil_value *base,
                     const struct dxil_value *insert,
                     const struct dxil_value *offset,
                     const struct dxil_value *width)
{
   /* DXIL is width, offset, insert, base; NIR is base, insert, offset, width */
   const struct dxil_value *v = emit_quaternary_call(ctx, DXIL_I32, DXIL_INTR_BFI,
                                                     width, offset, insert, base);
   if (!v)
      return false;

   /* DXIL uses only the 5 LSBs of width/offset, so special-case
    * width >= 32 by copying 'insert' through unchanged. */
   const struct dxil_value *compare_width = dxil_emit_cmp(&ctx->mod, DXIL_ICMP_SGE,
      width, dxil_module_get_int32_const(&ctx->mod, 32));
   v = dxil_emit_select(&ctx->mod, compare_width, insert, v);
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_select(struct ntd_context *ctx, nir_alu_instr *alu,
            const struct dxil_value *sel,
            const struct dxil_value *val_true,
            const struct dxil_value *val_false)
{
   assert(sel);
   assert(val_true);
   assert(val_false);

   const struct dxil_value *v = dxil_emit_select(&ctx->mod, sel, val_true, val_false);
   if (!v)
      return false;

   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_b2f16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
{
   assert(val);

   struct dxil_module *m = &ctx->mod;

   const struct dxil_value *c1 = dxil_module_get_float16_const(m, 0x3C00); /* 1.0 in half-float */
   const struct dxil_value *c0 = dxil_module_get_float16_const(m, 0);

   if (!c0 || !c1)
      return false;

   return emit_select(ctx, alu, val, c1, c0);
}

static bool
emit_b2f32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
{
   assert(val);

   struct dxil_module *m = &ctx->mod;

   const struct dxil_value *c1 = dxil_module_get_float_const(m, 1.0f);
   const struct dxil_value *c0 = dxil_module_get_float_const(m, 0.0f);

   if (!c0 || !c1)
      return false;

   return emit_select(ctx, alu, val, c1, c0);
}

static bool
emit_b2f64(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
{
   assert(val);

   struct dxil_module *m = &ctx->mod;

   const struct dxil_value *c1 = dxil_module_get_double_const(m, 1.0);
   const struct dxil_value *c0 = dxil_module_get_double_const(m, 0.0);

   if (!c0 || !c1)
      return false;

   ctx->mod.feats.doubles = 1;
   return emit_select(ctx, alu, val, c1, c0);
}

static bool
emit_f2b32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
{
   assert(val);

   const struct dxil_value *zero = dxil_module_get_float_const(&ctx->mod, 0.0f);
   return emit_cmp(ctx, alu, DXIL_FCMP_UNE, val, zero);
}

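/* dx.op.legacyF16ToF32 converts the half-float in the low 16 bits of an
 * i32; 'shift' selects the high half first, which implements
 * unpack_half_2x16_split_y.
 */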
static bool
emit_f16tof32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val, bool shift)
{
   if (shift) {
      val = dxil_emit_binop(&ctx->mod, DXIL_BINOP_LSHR, val,
                            dxil_module_get_int32_const(&ctx->mod, 16), 0);
      if (!val)
         return false;
   }

   const struct dxil_func *func = dxil_get_function(&ctx->mod,
                                                    "dx.op.legacyF16ToF32",
                                                    DXIL_NONE);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F16TOF32);
   if (!opcode)
      return false;

   const struct dxil_value *args[] = {
      opcode,
      val
   };

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;
   store_alu_dest(ctx, alu, 0, v);
   return true;
}

static bool
emit_f32tof16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val0, const struct dxil_value *val1)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod,
                                                    "dx.op.legacyF32ToF16",
                                                    DXIL_NONE);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F32TOF16);
   if (!opcode)
      return false;

   const struct dxil_value *args[] = {
      opcode,
      val0
   };

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;

   if (!nir_src_is_const(alu->src[1].src) || nir_src_as_int(alu->src[1].src) != 0) {
      args[1] = val1;
      const struct dxil_value *v_high = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
      if (!v_high)
         return false;

      v_high = dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL, v_high,
                               dxil_module_get_int32_const(&ctx->mod, 16), 0);
      if (!v_high)
         return false;

      v = dxil_emit_binop(&ctx->mod, DXIL_BINOP_OR, v, v_high, 0);
      if (!v)
         return false;
   }

   store_alu_dest(ctx, alu, 0, v);
   return true;
}

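/* NIR vecN instructions are the only ones with vector destinations (see
 * emit_alu); since values are tracked per component, a vecN just forwards
 * each source component's dxil_value to the destination slot.
 */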
static bool
emit_vec(struct ntd_context *ctx, nir_alu_instr *alu, unsigned num_inputs)
{
   const struct dxil_type *type = get_alu_src_type(ctx, alu, 0);
   nir_alu_type t = dxil_type_to_nir_type(type);

   for (unsigned i = 0; i < num_inputs; i++) {
      const struct dxil_value *src =
         get_src(ctx, &alu->src[i].src, alu->src[i].swizzle[0], t);
      if (!src)
         return false;

      store_alu_dest(ctx, alu, i, src);
   }
   return true;
}

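/* 64-bit packing: dx.op.makeDouble builds a double from two 32-bit halves,
 * and dx.op.splitDouble returns a two-element struct that is unpacked with
 * extractval below.
 */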
static bool
emit_make_double(struct ntd_context *ctx, nir_alu_instr *alu)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.makeDouble", DXIL_F64);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_MAKE_DOUBLE);
   if (!opcode)
      return false;

   const struct dxil_value *args[3] = {
      opcode,
      get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_uint32),
      get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[1], nir_type_uint32),
   };
   if (!args[1] || !args[2])
      return false;

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;
   store_dest(ctx, &alu->dest.dest, 0, v, nir_type_float64);
   return true;
}

static bool
emit_split_double(struct ntd_context *ctx, nir_alu_instr *alu)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.splitDouble", DXIL_F64);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SPLIT_DOUBLE);
   if (!opcode)
      return false;

   const struct dxil_value *args[] = {
      opcode,
      get_src(ctx, &alu->src[0].src, alu->src[0].swizzle[0], nir_type_float64)
   };
   if (!args[1])
      return false;

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;

   const struct dxil_value *hi = dxil_emit_extractval(&ctx->mod, v, 0);
   const struct dxil_value *lo = dxil_emit_extractval(&ctx->mod, v, 1);
   if (!hi || !lo)
      return false;

   store_dest_value(ctx, &alu->dest.dest, 0, hi);
   store_dest_value(ctx, &alu->dest.dest, 1, lo);
   return true;
}

static bool
emit_alu(struct ntd_context *ctx, nir_alu_instr *alu)
{
   /* handle vec-instructions first; they are the only ones that produce
    * vector results.
    */
   switch (alu->op) {
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
   case nir_op_vec8:
   case nir_op_vec16:
      return emit_vec(ctx, alu, nir_op_infos[alu->op].num_inputs);
   case nir_op_mov: {
      assert(nir_dest_num_components(alu->dest.dest) == 1);
      store_ssa_def(ctx, &alu->dest.dest.ssa, 0, get_src_ssa(ctx,
                    alu->src->src.ssa, alu->src->swizzle[0]));
      return true;
   }
   case nir_op_pack_double_2x32_dxil:
      return emit_make_double(ctx, alu);
   case nir_op_unpack_double_2x32_dxil:
      return emit_split_double(ctx, alu);
   default:
      /* silence warnings */
      ;
   }

   /* other ops should be scalar */
   assert(alu->dest.write_mask == 1);
   const struct dxil_value *src[4];
   assert(nir_op_infos[alu->op].num_inputs <= 4);
   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      src[i] = get_alu_src(ctx, alu, i);
      if (!src[i])
         return false;
   }

   switch (alu->op) {
   case nir_op_iadd:
   case nir_op_fadd: return emit_binop(ctx, alu, DXIL_BINOP_ADD, src[0], src[1]);

   case nir_op_isub:
   case nir_op_fsub: return emit_binop(ctx, alu, DXIL_BINOP_SUB, src[0], src[1]);

   case nir_op_imul:
   case nir_op_fmul: return emit_binop(ctx, alu, DXIL_BINOP_MUL, src[0], src[1]);

   case nir_op_fdiv:
      if (alu->dest.dest.ssa.bit_size == 64)
         ctx->mod.feats.dx11_1_double_extensions = 1;
      FALLTHROUGH;
   case nir_op_idiv:
      return emit_binop(ctx, alu, DXIL_BINOP_SDIV, src[0], src[1]);

   case nir_op_udiv: return emit_binop(ctx, alu, DXIL_BINOP_UDIV, src[0], src[1]);
   case nir_op_irem: return emit_binop(ctx, alu, DXIL_BINOP_SREM, src[0], src[1]);
   case nir_op_imod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
   case nir_op_umod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);
   case nir_op_ishl: return emit_shift(ctx, alu, DXIL_BINOP_SHL, src[0], src[1]);
   case nir_op_ishr: return emit_shift(ctx, alu, DXIL_BINOP_ASHR, src[0], src[1]);
   case nir_op_ushr: return emit_shift(ctx, alu, DXIL_BINOP_LSHR, src[0], src[1]);
   case nir_op_iand: return emit_binop(ctx, alu, DXIL_BINOP_AND, src[0], src[1]);
   case nir_op_ior: return emit_binop(ctx, alu, DXIL_BINOP_OR, src[0], src[1]);
   case nir_op_ixor: return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], src[1]);
   case nir_op_inot: {
      unsigned bit_size = alu->dest.dest.ssa.bit_size;
      intmax_t val = bit_size == 1 ? 1 : -1;
      const struct dxil_value *negative_one = dxil_module_get_int_const(&ctx->mod, val, bit_size);
      return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], negative_one);
   }
   case nir_op_ieq: return emit_cmp(ctx, alu, DXIL_ICMP_EQ, src[0], src[1]);
   case nir_op_ine: return emit_cmp(ctx, alu, DXIL_ICMP_NE, src[0], src[1]);
   case nir_op_ige: return emit_cmp(ctx, alu, DXIL_ICMP_SGE, src[0], src[1]);
   case nir_op_uge: return emit_cmp(ctx, alu, DXIL_ICMP_UGE, src[0], src[1]);
   case nir_op_ilt: return emit_cmp(ctx, alu, DXIL_ICMP_SLT, src[0], src[1]);
   case nir_op_ult: return emit_cmp(ctx, alu, DXIL_ICMP_ULT, src[0], src[1]);
   case nir_op_feq: return emit_cmp(ctx, alu, DXIL_FCMP_OEQ, src[0], src[1]);
   case nir_op_fneu: return emit_cmp(ctx, alu, DXIL_FCMP_UNE, src[0], src[1]);
   case nir_op_flt: return emit_cmp(ctx, alu, DXIL_FCMP_OLT, src[0], src[1]);
   case nir_op_fge: return emit_cmp(ctx, alu, DXIL_FCMP_OGE, src[0], src[1]);
   case nir_op_bcsel: return emit_select(ctx, alu, src[0], src[1], src[2]);
   case nir_op_ftrunc: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_Z, src[0]);
   case nir_op_fabs: return emit_unary_intin(ctx, alu, DXIL_INTR_FABS, src[0]);
   case nir_op_fcos: return emit_unary_intin(ctx, alu, DXIL_INTR_FCOS, src[0]);
   case nir_op_fsin: return emit_unary_intin(ctx, alu, DXIL_INTR_FSIN, src[0]);
   case nir_op_fceil: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_PI, src[0]);
   case nir_op_fexp2: return emit_unary_intin(ctx, alu, DXIL_INTR_FEXP2, src[0]);
   case nir_op_flog2: return emit_unary_intin(ctx, alu, DXIL_INTR_FLOG2, src[0]);
   case nir_op_ffloor: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NI, src[0]);
   case nir_op_ffract: return emit_unary_intin(ctx, alu, DXIL_INTR_FRC, src[0]);
   case nir_op_fisnormal: return emit_unary_intin(ctx, alu, DXIL_INTR_ISNORMAL, src[0]);
   case nir_op_fisfinite: return emit_unary_intin(ctx, alu, DXIL_INTR_ISFINITE, src[0]);

   case nir_op_fddx:
   case nir_op_fddx_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_COARSE, src[0]);
   case nir_op_fddx_fine: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_FINE, src[0]);
   case nir_op_fddy:
   case nir_op_fddy_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDY_COARSE, src[0]);
   case nir_op_fddy_fine: return emit_unary_intin(ctx, alu, DXIL_INTR_DDY_FINE, src[0]);

   case nir_op_fround_even: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NE, src[0]);
   case nir_op_frcp: {
      const struct dxil_value *one = dxil_module_get_float_const(&ctx->mod, 1.0f);
      return emit_binop(ctx, alu, DXIL_BINOP_SDIV, one, src[0]);
   }
   case nir_op_fsat: return emit_unary_intin(ctx, alu, DXIL_INTR_SATURATE, src[0]);
   case nir_op_bit_count: return emit_unary_intin(ctx, alu, DXIL_INTR_COUNTBITS, src[0]);
   case nir_op_bitfield_reverse: return emit_unary_intin(ctx, alu, DXIL_INTR_BFREV, src[0]);
   case nir_op_ufind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_HI, src[0]);
   case nir_op_ifind_msb_rev: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_SHI, src[0]);
   case nir_op_find_lsb: return emit_unary_intin(ctx, alu, DXIL_INTR_FIRSTBIT_LO, src[0]);
   case nir_op_imax: return emit_binary_intin(ctx, alu, DXIL_INTR_IMAX, src[0], src[1]);
   case nir_op_imin: return emit_binary_intin(ctx, alu, DXIL_INTR_IMIN, src[0], src[1]);
   case nir_op_umax: return emit_binary_intin(ctx, alu, DXIL_INTR_UMAX, src[0], src[1]);
   case nir_op_umin: return emit_binary_intin(ctx, alu, DXIL_INTR_UMIN, src[0], src[1]);
   case nir_op_frsq: return emit_unary_intin(ctx, alu, DXIL_INTR_RSQRT, src[0]);
   case nir_op_fsqrt: return emit_unary_intin(ctx, alu, DXIL_INTR_SQRT, src[0]);
   case nir_op_fmax: return emit_binary_intin(ctx, alu, DXIL_INTR_FMAX, src[0], src[1]);
   case nir_op_fmin: return emit_binary_intin(ctx, alu, DXIL_INTR_FMIN, src[0], src[1]);
   case nir_op_ffma:
      if (alu->dest.dest.ssa.bit_size == 64)
         ctx->mod.feats.dx11_1_double_extensions = 1;
      return emit_tertiary_intin(ctx, alu, DXIL_INTR_FMA, src[0], src[1], src[2]);

   case nir_op_ibfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_IBFE, src[2], src[1], src[0]);
   case nir_op_ubfe: return emit_tertiary_intin(ctx, alu, DXIL_INTR_UBFE, src[2], src[1], src[0]);
   case nir_op_bitfield_insert: return emit_bitfield_insert(ctx, alu, src[0], src[1], src[2], src[3]);

   case nir_op_unpack_half_2x16_split_x: return emit_f16tof32(ctx, alu, src[0], false);
   case nir_op_unpack_half_2x16_split_y: return emit_f16tof32(ctx, alu, src[0], true);
   case nir_op_pack_half_2x16_split: return emit_f32tof16(ctx, alu, src[0], src[1]);

   case nir_op_b2i16:
   case nir_op_i2i16:
   case nir_op_f2i16:
   case nir_op_f2u16:
   case nir_op_u2u16:
   case nir_op_u2f16:
   case nir_op_i2f16:
   case nir_op_f2f16_rtz:
   case nir_op_b2i32:
   case nir_op_f2f32:
   case nir_op_f2i32:
   case nir_op_f2u32:
   case nir_op_i2f32:
   case nir_op_i2i32:
   case nir_op_u2f32:
   case nir_op_u2u32:
   case nir_op_b2i64:
   case nir_op_f2f64:
   case nir_op_f2i64:
   case nir_op_f2u64:
   case nir_op_i2f64:
   case nir_op_i2i64:
   case nir_op_u2f64:
   case nir_op_u2u64:
      return emit_cast(ctx, alu, src[0]);

   case nir_op_f2b32: return emit_f2b32(ctx, alu, src[0]);
   case nir_op_b2f16: return emit_b2f16(ctx, alu, src[0]);
   case nir_op_b2f32: return emit_b2f32(ctx, alu, src[0]);
   case nir_op_b2f64: return emit_b2f64(ctx, alu, src[0]);
   default:
      NIR_INSTR_UNSUPPORTED(&alu->instr);
      assert(!"Unimplemented ALU instruction");
      return false;
   }
}

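/* Loads one 16-byte row of a constant buffer via dx.op.cbufferLoadLegacy.
 * The offset is in 16-byte units, which is why emit_load_ubo shifts byte
 * offsets right by 4.
 */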
static const struct dxil_value *
load_ubo(struct ntd_context *ctx, const struct dxil_value *handle,
         const struct dxil_value *offset, enum overload_type overload)
{
   assert(handle && offset);

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CBUFFER_LOAD_LEGACY);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = {
      opcode, handle, offset
   };

   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cbufferLoadLegacy", overload);
   if (!func)
      return NULL;
   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

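/* Translates NIR barrier semantics into the flags word of dx.op.barrier:
 * thread-group sync for a workgroup execution scope, a UAV fence (global
 * or thread-group, depending on the memory scope) for buffer/image modes,
 * and a groupshared fence for shared memory.
 */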
static bool
emit_barrier_impl(struct ntd_context *ctx, nir_variable_mode modes, nir_scope execution_scope, nir_scope mem_scope)
{
   const struct dxil_value *opcode, *mode;
   const struct dxil_func *func;
   uint32_t flags = 0;

   if (execution_scope == NIR_SCOPE_WORKGROUP)
      flags |= DXIL_BARRIER_MODE_SYNC_THREAD_GROUP;

   if (modes & (nir_var_mem_ssbo | nir_var_mem_global | nir_var_image)) {
      if (mem_scope > NIR_SCOPE_WORKGROUP)
         flags |= DXIL_BARRIER_MODE_UAV_FENCE_GLOBAL;
      else
         flags |= DXIL_BARRIER_MODE_UAV_FENCE_THREAD_GROUP;
   }

   if (modes & nir_var_mem_shared)
      flags |= DXIL_BARRIER_MODE_GROUPSHARED_MEM_FENCE;

   func = dxil_get_function(&ctx->mod, "dx.op.barrier", DXIL_NONE);
   if (!func)
      return false;

   opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_BARRIER);
   if (!opcode)
      return false;

   mode = dxil_module_get_int32_const(&ctx->mod, flags);
   if (!mode)
      return false;

   const struct dxil_value *args[] = { opcode, mode };

   return dxil_emit_call_void(&ctx->mod, func,
                              args, ARRAY_SIZE(args));
}

static bool
emit_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   return emit_barrier_impl(ctx,
                            nir_intrinsic_memory_modes(intr),
                            nir_intrinsic_execution_scope(intr),
                            nir_intrinsic_memory_scope(intr));
}

/* Memory barrier for UAVs (buffers/images) at cross-workgroup scope */
static bool
emit_memory_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   return emit_barrier_impl(ctx,
                            nir_var_mem_global,
                            NIR_SCOPE_NONE,
                            NIR_SCOPE_DEVICE);
}

/* Memory barrier for TGSM */
static bool
emit_memory_barrier_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   return emit_barrier_impl(ctx,
                            nir_var_mem_shared,
                            NIR_SCOPE_NONE,
                            NIR_SCOPE_WORKGROUP);
}

/* Memory barrier for all intra-workgroup memory accesses (UAVs and TGSM) */
static bool
emit_group_memory_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   return emit_barrier_impl(ctx,
                            nir_var_mem_shared | nir_var_mem_global,
                            NIR_SCOPE_NONE,
                            NIR_SCOPE_WORKGROUP);
}

static bool
emit_control_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   return emit_barrier_impl(ctx,
                            nir_var_mem_shared,
                            NIR_SCOPE_WORKGROUP,
                            NIR_SCOPE_NONE);
}

static bool
emit_load_global_invocation_id(struct ntd_context *ctx,
                               nir_intrinsic_instr *intr)
{
   assert(intr->dest.is_ssa);
   nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);

   for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
      if (comps & (1 << i)) {
         const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
         if (!idx)
            return false;
         const struct dxil_value *globalid = emit_threadid_call(ctx, idx);

         if (!globalid)
            return false;

         store_dest_value(ctx, &intr->dest, i, globalid);
      }
   }
   return true;
}

static bool
emit_load_local_invocation_id(struct ntd_context *ctx,
                              nir_intrinsic_instr *intr)
{
   assert(intr->dest.is_ssa);
   nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);

   for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
      if (comps & (1 << i)) {
         const struct dxil_value
            *idx = dxil_module_get_int32_const(&ctx->mod, i);
         if (!idx)
            return false;
         const struct dxil_value
            *threadidingroup = emit_threadidingroup_call(ctx, idx);
         if (!threadidingroup)
            return false;
         store_dest_value(ctx, &intr->dest, i, threadidingroup);
      }
   }
   return true;
}

static bool
emit_load_local_invocation_index(struct ntd_context *ctx,
                                 nir_intrinsic_instr *intr)
{
   assert(intr->dest.is_ssa);

   const struct dxil_value
      *flattenedthreadidingroup = emit_flattenedthreadidingroup_call(ctx);
   if (!flattenedthreadidingroup)
      return false;
   store_dest_value(ctx, &intr->dest, 0, flattenedthreadidingroup);

   return true;
}

static bool
emit_load_local_workgroup_id(struct ntd_context *ctx,
                             nir_intrinsic_instr *intr)
{
   assert(intr->dest.is_ssa);
   nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);

   for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
      if (comps & (1 << i)) {
         const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);
         if (!idx)
            return false;
         const struct dxil_value *groupid = emit_groupid_call(ctx, idx);
         if (!groupid)
            return false;
         store_dest_value(ctx, &intr->dest, i, groupid);
      }
   }
   return true;
}

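/* Helper for DXIL system-value ops that take no argument besides the
 * opcode (e.g. dx.op.coverage, dx.op.sampleIndex) and return an i32.
 */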
static const struct dxil_value *
call_unary_external_function(struct ntd_context *ctx,
                             const char *name,
                             int32_t dxil_intr)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, name, DXIL_I32);
   if (!func)
      return NULL;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, dxil_intr);
   if (!opcode)
      return NULL;

   const struct dxil_value *args[] = { opcode };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_load_unary_external_function(struct ntd_context *ctx,
                                  nir_intrinsic_instr *intr, const char *name,
                                  int32_t dxil_intr)
{
   const struct dxil_value *value = call_unary_external_function(ctx, name, dxil_intr);
   store_dest_value(ctx, &intr->dest, 0, value);

   return true;
}

static bool
emit_load_sample_mask_in(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *value = call_unary_external_function(ctx,
      "dx.op.coverage", DXIL_INTR_COVERAGE);

   /* Mask coverage with (1 << sample index). Note, done as an AND to handle extrapolation cases. */
   if (ctx->mod.info.has_per_sample_input) {
      value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_AND, value,
         dxil_emit_binop(&ctx->mod, DXIL_BINOP_SHL,
            dxil_module_get_int32_const(&ctx->mod, 1),
            call_unary_external_function(ctx, "dx.op.sampleIndex", DXIL_INTR_SAMPLE_INDEX), 0), 0);
   }

   store_dest_value(ctx, &intr->dest, 0, value);
   return true;
}

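/* dx.op.domainLocation returns one tessellation coordinate component per
 * call: three components in the triangle domain, two otherwise; remaining
 * NIR components are filled with 0.0.
 */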
static bool
emit_load_tess_coord(struct ntd_context *ctx,
                     nir_intrinsic_instr *intr)
{
   const struct dxil_func *func =
      dxil_get_function(&ctx->mod, "dx.op.domainLocation", DXIL_F32);
   if (!func)
      return false;

   const struct dxil_value *opcode =
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DOMAIN_LOCATION);
   if (!opcode)
      return false;

   unsigned num_coords = ctx->shader->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES ? 3 : 2;
   for (unsigned i = 0; i < num_coords; ++i) {
      unsigned component_idx = i;

      const struct dxil_value *component = dxil_module_get_int32_const(&ctx->mod, component_idx);
      if (!component)
         return false;

      const struct dxil_value *args[] = { opcode, component };

      const struct dxil_value *value =
         dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
      store_dest_value(ctx, &intr->dest, i, value);
   }

   for (unsigned i = num_coords; i < intr->dest.ssa.num_components; ++i) {
      const struct dxil_value *value = dxil_module_get_float_const(&ctx->mod, 0.0f);
      store_dest_value(ctx, &intr->dest, i, value);
   }

   return true;
}

static const struct dxil_value *
get_int32_undef(struct dxil_module *m)
{
   const struct dxil_type *int32_type =
      dxil_module_get_int_type(m, 32);
   if (!int32_type)
      return NULL;

   return dxil_module_get_undef(m, int32_type);
}

static const struct dxil_value *
emit_gep_for_index(struct ntd_context *ctx, const nir_variable *var,
                   const struct dxil_value *index)
{
   assert(var->data.mode == nir_var_shader_temp);

   struct hash_entry *he = _mesa_hash_table_search(ctx->consts, var);
   assert(he != NULL);
   const struct dxil_value *ptr = he->data;

   const struct dxil_value *zero = dxil_module_get_int32_const(&ctx->mod, 0);
   if (!zero)
      return NULL;

   const struct dxil_value *ops[] = { ptr, zero, index };
   return dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
}

static const struct dxil_value *
get_resource_handle(struct ntd_context *ctx, nir_src *src, enum dxil_resource_class class,
                    enum dxil_resource_kind kind)
{
   /* This source might be one of:
    * 1. Constant resource index - just look it up in precomputed handle arrays.
    *    If it's null in that array, create a handle, and store the result.
    * 2. A handle from load_vulkan_descriptor - just get the stored SSA value.
    * 3. Dynamic resource index - create a handle for it here.
    */
   assert(src->ssa->num_components == 1 && src->ssa->bit_size == 32);
   nir_const_value *const_block_index = nir_src_as_const_value(*src);
   const struct dxil_value **handle_entry = NULL;
   if (const_block_index) {
      assert(ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN);
      switch (kind) {
      case DXIL_RESOURCE_KIND_CBUFFER:
         handle_entry = &ctx->cbv_handles[const_block_index->u32];
         break;
      case DXIL_RESOURCE_KIND_RAW_BUFFER:
         if (class == DXIL_RESOURCE_CLASS_UAV)
            handle_entry = &ctx->ssbo_handles[const_block_index->u32];
         else
            handle_entry = &ctx->srv_handles[const_block_index->u32];
         break;
      case DXIL_RESOURCE_KIND_SAMPLER:
         handle_entry = &ctx->sampler_handles[const_block_index->u32];
         break;
      default:
         if (class == DXIL_RESOURCE_CLASS_UAV)
            handle_entry = &ctx->image_handles[const_block_index->u32];
         else
            handle_entry = &ctx->srv_handles[const_block_index->u32];
         break;
      }
   }

   if (handle_entry && *handle_entry)
      return *handle_entry;

   const struct dxil_value *value = get_src_ssa(ctx, src->ssa, 0);
   if (nir_src_as_deref(*src) ||
       ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      return value;
   }

   unsigned space = 0;
   if (ctx->opts->environment == DXIL_ENVIRONMENT_GL &&
       class == DXIL_RESOURCE_CLASS_UAV) {
      if (kind == DXIL_RESOURCE_KIND_RAW_BUFFER)
         space = 2;
      else
         space = 1;
   }

   /* The base binding here will almost always be zero. The only cases where we
    * end up in this type of dynamic indexing are:
    * 1. GL UBOs
    * 2. GL SSBOs
    * 3. CL SSBOs
    * In all cases except GL UBOs, the resources are a single zero-based array.
    * For GL UBOs, the base is 1, because uniforms use binding 0 and cannot be
    * dynamically indexed. All other cases should either fall into static
    * indexing (first early return), deref-based dynamic handle creation
    * (images, or Vulkan textures/samplers), or load_vulkan_descriptor handle
    * creation.
    */
   unsigned base_binding = 0;
   if (ctx->opts->environment == DXIL_ENVIRONMENT_GL &&
       class == DXIL_RESOURCE_CLASS_CBV)
      base_binding = 1;

   const struct dxil_value *handle = emit_createhandle_call(ctx, class,
      get_resource_id(ctx, class, space, base_binding), value, !const_block_index);
   if (handle_entry)
      *handle_entry = handle;

   return handle;
}

static bool
emit_load_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);

   enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV;
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
      if (var && var->data.access & ACCESS_NON_WRITEABLE)
         class = DXIL_RESOURCE_CLASS_SRV;
   }

   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);
   if (!int32_undef || !handle || !offset)
      return false;

   assert(nir_src_bit_size(intr->src[0]) == 32);
   assert(nir_intrinsic_dest_components(intr) <= 4);

   const struct dxil_value *coord[2] = {
      offset,
      int32_undef
   };

   const struct dxil_value *load = emit_bufferload_call(ctx, handle, coord, DXIL_I32);
   if (!load)
      return false;

   for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {
      const struct dxil_value *val =
         dxil_emit_extractval(&ctx->mod, load, i);
      if (!val)
         return false;
      store_dest_value(ctx, &intr->dest, i, val);
   }
   return true;
}

static bool
emit_store_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[1], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[2], 0, nir_type_uint);
   if (!handle || !offset)
      return false;

   assert(nir_src_bit_size(intr->src[0]) == 32);
   unsigned num_components = nir_src_num_components(intr->src[0]);
   assert(num_components <= 4);
   const struct dxil_value *value[4];
   for (unsigned i = 0; i < num_components; ++i) {
      value[i] = get_src(ctx, &intr->src[0], i, nir_type_uint);
      if (!value[i])
         return false;
   }

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[2] = {
      offset,
      int32_undef
   };

   for (int i = num_components; i < 4; ++i)
      value[i] = int32_undef;

   const struct dxil_value *write_mask =
      dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
   if (!write_mask)
      return false;

   return emit_bufferstore_call(ctx, handle, coord, value, write_mask, DXIL_I32);
}

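/* Raw buffer stores only have a per-component write mask, so a bit-level
 * masked store is emulated with two atomics: an AND with the mask followed
 * by an OR with the value.
 */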
static bool
emit_store_ssbo_masked(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *value =
      get_src(ctx, &intr->src[0], 0, nir_type_uint);
   const struct dxil_value *mask =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[2], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[3], 0, nir_type_uint);
   if (!value || !mask || !handle || !offset)
      return false;

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = {
      offset, int32_undef, int32_undef
   };

   return
      emit_atomic_binop(ctx, handle, DXIL_ATOMIC_AND, coord, mask) != NULL &&
      emit_atomic_binop(ctx, handle, DXIL_ATOMIC_OR, coord, value) != NULL;
}

2999 static bool
emit_store_shared(struct ntd_context * ctx,nir_intrinsic_instr * intr)3000 emit_store_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr)
3001 {
3002 const struct dxil_value *zero, *index;
3003
3004 /* All shared mem accesses should have been lowered to scalar 32bit
3005 * accesses.
3006 */
3007 assert(nir_src_bit_size(intr->src[0]) == 32);
3008 assert(nir_src_num_components(intr->src[0]) == 1);
3009
3010 zero = dxil_module_get_int32_const(&ctx->mod, 0);
3011 if (!zero)
3012 return false;
3013
3014 if (intr->intrinsic == nir_intrinsic_store_shared_dxil)
3015 index = get_src(ctx, &intr->src[1], 0, nir_type_uint);
3016 else
3017 index = get_src(ctx, &intr->src[2], 0, nir_type_uint);
3018 if (!index)
3019 return false;
3020
3021 const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
3022 const struct dxil_value *ptr, *value;
3023
3024 ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
3025 if (!ptr)
3026 return false;
3027
3028 value = get_src(ctx, &intr->src[0], 0, nir_type_uint);
3029 if (!value)
3030 return false;
3031
3032 if (intr->intrinsic == nir_intrinsic_store_shared_dxil)
3033 return dxil_emit_store(&ctx->mod, value, ptr, 4, false);
3034
3035 const struct dxil_value *mask = get_src(ctx, &intr->src[1], 0, nir_type_uint);
3036 if (!mask)
3037 return false;
3038
3039 if (!dxil_emit_atomicrmw(&ctx->mod, mask, ptr, DXIL_RMWOP_AND, false,
3040 DXIL_ATOMIC_ORDERING_ACQREL,
3041 DXIL_SYNC_SCOPE_CROSSTHREAD))
3042 return false;
3043
3044 if (!dxil_emit_atomicrmw(&ctx->mod, value, ptr, DXIL_RMWOP_OR, false,
3045 DXIL_ATOMIC_ORDERING_ACQREL,
3046 DXIL_SYNC_SCOPE_CROSSTHREAD))
3047 return false;
3048
3049 return true;
3050 }
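
/* The { base, 0, index } operand list is the usual LLVM GEP idiom: the
 * leading 0 steps through the pointer to the array variable itself and
 * `index` then selects the 32-bit element, so shared[4] becomes
 * gep(sharedvars, 0, 4).
 */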

static bool
emit_store_scratch(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *zero, *index;

   /* All scratch mem accesses should have been lowered to scalar 32-bit
    * accesses.
    */
   assert(nir_src_bit_size(intr->src[0]) == 32);
   assert(nir_src_num_components(intr->src[0]) == 1);

   zero = dxil_module_get_int32_const(&ctx->mod, 0);
   if (!zero)
      return false;

   index = get_src(ctx, &intr->src[1], 0, nir_type_uint);
   if (!index)
      return false;

   const struct dxil_value *ops[] = { ctx->scratchvars, zero, index };
   const struct dxil_value *ptr, *value;

   ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
   if (!ptr)
      return false;

   value = get_src(ctx, &intr->src[0], 0, nir_type_uint);
   if (!value)
      return false;

   return dxil_emit_store(&ctx->mod, value, ptr, 4, false);
}

static bool
emit_load_ubo(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, DXIL_RESOURCE_KIND_CBUFFER);
   if (!handle)
      return false;

   const struct dxil_value *offset;
   nir_const_value *const_offset = nir_src_as_const_value(intr->src[1]);
   if (const_offset) {
      offset = dxil_module_get_int32_const(&ctx->mod, const_offset->i32 >> 4);
   } else {
      const struct dxil_value *offset_src = get_src(ctx, &intr->src[1], 0, nir_type_uint);
      const struct dxil_value *c4 = dxil_module_get_int32_const(&ctx->mod, 4);
      if (!offset_src || !c4)
         return false;

      offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ASHR, offset_src, c4, 0);
   }

   const struct dxil_value *agg = load_ubo(ctx, handle, offset, DXIL_F32);

   if (!agg)
      return false;

   for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, agg, i);
      store_dest(ctx, &intr->dest, i, retval,
                 nir_dest_bit_size(intr->dest) > 1 ? nir_type_float : nir_type_bool);
   }
   return true;
}
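
/* cbuffer loads are addressed in 16-byte rows (one vec4 per row), hence
 * the shift by 4 above: e.g. byte offset 32 selects row 32 >> 4 == 2, and
 * the row's components come back in the returned aggregate.
 */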

static bool
emit_load_ubo_dxil(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   assert(nir_dest_num_components(intr->dest) <= 4);
   assert(nir_dest_bit_size(intr->dest) == 32);

   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, DXIL_RESOURCE_KIND_CBUFFER);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);

   if (!handle || !offset)
      return false;

   const struct dxil_value *agg = load_ubo(ctx, handle, offset, DXIL_I32);
   if (!agg)
      return false;

   for (unsigned i = 0; i < nir_dest_num_components(intr->dest); i++)
      store_dest_value(ctx, &intr->dest, i,
                       dxil_emit_extractval(&ctx->mod, agg, i));

   return true;
}

/* Need to add patch-ness as a matching parameter, since driver_location is *not* unique
 * between control points and patch variables in HS/DS
 */
static nir_variable *
find_patch_matching_variable_by_driver_location(nir_shader *s, nir_variable_mode mode, unsigned driver_location, bool patch)
{
   nir_foreach_variable_with_modes(var, s, mode) {
      if (var->data.driver_location == driver_location &&
          var->data.patch == patch)
         return var;
   }
   return NULL;
}

static bool
emit_store_output_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   assert(intr->intrinsic == nir_intrinsic_store_output ||
          ctx->mod.shader_kind == DXIL_HULL_SHADER);
   bool is_patch_constant = intr->intrinsic == nir_intrinsic_store_output &&
                            ctx->mod.shader_kind == DXIL_HULL_SHADER;
   nir_alu_type out_type = nir_intrinsic_src_type(intr);
   enum overload_type overload = get_overload(out_type, intr->src[0].ssa->bit_size);
   const struct dxil_func *func = dxil_get_function(&ctx->mod, is_patch_constant ?
                                                    "dx.op.storePatchConstant" : "dx.op.storeOutput",
                                                    overload);

   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, is_patch_constant ?
      DXIL_INTR_STORE_PATCH_CONSTANT : DXIL_INTR_STORE_OUTPUT);
   const struct dxil_value *output_id = dxil_module_get_int32_const(&ctx->mod, nir_intrinsic_base(intr));
   unsigned row_index = intr->intrinsic == nir_intrinsic_store_output ? 1 : 2;

   /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature
    * generation, so muck with them here too.
    */
   nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
   bool is_tess_level = is_patch_constant &&
                        (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
                         semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER);

   const struct dxil_value *row = NULL;
   const struct dxil_value *col = NULL;
   if (is_tess_level)
      col = dxil_module_get_int8_const(&ctx->mod, 0);
   else
      row = get_src(ctx, &intr->src[row_index], 0, nir_type_int);

   bool success = true;
   uint32_t writemask = nir_intrinsic_write_mask(intr);

   nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_out, nir_intrinsic_base(intr), is_patch_constant);
   unsigned var_base_component = var->data.location_frac;
   unsigned base_component = nir_intrinsic_component(intr) - var_base_component;

   if (ctx->mod.minor_validator >= 5) {
      struct dxil_signature_record *sig_rec = is_patch_constant ?
         &ctx->mod.patch_consts[nir_intrinsic_base(intr)] :
         &ctx->mod.outputs[nir_intrinsic_base(intr)];
      unsigned comp_size = intr->src[0].ssa->bit_size == 64 ? 2 : 1;
      unsigned comp_mask = 0;
      if (is_tess_level)
         comp_mask = 1;
      else if (comp_size == 1)
         comp_mask = writemask << var_base_component;
      else {
         for (unsigned i = 0; i < intr->num_components; ++i)
            if ((writemask & (1 << i)))
               comp_mask |= 3 << ((i + var_base_component) * comp_size);
      }
      for (unsigned r = 0; r < sig_rec->num_elements; ++r)
         sig_rec->elements[r].never_writes_mask &= ~comp_mask;

      if (!nir_src_is_const(intr->src[row_index])) {
         struct dxil_psv_signature_element *psv_rec = is_patch_constant ?
            &ctx->mod.psv_patch_consts[nir_intrinsic_base(intr)] :
            &ctx->mod.psv_outputs[nir_intrinsic_base(intr)];
         psv_rec->dynamic_mask_and_stream |= comp_mask;
      }
   }

   for (unsigned i = 0; i < intr->num_components && success; ++i) {
      if (writemask & (1 << i)) {
         if (is_tess_level)
            row = dxil_module_get_int32_const(&ctx->mod, i + base_component);
         else
            col = dxil_module_get_int8_const(&ctx->mod, i + base_component);
         const struct dxil_value *value = get_src(ctx, &intr->src[0], i, out_type);
         if (!col || !row || !value)
            return false;

         const struct dxil_value *args[] = {
            opcode, output_id, row, col, value
         };
         success &= dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
      }
   }

   return success;
}
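
/* Worked example of the 64-bit masking above: a dvec2 store with writemask
 * 0x3, comp_size == 2 and var_base_component == 0 covers four 32-bit
 * signature slots, so comp_mask becomes 0b1111 and all four dwords are
 * cleared from never_writes_mask.
 */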

static bool
emit_load_input_via_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   bool attr_at_vertex = false;
   if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER &&
       ctx->opts->interpolate_at_vertex &&
       ctx->opts->provoking_vertex != 0 &&
       (nir_intrinsic_dest_type(intr) & nir_type_float)) {
      nir_variable *var = nir_find_variable_with_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr));

      attr_at_vertex = var && var->data.interpolation == INTERP_MODE_FLAT;
   }

   bool is_patch_constant = (ctx->mod.shader_kind == DXIL_DOMAIN_SHADER &&
                             intr->intrinsic == nir_intrinsic_load_input) ||
                            (ctx->mod.shader_kind == DXIL_HULL_SHADER &&
                             intr->intrinsic == nir_intrinsic_load_output);
   bool is_output_control_point = intr->intrinsic == nir_intrinsic_load_per_vertex_output;

   unsigned opcode_val;
   const char *func_name;
   if (attr_at_vertex) {
      opcode_val = DXIL_INTR_ATTRIBUTE_AT_VERTEX;
      func_name = "dx.op.attributeAtVertex";
      if (ctx->mod.minor_validator >= 6)
         ctx->mod.feats.barycentrics = 1;
   } else if (is_patch_constant) {
      opcode_val = DXIL_INTR_LOAD_PATCH_CONSTANT;
      func_name = "dx.op.loadPatchConstant";
   } else if (is_output_control_point) {
      opcode_val = DXIL_INTR_LOAD_OUTPUT_CONTROL_POINT;
      func_name = "dx.op.loadOutputControlPoint";
   } else {
      opcode_val = DXIL_INTR_LOAD_INPUT;
      func_name = "dx.op.loadInput";
   }

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, opcode_val);
   if (!opcode)
      return false;

   const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod,
      is_patch_constant || is_output_control_point ?
      nir_intrinsic_base(intr) :
      ctx->mod.input_mappings[nir_intrinsic_base(intr)]);
   if (!input_id)
      return false;

   bool is_per_vertex =
      intr->intrinsic == nir_intrinsic_load_per_vertex_input ||
      intr->intrinsic == nir_intrinsic_load_per_vertex_output;
   int row_index = is_per_vertex ? 1 : 0;
   const struct dxil_value *vertex_id = NULL;
   if (!is_patch_constant) {
      if (is_per_vertex) {
         vertex_id = get_src(ctx, &intr->src[0], 0, nir_type_int);
      } else if (attr_at_vertex) {
         vertex_id = dxil_module_get_int8_const(&ctx->mod, ctx->opts->provoking_vertex);
      } else {
         const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);
         if (!int32_type)
            return false;

         vertex_id = dxil_module_get_undef(&ctx->mod, int32_type);
      }
      if (!vertex_id)
         return false;
   }

   /* NIR has these as 1 row, N cols, but DXIL wants them as N rows, 1 col. We muck with these in the signature
    * generation, so muck with them here too.
    */
   nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
   bool is_tess_level = is_patch_constant &&
                        (semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
                         semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER);

   const struct dxil_value *row = NULL;
   const struct dxil_value *comp = NULL;
   if (is_tess_level)
      comp = dxil_module_get_int8_const(&ctx->mod, 0);
   else
      row = get_src(ctx, &intr->src[row_index], 0, nir_type_int);

   nir_alu_type out_type = nir_intrinsic_dest_type(intr);
   enum overload_type overload = get_overload(out_type, intr->dest.ssa.bit_size);

   const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, overload);

   if (!func)
      return false;

   nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), is_patch_constant);
   unsigned var_base_component = var ? var->data.location_frac : 0;
   unsigned base_component = nir_intrinsic_component(intr) - var_base_component;

   if (ctx->mod.minor_validator >= 5 &&
       !is_output_control_point &&
       intr->intrinsic != nir_intrinsic_load_output) {
      struct dxil_signature_record *sig_rec = is_patch_constant ?
         &ctx->mod.patch_consts[nir_intrinsic_base(intr)] :
         &ctx->mod.inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
      unsigned comp_size = intr->dest.ssa.bit_size == 64 ? 2 : 1;
      unsigned comp_mask = (1 << (intr->num_components * comp_size)) - 1;
      comp_mask <<= (var_base_component * comp_size);
      if (is_tess_level)
         comp_mask = 1;
      for (unsigned r = 0; r < sig_rec->num_elements; ++r)
         sig_rec->elements[r].always_reads_mask |= (comp_mask & sig_rec->elements[r].mask);

      if (!nir_src_is_const(intr->src[row_index])) {
         struct dxil_psv_signature_element *psv_rec = is_patch_constant ?
            &ctx->mod.psv_patch_consts[nir_intrinsic_base(intr)] :
            &ctx->mod.psv_inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
         psv_rec->dynamic_mask_and_stream |= comp_mask;
      }
   }

   for (unsigned i = 0; i < intr->num_components; ++i) {
      if (is_tess_level)
         row = dxil_module_get_int32_const(&ctx->mod, i + base_component);
      else
         comp = dxil_module_get_int8_const(&ctx->mod, i + base_component);

      if (!row || !comp)
         return false;

      const struct dxil_value *args[] = {
         opcode, input_id, row, comp, vertex_id
      };

      unsigned num_args = ARRAY_SIZE(args) - (is_patch_constant ? 1 : 0);
      const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args);
      if (!retval)
         return false;
      store_dest(ctx, &intr->dest, i, retval, out_type);
   }
   return true;
}

static bool
emit_load_interpolated_input(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   nir_intrinsic_instr *barycentric = nir_src_as_intrinsic(intr->src[0]);

   const struct dxil_value *args[6] = { 0 };

   unsigned opcode_val;
   const char *func_name;
   unsigned num_args;
   switch (barycentric->intrinsic) {
   case nir_intrinsic_load_barycentric_at_offset:
      opcode_val = DXIL_INTR_EVAL_SNAPPED;
      func_name = "dx.op.evalSnapped";
      num_args = 6;
      for (unsigned i = 0; i < 2; ++i) {
         const struct dxil_value *float_offset = get_src(ctx, &barycentric->src[0], i, nir_type_float);
         /* GLSL uses [-0.5f, 0.5f), DXIL uses the signed 4-bit range [-8, 7] */
         const struct dxil_value *offset_16 = dxil_emit_binop(&ctx->mod,
            DXIL_BINOP_MUL, float_offset, dxil_module_get_float_const(&ctx->mod, 16.0f), 0);
         args[i + 4] = dxil_emit_cast(&ctx->mod, DXIL_CAST_FPTOSI,
            dxil_module_get_int_type(&ctx->mod, 32), offset_16);
      }
      break;
   case nir_intrinsic_load_barycentric_pixel:
      opcode_val = DXIL_INTR_EVAL_SNAPPED;
      func_name = "dx.op.evalSnapped";
      num_args = 6;
      args[4] = args[5] = dxil_module_get_int32_const(&ctx->mod, 0);
      break;
   case nir_intrinsic_load_barycentric_at_sample:
      opcode_val = DXIL_INTR_EVAL_SAMPLE_INDEX;
      func_name = "dx.op.evalSampleIndex";
      num_args = 5;
      args[4] = get_src(ctx, &barycentric->src[0], 0, nir_type_int);
      break;
   case nir_intrinsic_load_barycentric_centroid:
      opcode_val = DXIL_INTR_EVAL_CENTROID;
      func_name = "dx.op.evalCentroid";
      num_args = 4;
      break;
   default:
      unreachable("Unsupported interpolation barycentric intrinsic");
   }
   args[0] = dxil_module_get_int32_const(&ctx->mod, opcode_val);
   args[1] = dxil_module_get_int32_const(&ctx->mod, nir_intrinsic_base(intr));
   args[2] = get_src(ctx, &intr->src[1], 0, nir_type_int);

   const struct dxil_func *func = dxil_get_function(&ctx->mod, func_name, DXIL_F32);

   if (!func)
      return false;

   nir_variable *var = find_patch_matching_variable_by_driver_location(ctx->shader, nir_var_shader_in, nir_intrinsic_base(intr), false);
   unsigned var_base_component = var ? var->data.location_frac : 0;
   unsigned base_component = nir_intrinsic_component(intr) - var_base_component;

   if (ctx->mod.minor_validator >= 5) {
      struct dxil_signature_record *sig_rec =
         &ctx->mod.inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
      unsigned comp_size = intr->dest.ssa.bit_size == 64 ? 2 : 1;
      unsigned comp_mask = (1 << (intr->num_components * comp_size)) - 1;
      comp_mask <<= (var_base_component * comp_size);
      for (unsigned r = 0; r < sig_rec->num_elements; ++r)
         sig_rec->elements[r].always_reads_mask |= (comp_mask & sig_rec->elements[r].mask);

      if (!nir_src_is_const(intr->src[1])) {
         struct dxil_psv_signature_element *psv_rec =
            &ctx->mod.psv_inputs[ctx->mod.input_mappings[nir_intrinsic_base(intr)]];
         psv_rec->dynamic_mask_and_stream |= comp_mask;
      }
   }

   for (unsigned i = 0; i < intr->num_components; ++i) {
      args[3] = dxil_module_get_int8_const(&ctx->mod, i + base_component);

      const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, num_args);
      if (!retval)
         return false;
      store_dest(ctx, &intr->dest, i, retval, nir_type_float);
   }
   return true;
}
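
/* Worked example of the evalSnapped conversion above: a GLSL offset of
 * 0.25 maps to (int)(0.25f * 16.0f) == 4, i.e. position 4 on the
 * 1/16th-pixel grid within the signed 4-bit range [-8, 7].
 */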

static bool
emit_load_ptr(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   struct nir_variable *var =
      nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));

   const struct dxil_value *index =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);
   if (!index)
      return false;

   const struct dxil_value *ptr = emit_gep_for_index(ctx, var, index);
   if (!ptr)
      return false;

   const struct dxil_value *retval =
      dxil_emit_load(&ctx->mod, ptr, 4, false);
   if (!retval)
      return false;

   store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
   return true;
}

static bool
emit_load_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *zero, *index;
   unsigned bit_size = nir_dest_bit_size(intr->dest);
   unsigned align = bit_size / 8;

   /* All shared mem accesses should have been lowered to scalar 32-bit
    * accesses.
    */
   assert(bit_size == 32);
   assert(nir_dest_num_components(intr->dest) == 1);

   zero = dxil_module_get_int32_const(&ctx->mod, 0);
   if (!zero)
      return false;

   index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
   if (!index)
      return false;

   const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
   const struct dxil_value *ptr, *retval;

   ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
   if (!ptr)
      return false;

   retval = dxil_emit_load(&ctx->mod, ptr, align, false);
   if (!retval)
      return false;

   store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
   return true;
}

static bool
emit_load_scratch(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *zero, *index;
   unsigned bit_size = nir_dest_bit_size(intr->dest);
   unsigned align = bit_size / 8;

   /* All scratch mem accesses should have been lowered to scalar 32-bit
    * accesses.
    */
   assert(bit_size == 32);
   assert(nir_dest_num_components(intr->dest) == 1);

   zero = dxil_module_get_int32_const(&ctx->mod, 0);
   if (!zero)
      return false;

   index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
   if (!index)
      return false;

   const struct dxil_value *ops[] = { ctx->scratchvars, zero, index };
   const struct dxil_value *ptr, *retval;

   ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
   if (!ptr)
      return false;

   retval = dxil_emit_load(&ctx->mod, ptr, align, false);
   if (!retval)
      return false;

   store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
   return true;
}

static bool
emit_discard_if_with_value(struct ntd_context *ctx, const struct dxil_value *value)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DISCARD);
   if (!opcode)
      return false;

   const struct dxil_value *args[] = {
      opcode,
      value
   };

   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.discard", DXIL_NONE);
   if (!func)
      return false;

   return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_discard_if(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *value = get_src(ctx, &intr->src[0], 0, nir_type_bool);
   if (!value)
      return false;

   return emit_discard_if_with_value(ctx, value);
}

static bool
emit_discard(struct ntd_context *ctx)
{
   const struct dxil_value *value = dxil_module_get_int1_const(&ctx->mod, true);
   return emit_discard_if_with_value(ctx, value);
}

static bool
emit_emit_vertex(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_EMIT_STREAM);
   const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
   if (!opcode || !stream_id)
      return false;

   const struct dxil_value *args[] = {
      opcode,
      stream_id
   };

   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.emitStream", DXIL_NONE);
   if (!func)
      return false;

   return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_end_primitive(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CUT_STREAM);
   const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));
   if (!opcode || !stream_id)
      return false;

   const struct dxil_value *args[] = {
      opcode,
      stream_id
   };

   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cutStream", DXIL_NONE);
   if (!func)
      return false;

   return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_image_store(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
   if (!handle)
      return false;

   bool is_array = false;
   if (intr->intrinsic == nir_intrinsic_image_deref_store)
      is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
   else
      is_array = nir_intrinsic_image_array(intr);

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
   enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_store ?
      nir_intrinsic_image_dim(intr) :
      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type);
   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
   if (is_array)
      ++num_coords;

   assert(num_coords <= nir_src_num_components(intr->src[1]));
   for (unsigned i = 0; i < num_coords; ++i) {
      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
      if (!coord[i])
         return false;
   }

   nir_alu_type in_type = nir_intrinsic_src_type(intr);
   enum overload_type overload = get_overload(in_type, 32);

   assert(nir_src_bit_size(intr->src[3]) == 32);
   unsigned num_components = nir_src_num_components(intr->src[3]);
   assert(num_components <= 4);
   const struct dxil_value *value[4];
   for (unsigned i = 0; i < num_components; ++i) {
      value[i] = get_src(ctx, &intr->src[3], i, in_type);
      if (!value[i])
         return false;
   }

   for (int i = num_components; i < 4; ++i)
      value[i] = int32_undef;

   const struct dxil_value *write_mask =
      dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);
   if (!write_mask)
      return false;

   if (image_dim == GLSL_SAMPLER_DIM_BUF) {
      coord[1] = int32_undef;
      return emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload);
   } else
      return emit_texturestore_call(ctx, handle, coord, value, write_mask, overload);
}
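
/* The coordinate count follows the image dimensionality, e.g. a 2D array
 * uses two coordinates plus one layer index, giving coord = { x, y, layer };
 * any remaining slots simply stay undef.
 */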

static bool
emit_image_load(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
   if (!handle)
      return false;

   bool is_array = false;
   if (intr->intrinsic == nir_intrinsic_image_deref_load)
      is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
   else
      is_array = nir_intrinsic_image_array(intr);

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
   enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_load ?
      nir_intrinsic_image_dim(intr) :
      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type);
   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
   if (is_array)
      ++num_coords;

   assert(num_coords <= nir_src_num_components(intr->src[1]));
   for (unsigned i = 0; i < num_coords; ++i) {
      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
      if (!coord[i])
         return false;
   }

   nir_alu_type out_type = nir_intrinsic_dest_type(intr);
   enum overload_type overload = get_overload(out_type, 32);

   const struct dxil_value *load_result;
   if (image_dim == GLSL_SAMPLER_DIM_BUF) {
      coord[1] = int32_undef;
      load_result = emit_bufferload_call(ctx, handle, coord, overload);
   } else
      load_result = emit_textureload_call(ctx, handle, coord, overload);

   if (!load_result)
      return false;

   assert(nir_dest_bit_size(intr->dest) == 32);
   unsigned num_components = nir_dest_num_components(intr->dest);
   assert(num_components <= 4);
   for (unsigned i = 0; i < num_components; ++i) {
      const struct dxil_value *component = dxil_emit_extractval(&ctx->mod, load_result, i);
      if (!component)
         return false;
      store_dest(ctx, &intr->dest, i, component, out_type);
   }

   /* FIXME: This flag should be set to true when the RWTexture is attached
    * to a vector, and we always declare a vec4 right now, so it should
    * always be true. Might be worth reworking dxil_module_get_res_type() to
    * use a scalar when the image only has one component.
    */
   ctx->mod.feats.typed_uav_load_additional_formats = true;

   return true;
}

static bool
emit_image_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr,
                  enum dxil_atomic_op op, nir_alu_type type)
{
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
   if (!handle)
      return false;

   bool is_array = false;
   nir_deref_instr *src_as_deref = nir_src_as_deref(intr->src[0]);
   if (src_as_deref)
      is_array = glsl_sampler_type_is_array(src_as_deref->type);
   else
      is_array = nir_intrinsic_image_array(intr);

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
   enum glsl_sampler_dim image_dim = src_as_deref ?
      glsl_get_sampler_dim(src_as_deref->type) :
      nir_intrinsic_image_dim(intr);
   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
   if (is_array)
      ++num_coords;

   assert(num_coords <= nir_src_num_components(intr->src[1]));
   for (unsigned i = 0; i < num_coords; ++i) {
      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
      if (!coord[i])
         return false;
   }

   const struct dxil_value *value = get_src(ctx, &intr->src[3], 0, type);
   if (!value)
      return false;

   const struct dxil_value *retval =
      emit_atomic_binop(ctx, handle, op, coord, value);

   if (!retval)
      return false;

   store_dest(ctx, &intr->dest, 0, retval, type);
   return true;
}

static bool
emit_image_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
   if (!handle)
      return false;

   bool is_array = false;
   if (intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap)
      is_array = glsl_sampler_type_is_array(nir_src_as_deref(intr->src[0])->type);
   else
      is_array = nir_intrinsic_image_array(intr);

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };
   enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_atomic_comp_swap ?
      nir_intrinsic_image_dim(intr) :
      glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type);
   unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);
   if (is_array)
      ++num_coords;

   assert(num_coords <= nir_src_num_components(intr->src[1]));
   for (unsigned i = 0; i < num_coords; ++i) {
      coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);
      if (!coord[i])
         return false;
   }

   const struct dxil_value *cmpval = get_src(ctx, &intr->src[3], 0, nir_type_uint);
   const struct dxil_value *newval = get_src(ctx, &intr->src[4], 0, nir_type_uint);
   if (!cmpval || !newval)
      return false;

   const struct dxil_value *retval =
      emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval);

   if (!retval)
      return false;

   store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
   return true;
}

struct texop_parameters {
   const struct dxil_value *tex;
   const struct dxil_value *sampler;
   const struct dxil_value *bias, *lod_or_sample, *min_lod;
   const struct dxil_value *coord[4], *offset[3], *dx[3], *dy[3];
   const struct dxil_value *cmp;
   enum overload_type overload;
};

static const struct dxil_value *
emit_texture_size(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.getDimensions", DXIL_NONE);
   if (!func)
      return NULL;

   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_SIZE),
      params->tex,
      params->lod_or_sample
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static bool
emit_image_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_TEXTURE2D);
   if (!handle)
      return false;

   const struct dxil_value *lod = get_src(ctx, &intr->src[1], 0, nir_type_uint);
   if (!lod)
      return false;

   struct texop_parameters params = {
      .tex = handle,
      .lod_or_sample = lod
   };
   const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
   if (!dimensions)
      return false;

   for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, i);
      store_dest(ctx, &intr->dest, i, retval, nir_type_uint);
   }

   return true;
}

static bool
emit_get_ssbo_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV;
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));
      if (var && var->data.access & ACCESS_NON_WRITEABLE)
         class = DXIL_RESOURCE_CLASS_SRV;
   }

   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], class, DXIL_RESOURCE_KIND_RAW_BUFFER);
   if (!handle)
      return false;

   struct texop_parameters params = {
      .tex = handle,
      .lod_or_sample = dxil_module_get_undef(
         &ctx->mod, dxil_module_get_int_type(&ctx->mod, 32))
   };

   const struct dxil_value *dimensions = emit_texture_size(ctx, &params);
   if (!dimensions)
      return false;

   const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, 0);
   store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);

   return true;
}
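
/* For raw buffers, dx.op.getDimensions reports the size in bytes in the
 * first component, which is what NIR's get_ssbo_size expects here.
 */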

static bool
emit_ssbo_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr,
                 enum dxil_atomic_op op, nir_alu_type type)
{
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);
   const struct dxil_value *value =
      get_src(ctx, &intr->src[2], 0, type);

   if (!value || !handle || !offset)
      return false;

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = {
      offset, int32_undef, int32_undef
   };

   const struct dxil_value *retval =
      emit_atomic_binop(ctx, handle, op, coord, value);

   if (!retval)
      return false;

   store_dest(ctx, &intr->dest, 0, retval, type);
   return true;
}

static bool
emit_ssbo_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *handle = get_resource_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, DXIL_RESOURCE_KIND_RAW_BUFFER);
   const struct dxil_value *offset =
      get_src(ctx, &intr->src[1], 0, nir_type_uint);
   const struct dxil_value *cmpval =
      get_src(ctx, &intr->src[2], 0, nir_type_int);
   const struct dxil_value *newval =
      get_src(ctx, &intr->src[3], 0, nir_type_int);

   if (!cmpval || !newval || !handle || !offset)
      return false;

   const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);
   if (!int32_undef)
      return false;

   const struct dxil_value *coord[3] = {
      offset, int32_undef, int32_undef
   };

   const struct dxil_value *retval =
      emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval);

   if (!retval)
      return false;

   store_dest(ctx, &intr->dest, 0, retval, nir_type_int);
   return true;
}

static bool
emit_shared_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr,
                   enum dxil_rmw_op op, nir_alu_type type)
{
   const struct dxil_value *zero, *index;

   assert(nir_src_bit_size(intr->src[1]) == 32);

   zero = dxil_module_get_int32_const(&ctx->mod, 0);
   if (!zero)
      return false;

   index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
   if (!index)
      return false;

   const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
   const struct dxil_value *ptr, *value, *retval;

   ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
   if (!ptr)
      return false;

   value = get_src(ctx, &intr->src[1], 0, type);
   if (!value)
      return false;

   retval = dxil_emit_atomicrmw(&ctx->mod, value, ptr, op, false,
                                DXIL_ATOMIC_ORDERING_ACQREL,
                                DXIL_SYNC_SCOPE_CROSSTHREAD);
   if (!retval)
      return false;

   store_dest(ctx, &intr->dest, 0, retval, type);
   return true;
}

static bool
emit_shared_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *zero, *index;

   assert(nir_src_bit_size(intr->src[1]) == 32);

   zero = dxil_module_get_int32_const(&ctx->mod, 0);
   if (!zero)
      return false;

   index = get_src(ctx, &intr->src[0], 0, nir_type_uint);
   if (!index)
      return false;

   const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };
   const struct dxil_value *ptr, *cmpval, *newval, *retval;

   ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));
   if (!ptr)
      return false;

   cmpval = get_src(ctx, &intr->src[1], 0, nir_type_uint);
   newval = get_src(ctx, &intr->src[2], 0, nir_type_uint);
   if (!cmpval || !newval)
      return false;

   retval = dxil_emit_cmpxchg(&ctx->mod, cmpval, newval, ptr, false,
                              DXIL_ATOMIC_ORDERING_ACQREL,
                              DXIL_SYNC_SCOPE_CROSSTHREAD);
   if (!retval)
      return false;

   store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);
   return true;
}

static bool
emit_vulkan_resource_index(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   unsigned int binding = nir_intrinsic_binding(intr);

   bool const_index = nir_src_is_const(intr->src[0]);
   if (const_index) {
      binding += nir_src_as_const_value(intr->src[0])->u32;
   }

   const struct dxil_value *index_value = dxil_module_get_int32_const(&ctx->mod, binding);
   if (!index_value)
      return false;

   if (!const_index) {
      const struct dxil_value *offset = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
      if (!offset)
         return false;

      index_value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, index_value, offset, 0);
      if (!index_value)
         return false;
   }

   store_dest(ctx, &intr->dest, 0, index_value, nir_type_uint32);
   store_dest(ctx, &intr->dest, 1, dxil_module_get_int32_const(&ctx->mod, 0), nir_type_uint32);
   return true;
}
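
/* E.g. a resource declared with binding = 2 and addressed with a dynamic
 * array index i produces the pair (2 + i, 0); with a constant index the
 * addition is folded into the int32 constant above.
 */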

static bool
emit_load_vulkan_descriptor(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   nir_intrinsic_instr *index = nir_src_as_intrinsic(intr->src[0]);
   /* We currently do not support reindex */
   assert(index && index->intrinsic == nir_intrinsic_vulkan_resource_index);

   unsigned binding = nir_intrinsic_binding(index);
   unsigned space = nir_intrinsic_desc_set(index);

   /* The descriptor_set field for variables is only 5 bits. We shouldn't have intrinsics trying to go beyond that. */
   assert(space < 32);

   nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));

   const struct dxil_value *handle = NULL;
   enum dxil_resource_class resource_class;

   switch (nir_intrinsic_desc_type(intr)) {
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
      resource_class = DXIL_RESOURCE_CLASS_CBV;
      break;
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
      if (var->data.access & ACCESS_NON_WRITEABLE)
         resource_class = DXIL_RESOURCE_CLASS_SRV;
      else
         resource_class = DXIL_RESOURCE_CLASS_UAV;
      break;
   default:
      unreachable("unknown descriptor type");
      return false;
   }

   const struct dxil_value *index_value = get_src(ctx, &intr->src[0], 0, nir_type_uint32);
   if (!index_value)
      return false;

   handle = emit_createhandle_call(ctx, resource_class,
                                   get_resource_id(ctx, resource_class, space, binding),
                                   index_value, false);

   store_dest_value(ctx, &intr->dest, 0, handle);
   store_dest(ctx, &intr->dest, 1, get_src(ctx, &intr->src[0], 1, nir_type_uint32), nir_type_uint32);

   return true;
}

static bool
emit_load_sample_pos_from_id(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.renderTargetGetSamplePosition", DXIL_NONE);
   if (!func)
      return false;

   const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_RENDER_TARGET_GET_SAMPLE_POSITION);
   if (!opcode)
      return false;

   const struct dxil_value *args[] = {
      opcode,
      get_src(ctx, &intr->src[0], 0, nir_type_uint32),
   };
   if (!args[1])
      return false;

   const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
   if (!v)
      return false;

   for (unsigned i = 0; i < 2; ++i) {
      /* GL coords go from 0 -> 1, D3D from -0.5 -> 0.5 */
      const struct dxil_value *coord = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
         dxil_emit_extractval(&ctx->mod, v, i),
         dxil_module_get_float_const(&ctx->mod, 0.5f), 0);
      store_dest(ctx, &intr->dest, i, coord, nir_type_float32);
   }
   return true;
}
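
/* E.g. a D3D sample offset of (-0.125, 0.375) relative to the pixel center
 * becomes (0.375, 0.875) in GL's [0, 1) pixel-space convention.
 */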

static bool
emit_load_layer_id(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   const struct dxil_value *layer_id = dxil_module_get_int32_const(&ctx->mod, 0);
   /* TODO: Properly implement this once multi-view is supported */
   store_dest_value(ctx, &intr->dest, 0, layer_id);
   return true;
}

static bool
emit_load_sample_id(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   assert(ctx->mod.info.has_per_sample_input ||
          intr->intrinsic == nir_intrinsic_load_sample_id_no_per_sample);

   if (ctx->mod.info.has_per_sample_input)
      return emit_load_unary_external_function(ctx, intr, "dx.op.sampleIndex",
                                               DXIL_INTR_SAMPLE_INDEX);

   store_dest_value(ctx, &intr->dest, 0, dxil_module_get_int32_const(&ctx->mod, 0));
   return true;
}

static bool
emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_load_global_invocation_id:
   case nir_intrinsic_load_global_invocation_id_zero_base:
      return emit_load_global_invocation_id(ctx, intr);
   case nir_intrinsic_load_local_invocation_id:
      return emit_load_local_invocation_id(ctx, intr);
   case nir_intrinsic_load_local_invocation_index:
      return emit_load_local_invocation_index(ctx, intr);
   case nir_intrinsic_load_workgroup_id:
   case nir_intrinsic_load_workgroup_id_zero_base:
      return emit_load_local_workgroup_id(ctx, intr);
   case nir_intrinsic_load_ssbo:
      return emit_load_ssbo(ctx, intr);
   case nir_intrinsic_store_ssbo:
      return emit_store_ssbo(ctx, intr);
   case nir_intrinsic_store_ssbo_masked_dxil:
      return emit_store_ssbo_masked(ctx, intr);
   case nir_intrinsic_store_shared_dxil:
   case nir_intrinsic_store_shared_masked_dxil:
      return emit_store_shared(ctx, intr);
   case nir_intrinsic_store_scratch_dxil:
      return emit_store_scratch(ctx, intr);
   case nir_intrinsic_load_ptr_dxil:
      return emit_load_ptr(ctx, intr);
   case nir_intrinsic_load_ubo:
      return emit_load_ubo(ctx, intr);
   case nir_intrinsic_load_ubo_dxil:
      return emit_load_ubo_dxil(ctx, intr);
   case nir_intrinsic_load_primitive_id:
      return emit_load_unary_external_function(ctx, intr, "dx.op.primitiveID",
                                               DXIL_INTR_PRIMITIVE_ID);
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_id_no_per_sample:
      return emit_load_sample_id(ctx, intr);
   case nir_intrinsic_load_invocation_id:
      switch (ctx->mod.shader_kind) {
      case DXIL_HULL_SHADER:
         return emit_load_unary_external_function(ctx, intr, "dx.op.outputControlPointID",
                                                  DXIL_INTR_OUTPUT_CONTROL_POINT_ID);
      case DXIL_GEOMETRY_SHADER:
         return emit_load_unary_external_function(ctx, intr, "dx.op.gsInstanceID",
                                                  DXIL_INTR_GS_INSTANCE_ID);
      default:
         unreachable("Unexpected shader kind for invocation ID");
      }
   case nir_intrinsic_load_sample_mask_in:
      return emit_load_sample_mask_in(ctx, intr);
   case nir_intrinsic_load_tess_coord:
      return emit_load_tess_coord(ctx, intr);
   case nir_intrinsic_load_shared_dxil:
      return emit_load_shared(ctx, intr);
   case nir_intrinsic_load_scratch_dxil:
      return emit_load_scratch(ctx, intr);
   case nir_intrinsic_discard_if:
   case nir_intrinsic_demote_if:
      return emit_discard_if(ctx, intr);
   case nir_intrinsic_discard:
   case nir_intrinsic_demote:
      return emit_discard(ctx);
   case nir_intrinsic_emit_vertex:
      return emit_emit_vertex(ctx, intr);
   case nir_intrinsic_end_primitive:
      return emit_end_primitive(ctx, intr);
   case nir_intrinsic_scoped_barrier:
      return emit_barrier(ctx, intr);
   case nir_intrinsic_memory_barrier:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_memory_barrier_image:
   case nir_intrinsic_memory_barrier_atomic_counter:
      return emit_memory_barrier(ctx, intr);
   case nir_intrinsic_memory_barrier_shared:
      return emit_memory_barrier_shared(ctx, intr);
   case nir_intrinsic_group_memory_barrier:
      return emit_group_memory_barrier(ctx, intr);
   case nir_intrinsic_control_barrier:
      return emit_control_barrier(ctx, intr);
   case nir_intrinsic_ssbo_atomic_add:
      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_ADD, nir_type_int);
   case nir_intrinsic_ssbo_atomic_imin:
      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_IMIN, nir_type_int);
   case nir_intrinsic_ssbo_atomic_umin:
      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_UMIN, nir_type_uint);
   case nir_intrinsic_ssbo_atomic_imax:
      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_IMAX, nir_type_int);
   case nir_intrinsic_ssbo_atomic_umax:
      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_UMAX, nir_type_uint);
   case nir_intrinsic_ssbo_atomic_and:
      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_AND, nir_type_uint);
   case nir_intrinsic_ssbo_atomic_or:
      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_OR, nir_type_uint);
   case nir_intrinsic_ssbo_atomic_xor:
      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_XOR, nir_type_uint);
   case nir_intrinsic_ssbo_atomic_exchange:
      return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_EXCHANGE, nir_type_int);
   case nir_intrinsic_ssbo_atomic_comp_swap:
      return emit_ssbo_atomic_comp_swap(ctx, intr);
   case nir_intrinsic_shared_atomic_add_dxil:
      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_ADD, nir_type_int);
   case nir_intrinsic_shared_atomic_imin_dxil:
      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_MIN, nir_type_int);
   case nir_intrinsic_shared_atomic_umin_dxil:
      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_UMIN, nir_type_uint);
   case nir_intrinsic_shared_atomic_imax_dxil:
      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_MAX, nir_type_int);
   case nir_intrinsic_shared_atomic_umax_dxil:
      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_UMAX, nir_type_uint);
   case nir_intrinsic_shared_atomic_and_dxil:
      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_AND, nir_type_uint);
   case nir_intrinsic_shared_atomic_or_dxil:
      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_OR, nir_type_uint);
   case nir_intrinsic_shared_atomic_xor_dxil:
      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_XOR, nir_type_uint);
   case nir_intrinsic_shared_atomic_exchange_dxil:
      return emit_shared_atomic(ctx, intr, DXIL_RMWOP_XCHG, nir_type_int);
   case nir_intrinsic_shared_atomic_comp_swap_dxil:
      return emit_shared_atomic_comp_swap(ctx, intr);
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_atomic_add:
      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_ADD, nir_type_int);
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_atomic_imin:
      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_IMIN, nir_type_int);
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_atomic_umin:
      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_UMIN, nir_type_uint);
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_atomic_imax:
      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_IMAX, nir_type_int);
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_atomic_umax:
      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_UMAX, nir_type_uint);
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_atomic_and:
      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_AND, nir_type_uint);
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_atomic_or:
      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_OR, nir_type_uint);
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_atomic_xor:
      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_XOR, nir_type_uint);
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_atomic_exchange:
      return emit_image_atomic(ctx, intr, DXIL_ATOMIC_EXCHANGE, nir_type_uint);
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_atomic_comp_swap:
      return emit_image_atomic_comp_swap(ctx, intr);
   case nir_intrinsic_image_store:
   case nir_intrinsic_image_deref_store:
      return emit_image_store(ctx, intr);
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_deref_load:
      return emit_image_load(ctx, intr);
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
      return emit_image_size(ctx, intr);
   case nir_intrinsic_get_ssbo_size:
      return emit_get_ssbo_size(ctx, intr);
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_output:
      return emit_load_input_via_intrinsic(ctx, intr);
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output:
      return emit_store_output_via_intrinsic(ctx, intr);

   case nir_intrinsic_load_barycentric_at_offset:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_pixel:
      /* Emit nothing, we only support these as inputs to load_interpolated_input */
      return true;
   case nir_intrinsic_load_interpolated_input:
      return emit_load_interpolated_input(ctx, intr);

   case nir_intrinsic_vulkan_resource_index:
      return emit_vulkan_resource_index(ctx, intr);
   case nir_intrinsic_load_vulkan_descriptor:
      return emit_load_vulkan_descriptor(ctx, intr);
   case nir_intrinsic_load_layer_id:
      return emit_load_layer_id(ctx, intr);

   case nir_intrinsic_load_sample_pos_from_id:
      return emit_load_sample_pos_from_id(ctx, intr);

   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_workgroup_size:
   default:
      NIR_INSTR_UNSUPPORTED(&intr->instr);
      unreachable("Unimplemented intrinsic instruction");
      return false;
   }
}

static bool
emit_load_const(struct ntd_context *ctx, nir_load_const_instr *load_const)
{
   for (int i = 0; i < load_const->def.num_components; ++i) {
      const struct dxil_value *value;
      switch (load_const->def.bit_size) {
      case 1:
         value = dxil_module_get_int1_const(&ctx->mod,
                                            load_const->value[i].b);
         break;
      case 16:
         ctx->mod.feats.native_low_precision = true;
         value = dxil_module_get_int16_const(&ctx->mod,
                                             load_const->value[i].u16);
         break;
      case 32:
         value = dxil_module_get_int32_const(&ctx->mod,
                                             load_const->value[i].u32);
         break;
      case 64:
         ctx->mod.feats.int64_ops = true;
         value = dxil_module_get_int64_const(&ctx->mod,
                                             load_const->value[i].u64);
         break;
      default:
         unreachable("unexpected bit_size");
      }
      if (!value)
         return false;

      store_ssa_def(ctx, &load_const->def, i, value);
   }
   return true;
}

static bool
emit_deref(struct ntd_context *ctx, nir_deref_instr *instr)
{
   assert(instr->deref_type == nir_deref_type_var ||
          instr->deref_type == nir_deref_type_array);

   /* In the CL environment, there's nothing to emit. Any references to
    * derefs will emit the necessary logic to handle scratch/shared GEP addressing.
    */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_CL)
      return true;

   /* In the Vulkan environment, we don't have cached handles for textures or
    * samplers, so let's use the opportunity of walking through the derefs to
    * emit those.
    */
   nir_variable *var = nir_deref_instr_get_variable(instr);
   assert(var);

   if (!glsl_type_is_sampler(glsl_without_array(var->type)) &&
       !glsl_type_is_image(glsl_without_array(var->type)) &&
       !glsl_type_is_texture(glsl_without_array(var->type)))
      return true;

   const struct glsl_type *type = instr->type;
   const struct dxil_value *binding;
   unsigned binding_val = ctx->opts->environment == DXIL_ENVIRONMENT_GL ?
      var->data.driver_location : var->data.binding;

   if (instr->deref_type == nir_deref_type_var) {
      binding = dxil_module_get_int32_const(&ctx->mod, binding_val);
   } else {
      const struct dxil_value *base = get_src(ctx, &instr->parent, 0, nir_type_uint32);
      const struct dxil_value *offset = get_src(ctx, &instr->arr.index, 0, nir_type_uint32);
      if (!base || !offset)
         return false;

      if (glsl_type_is_array(instr->type)) {
         offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_MUL, offset,
                                  dxil_module_get_int32_const(&ctx->mod, glsl_get_aoa_size(instr->type)), 0);
         if (!offset)
            return false;
      }
      binding = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, base, offset, 0);
   }

   if (!binding)
      return false;

   /* Haven't finished chasing the deref chain yet, just store the value */
   if (glsl_type_is_array(type)) {
      store_dest(ctx, &instr->dest, 0, binding, nir_type_uint32);
      return true;
   }

   assert(glsl_type_is_sampler(type) || glsl_type_is_image(type) || glsl_type_is_texture(type));
   enum dxil_resource_class res_class;
   if (glsl_type_is_image(type)) {
      if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN &&
          (var->data.access & ACCESS_NON_WRITEABLE))
         res_class = DXIL_RESOURCE_CLASS_SRV;
      else
         res_class = DXIL_RESOURCE_CLASS_UAV;
   } else if (glsl_type_is_sampler(type)) {
      res_class = DXIL_RESOURCE_CLASS_SAMPLER;
   } else {
      res_class = DXIL_RESOURCE_CLASS_SRV;
   }

   unsigned descriptor_set = ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN ?
      var->data.descriptor_set : (glsl_type_is_image(type) ? 1 : 0);
   const struct dxil_value *handle = emit_createhandle_call(ctx, res_class,
      get_resource_id(ctx, res_class, descriptor_set, binding_val), binding, false);
   if (!handle)
      return false;

   store_dest_value(ctx, &instr->dest, 0, handle);
   return true;
}
4523
static bool
emit_cond_branch(struct ntd_context *ctx, const struct dxil_value *cond,
                 int true_block, int false_block)
{
   assert(cond);
   assert(true_block >= 0);
   assert(false_block >= 0);
   return dxil_emit_branch(&ctx->mod, cond, true_block, false_block);
}

static bool
emit_branch(struct ntd_context *ctx, int block)
{
   assert(block >= 0);
   return dxil_emit_branch(&ctx->mod, NULL, block, -1);
}

static bool
emit_jump(struct ntd_context *ctx, nir_jump_instr *instr)
{
   switch (instr->type) {
   case nir_jump_break:
   case nir_jump_continue:
      assert(instr->instr.block->successors[0]);
      assert(!instr->instr.block->successors[1]);
      return emit_branch(ctx, instr->instr.block->successors[0]->index);

   default:
      unreachable("Unsupported jump type\n");
   }
}

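/* PHI nodes are handled in two passes: emit_phi creates the DXIL phi
 * instructions up front, so their result values exist when later blocks
 * reference them, and fixup_phi fills in the incoming value/block pairs
 * once every block has been emitted.
 */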
struct phi_block {
   unsigned num_components;
   struct dxil_instr *comp[NIR_MAX_VEC_COMPONENTS];
};

static bool
emit_phi(struct ntd_context *ctx, nir_phi_instr *instr)
{
   unsigned bit_size = nir_dest_bit_size(instr->dest);
   const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod,
                                                           bit_size);

   struct phi_block *vphi = ralloc(ctx->phis, struct phi_block);
   vphi->num_components = nir_dest_num_components(instr->dest);

   for (unsigned i = 0; i < vphi->num_components; ++i) {
      struct dxil_instr *phi = vphi->comp[i] = dxil_emit_phi(&ctx->mod, type);
      if (!phi)
         return false;
      store_dest_value(ctx, &instr->dest, i, dxil_instr_get_return_value(phi));
   }
   _mesa_hash_table_insert(ctx->phis, instr, vphi);
   return true;
}

static bool
fixup_phi(struct ntd_context *ctx, nir_phi_instr *instr,
          struct phi_block *vphi)
{
   const struct dxil_value *values[16];
   unsigned blocks[16];
   for (unsigned i = 0; i < vphi->num_components; ++i) {
      size_t num_incoming = 0;
      nir_foreach_phi_src(src, instr) {
         assert(src->src.is_ssa);
         const struct dxil_value *val = get_src_ssa(ctx, src->src.ssa, i);
         values[num_incoming] = val;
         blocks[num_incoming] = src->pred->index;
         ++num_incoming;
         if (num_incoming == ARRAY_SIZE(values)) {
            if (!dxil_phi_add_incoming(vphi->comp[i], values, blocks,
                                       num_incoming))
               return false;
            num_incoming = 0;
         }
      }
      if (num_incoming > 0 && !dxil_phi_add_incoming(vphi->comp[i], values,
                                                     blocks, num_incoming))
         return false;
   }
   return true;
}

static unsigned
get_n_src(struct ntd_context *ctx, const struct dxil_value **values,
          unsigned max_components, nir_tex_src *src, nir_alu_type type)
{
   unsigned num_components = nir_src_num_components(src->src);
   unsigned i = 0;

   assert(num_components <= max_components);

   for (i = 0; i < num_components; ++i) {
      values[i] = get_src(ctx, &src->src, i, type);
      if (!values[i])
         return 0;
   }

   return num_components;
}

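/* DXIL sample/load ops take a fixed number of coordinate and offset
 * operands regardless of texture dimensionality; components the shader
 * doesn't provide are padded with undef values.
 */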
#define PAD_SRC(ctx, array, components, undef) \
   for (unsigned i = components; i < ARRAY_SIZE(array); ++i) { \
      array[i] = undef; \
   }

static const struct dxil_value *
emit_sample(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sample", params->overload);
   if (!func)
      return NULL;

   const struct dxil_value *args[11] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->min_lod
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}
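
/* For reference, a 2D sample emitted this way looks roughly like the
 * following in textual DXIL (operand layout per DXIL.rst; the opcode
 * constant shown is illustrative):
 *
 *   %r = call %dx.types.ResRet.f32 @dx.op.sample.f32(i32 60,
 *            %dx.types.Handle %tex, %dx.types.Handle %sampler,
 *            float %u, float %v, float undef, float undef,
 *            i32 0, i32 0, i32 undef, float undef)
 */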

static const struct dxil_value *
emit_sample_bias(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleBias", params->overload);
   if (!func)
      return NULL;

   assert(params->bias != NULL);

   const struct dxil_value *args[12] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_BIAS),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->bias, params->min_lod
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_sample_level(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleLevel", params->overload);
   if (!func)
      return NULL;

   assert(params->lod_or_sample != NULL);

   const struct dxil_value *args[11] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_LEVEL),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->lod_or_sample
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_sample_cmp(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func;
   enum dxil_intr opcode;
   int numparam;

   if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) {
      func = dxil_get_function(&ctx->mod, "dx.op.sampleCmp", DXIL_F32);
      opcode = DXIL_INTR_SAMPLE_CMP;
      numparam = 12;
   } else {
      func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevelZero", DXIL_F32);
      opcode = DXIL_INTR_SAMPLE_CMP_LVL_ZERO;
      numparam = 11;
   }

   if (!func)
      return NULL;

   const struct dxil_value *args[12] = {
      dxil_module_get_int32_const(&ctx->mod, opcode),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->cmp, params->min_lod
   };

   return dxil_emit_call(&ctx->mod, func, args, numparam);
}
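
/* Outside of pixel shaders there are no implicit derivatives, so the
 * implicit-LOD dx.op.sampleCmp form isn't available; sampleCmpLevelZero
 * is used instead and samples mip level zero.
 */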

static const struct dxil_value *
emit_sample_grad(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleGrad", params->overload);
   if (!func)
      return NULL;

   const struct dxil_value *args[17] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_GRAD),
      params->tex, params->sampler,
      params->coord[0], params->coord[1], params->coord[2], params->coord[3],
      params->offset[0], params->offset[1], params->offset[2],
      params->dx[0], params->dx[1], params->dx[2],
      params->dy[0], params->dy[1], params->dy[2],
      params->min_lod
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_texel_fetch(struct ntd_context *ctx, struct texop_parameters *params)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", params->overload);
   if (!func)
      return NULL;

   if (!params->lod_or_sample)
      params->lod_or_sample = dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32));

   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOAD),
      params->tex,
      params->lod_or_sample, params->coord[0], params->coord[1], params->coord[2],
      params->offset[0], params->offset[1], params->offset[2]
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_texture_lod(struct ntd_context *ctx, struct texop_parameters *params, bool clamped)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.calculateLOD", DXIL_F32);
   if (!func)
      return NULL;

   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOD),
      params->tex,
      params->sampler,
      params->coord[0],
      params->coord[1],
      params->coord[2],
      dxil_module_get_int1_const(&ctx->mod, clamped ? 1 : 0)
   };

   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}

static const struct dxil_value *
emit_texture_gather(struct ntd_context *ctx, struct texop_parameters *params, unsigned component)
{
   const struct dxil_func *func = dxil_get_function(&ctx->mod,
      params->cmp ? "dx.op.textureGatherCmp" : "dx.op.textureGather", params->overload);
   if (!func)
      return NULL;

   const struct dxil_value *args[] = {
      dxil_module_get_int32_const(&ctx->mod, params->cmp ?
         DXIL_INTR_TEXTURE_GATHER_CMP : DXIL_INTR_TEXTURE_GATHER),
      params->tex,
      params->sampler,
      params->coord[0],
      params->coord[1],
      params->coord[2],
      params->coord[3],
      params->offset[0],
      params->offset[1],
      dxil_module_get_int32_const(&ctx->mod, component),
      params->cmp
   };

   /* The trailing compare value is only passed for gather-cmp. */
   return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args) - (params->cmp ? 0 : 1));
}

static bool
emit_tex(struct ntd_context *ctx, nir_tex_instr *instr)
{
   struct texop_parameters params;
   memset(&params, 0, sizeof(struct texop_parameters));
   if (ctx->opts->environment != DXIL_ENVIRONMENT_VULKAN) {
      params.tex = ctx->srv_handles[instr->texture_index];
      params.sampler = ctx->sampler_handles[instr->sampler_index];
   }

   const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);
   const struct dxil_type *float_type = dxil_module_get_float_type(&ctx->mod, 32);
   const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);
   const struct dxil_value *float_undef = dxil_module_get_undef(&ctx->mod, float_type);

   unsigned coord_components = 0, offset_components = 0, dx_components = 0, dy_components = 0;
   params.overload = get_overload(instr->dest_type, 32);

   for (unsigned i = 0; i < instr->num_srcs; i++) {
      nir_alu_type type = nir_tex_instr_src_type(instr, i);

      switch (instr->src[i].src_type) {
      case nir_tex_src_coord:
         coord_components = get_n_src(ctx, params.coord, ARRAY_SIZE(params.coord),
                                      &instr->src[i], type);
         if (!coord_components)
            return false;
         break;

      case nir_tex_src_offset:
         offset_components = get_n_src(ctx, params.offset, ARRAY_SIZE(params.offset),
                                       &instr->src[i], nir_type_int);
         if (!offset_components)
            return false;
         break;

      case nir_tex_src_bias:
         assert(instr->op == nir_texop_txb);
         assert(nir_src_num_components(instr->src[i].src) == 1);
         params.bias = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
         if (!params.bias)
            return false;
         break;

      case nir_tex_src_lod:
         assert(nir_src_num_components(instr->src[i].src) == 1);
         if (instr->op == nir_texop_txf_ms) {
            assert(nir_src_as_int(instr->src[i].src) == 0);
            break;
         }

         /* Buffers don't have a LOD */
         if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF)
            params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, type);
         else
            params.lod_or_sample = int_undef;
         if (!params.lod_or_sample)
            return false;
         break;

      case nir_tex_src_min_lod:
         assert(nir_src_num_components(instr->src[i].src) == 1);
         params.min_lod = get_src(ctx, &instr->src[i].src, 0, type);
         if (!params.min_lod)
            return false;
         break;

      case nir_tex_src_comparator:
         assert(nir_src_num_components(instr->src[i].src) == 1);
         params.cmp = get_src(ctx, &instr->src[i].src, 0, nir_type_float);
         if (!params.cmp)
            return false;
         break;

      case nir_tex_src_ddx:
         dx_components = get_n_src(ctx, params.dx, ARRAY_SIZE(params.dx),
                                   &instr->src[i], nir_type_float);
         if (!dx_components)
            return false;
         break;

      case nir_tex_src_ddy:
         dy_components = get_n_src(ctx, params.dy, ARRAY_SIZE(params.dy),
                                   &instr->src[i], nir_type_float);
         if (!dy_components)
            return false;
         break;

      case nir_tex_src_ms_index:
         params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, nir_type_int);
         if (!params.lod_or_sample)
            return false;
         break;

      case nir_tex_src_texture_deref:
         assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN);
         params.tex = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
         break;

      case nir_tex_src_sampler_deref:
         assert(ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN);
         params.sampler = get_src_ssa(ctx, instr->src[i].src.ssa, 0);
         break;

      case nir_tex_src_texture_offset:
         params.tex = emit_createhandle_call(ctx, DXIL_RESOURCE_CLASS_SRV,
            get_resource_id(ctx, DXIL_RESOURCE_CLASS_SRV, 0, instr->texture_index),
            dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
               get_src_ssa(ctx, instr->src[i].src.ssa, 0),
               dxil_module_get_int32_const(&ctx->mod, instr->texture_index), 0),
            instr->texture_non_uniform);
         break;

      case nir_tex_src_sampler_offset:
         if (nir_tex_instr_need_sampler(instr)) {
            params.sampler = emit_createhandle_call(ctx, DXIL_RESOURCE_CLASS_SAMPLER,
               get_resource_id(ctx, DXIL_RESOURCE_CLASS_SAMPLER, 0, instr->sampler_index),
               dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,
                  get_src_ssa(ctx, instr->src[i].src.ssa, 0),
                  dxil_module_get_int32_const(&ctx->mod, instr->sampler_index), 0),
               instr->sampler_non_uniform);
         }
         break;

      case nir_tex_src_projector:
         unreachable("Texture projector should have been lowered");

      default:
         fprintf(stderr, "texture source: %d\n", instr->src[i].src_type);
         unreachable("unknown texture source");
      }
   }

   assert(params.tex != NULL);
   assert(instr->op == nir_texop_txf ||
          instr->op == nir_texop_txf_ms ||
          nir_tex_instr_is_query(instr) ||
          params.sampler != NULL);

   PAD_SRC(ctx, params.coord, coord_components, float_undef);
   PAD_SRC(ctx, params.offset, offset_components, int_undef);
   if (!params.min_lod)
      params.min_lod = float_undef;

   const struct dxil_value *sample = NULL;
   switch (instr->op) {
   case nir_texop_txb:
      sample = emit_sample_bias(ctx, &params);
      break;

   case nir_texop_tex:
      if (params.cmp != NULL) {
         sample = emit_sample_cmp(ctx, &params);
         break;
      } else if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) {
         sample = emit_sample(ctx, &params);
         break;
      }
      /* Implicit-LOD sampling is only valid in pixel shaders; elsewhere,
       * fall through to an explicit sample of level zero. */
      params.lod_or_sample = dxil_module_get_float_const(&ctx->mod, 0);
      FALLTHROUGH;
   case nir_texop_txl:
      sample = emit_sample_level(ctx, &params);
      break;

   case nir_texop_txd:
      PAD_SRC(ctx, params.dx, dx_components, float_undef);
      PAD_SRC(ctx, params.dy, dy_components, float_undef);
      sample = emit_sample_grad(ctx, &params);
      break;

   case nir_texop_txf:
   case nir_texop_txf_ms:
      if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
         params.coord[1] = int_undef;
         sample = emit_bufferload_call(ctx, params.tex, params.coord, params.overload);
      } else {
         PAD_SRC(ctx, params.coord, coord_components, int_undef);
         sample = emit_texel_fetch(ctx, &params);
      }
      break;

   case nir_texop_txs:
      sample = emit_texture_size(ctx, &params);
      break;

   case nir_texop_tg4:
      sample = emit_texture_gather(ctx, &params, instr->component);
      break;

   case nir_texop_lod:
      sample = emit_texture_lod(ctx, &params, true);
      store_dest(ctx, &instr->dest, 0, sample, nir_alu_type_get_base_type(instr->dest_type));
      sample = emit_texture_lod(ctx, &params, false);
      store_dest(ctx, &instr->dest, 1, sample, nir_alu_type_get_base_type(instr->dest_type));
      return true;

   case nir_texop_query_levels:
      params.lod_or_sample = dxil_module_get_int_const(&ctx->mod, 0, 32);
      sample = emit_texture_size(ctx, &params);
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3);
      store_dest(ctx, &instr->dest, 0, retval, nir_alu_type_get_base_type(instr->dest_type));
      return true;

   default:
      fprintf(stderr, "texture op: %d\n", instr->op);
      unreachable("unknown texture op");
   }

   if (!sample)
      return false;

   for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
      const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, i);
      store_dest(ctx, &instr->dest, i, retval, nir_alu_type_get_base_type(instr->dest_type));
   }

   return true;
}

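/* NIR undefs are lowered to zero constants here rather than DXIL undef
 * values; presumably this keeps consumers of the value deterministic. */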
static bool
emit_undefined(struct ntd_context *ctx, nir_ssa_undef_instr *undef)
{
   for (unsigned i = 0; i < undef->def.num_components; ++i)
      store_ssa_def(ctx, &undef->def, i, dxil_module_get_int32_const(&ctx->mod, 0));
   return true;
}

static bool
emit_instr(struct ntd_context *ctx, struct nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return emit_alu(ctx, nir_instr_as_alu(instr));
   case nir_instr_type_intrinsic:
      return emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
   case nir_instr_type_load_const:
      return emit_load_const(ctx, nir_instr_as_load_const(instr));
   case nir_instr_type_deref:
      return emit_deref(ctx, nir_instr_as_deref(instr));
   case nir_instr_type_jump:
      return emit_jump(ctx, nir_instr_as_jump(instr));
   case nir_instr_type_phi:
      return emit_phi(ctx, nir_instr_as_phi(instr));
   case nir_instr_type_tex:
      return emit_tex(ctx, nir_instr_as_tex(instr));
   case nir_instr_type_ssa_undef:
      return emit_undefined(ctx, nir_instr_as_ssa_undef(instr));
   default:
      NIR_INSTR_UNSUPPORTED(instr);
      unreachable("Unimplemented instruction type");
      return false;
   }
}

static bool
emit_block(struct ntd_context *ctx, struct nir_block *block)
{
   assert(block->index < ctx->mod.cur_emitting_func->num_basic_block_ids);
   ctx->mod.cur_emitting_func->basic_block_ids[block->index] =
      ctx->mod.cur_emitting_func->curr_block;

   nir_foreach_instr(instr, block) {
      TRACE_CONVERSION(instr);

      if (!emit_instr(ctx, instr)) {
         return false;
      }
   }
   return true;
}

static bool
emit_cf_list(struct ntd_context *ctx, struct exec_list *list);

static bool
emit_if(struct ntd_context *ctx, struct nir_if *if_stmt)
{
   assert(nir_src_num_components(if_stmt->condition) == 1);
   const struct dxil_value *cond = get_src(ctx, &if_stmt->condition, 0,
                                           nir_type_bool);
   if (!cond)
      return false;

   /* prepare blocks */
   nir_block *then_block = nir_if_first_then_block(if_stmt);
   assert(nir_if_last_then_block(if_stmt)->successors[0]);
   assert(!nir_if_last_then_block(if_stmt)->successors[1]);
   int then_succ = nir_if_last_then_block(if_stmt)->successors[0]->index;

   nir_block *else_block = NULL;
   int else_succ = -1;
   if (!exec_list_is_empty(&if_stmt->else_list)) {
      else_block = nir_if_first_else_block(if_stmt);
      assert(nir_if_last_else_block(if_stmt)->successors[0]);
      assert(!nir_if_last_else_block(if_stmt)->successors[1]);
      else_succ = nir_if_last_else_block(if_stmt)->successors[0]->index;
   }

   if (!emit_cond_branch(ctx, cond, then_block->index,
                         else_block ? else_block->index : then_succ))
      return false;

   /* handle then-block */
   if (!emit_cf_list(ctx, &if_stmt->then_list) ||
       (!nir_block_ends_in_jump(nir_if_last_then_block(if_stmt)) &&
        !emit_branch(ctx, then_succ)))
      return false;

   if (else_block) {
      /* handle else-block */
      if (!emit_cf_list(ctx, &if_stmt->else_list) ||
          (!nir_block_ends_in_jump(nir_if_last_else_block(if_stmt)) &&
           !emit_branch(ctx, else_succ)))
         return false;
   }

   return true;
}

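/* NIR loops have no explicit condition; they are infinite loops exited by
 * break jumps inside the body, so the loop shape is formed here by an
 * unconditional back-branch from the last block to the first.
 */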
static bool
emit_loop(struct ntd_context *ctx, nir_loop *loop)
{
   nir_block *first_block = nir_loop_first_block(loop);

   assert(nir_loop_last_block(loop)->successors[0]);
   assert(!nir_loop_last_block(loop)->successors[1]);

   if (!emit_branch(ctx, first_block->index))
      return false;

   if (!emit_cf_list(ctx, &loop->body))
      return false;

   if (!emit_branch(ctx, first_block->index))
      return false;

   return true;
}

static bool
emit_cf_list(struct ntd_context *ctx, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         if (!emit_block(ctx, nir_cf_node_as_block(node)))
            return false;
         break;

      case nir_cf_node_if:
         if (!emit_if(ctx, nir_cf_node_as_if(node)))
            return false;
         break;

      case nir_cf_node_loop:
         if (!emit_loop(ctx, nir_cf_node_as_loop(node)))
            return false;
         break;

      default:
         unreachable("unsupported cf-list node");
         break;
      }
   }
   return true;
}

static void
insert_sorted_by_binding(struct exec_list *var_list, nir_variable *new_var)
{
   nir_foreach_variable_in_list(var, var_list) {
      if (var->data.binding > new_var->data.binding) {
         exec_node_insert_node_before(&var->node, &new_var->node);
         return;
      }
   }
   exec_list_push_tail(var_list, &new_var->node);
}

static void
sort_uniforms_by_binding_and_remove_structs(nir_shader *s)
{
   struct exec_list new_list;
   exec_list_make_empty(&new_list);

   nir_foreach_variable_with_modes_safe(var, s, nir_var_uniform) {
      exec_node_remove(&var->node);
      const struct glsl_type *type = glsl_without_array(var->type);
      if (!glsl_type_is_struct(type))
         insert_sorted_by_binding(&new_list, var);
   }
   exec_list_append(&s->variables, &new_list);
}

static void
prepare_phi_values(struct ntd_context *ctx, nir_function_impl *impl)
{
   /* PHI nodes are difficult to get right when tracking the types:
    * Since the incoming sources are linked to blocks, we can't bitcast
    * on the fly while loading. So scan the shader and insert a typed dummy
    * value for each phi source, and when storing, convert if the incoming
    * value has a different type than the one expected by the phi node.
    * We choose int as the default, because it supports more bit sizes.
    */
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type == nir_instr_type_phi) {
            nir_phi_instr *ir = nir_instr_as_phi(instr);
            unsigned bitsize = nir_dest_bit_size(ir->dest);
            const struct dxil_value *dummy = dxil_module_get_int_const(&ctx->mod, 0, bitsize);
            nir_foreach_phi_src(src, ir) {
               for (unsigned i = 0; i < ir->dest.ssa.num_components; ++i)
                  store_ssa_def(ctx, src->src.ssa, i, dummy);
            }
         }
      }
   }
}

static bool
emit_cbvs(struct ntd_context *ctx)
{
   if (ctx->opts->environment != DXIL_ENVIRONMENT_GL) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ubo) {
         if (!emit_ubo_var(ctx, var))
            return false;
      }
   } else {
      if (ctx->shader->info.num_ubos) {
         const unsigned ubo_size = 16384 /* 4096 vec4s */;
         bool has_ubo0 = !ctx->opts->no_ubo0;
         bool has_state_vars = ctx->opts->last_ubo_is_not_arrayed;
         unsigned ubo1_array_size = ctx->shader->info.num_ubos -
            (has_state_vars ? 2 : 1);

         if (has_ubo0 &&
             !emit_cbv(ctx, 0, 0, ubo_size, 1, "__ubo_uniforms"))
            return false;
         if (ubo1_array_size &&
             !emit_cbv(ctx, 1, 0, ubo_size, ubo1_array_size, "__ubos"))
            return false;
         if (has_state_vars &&
             !emit_cbv(ctx, ctx->shader->info.num_ubos - 1, 0, ubo_size, 1, "__ubo_state_vars"))
            return false;
      }
   }

   return true;
}

static bool
emit_scratch(struct ntd_context *ctx)
{
   if (ctx->shader->scratch_size) {
      /*
       * We always allocate a u32 array, no matter the actual variable types.
       * According to the DXIL spec, the minimum load/store granularity is
       * 32-bit; anything smaller requires a read-extract or
       * read-modify-write approach.
       */
      unsigned size = ALIGN_POT(ctx->shader->scratch_size, sizeof(uint32_t));
      const struct dxil_type *int32 = dxil_module_get_int_type(&ctx->mod, 32);
      const struct dxil_value *array_length = dxil_module_get_int32_const(&ctx->mod, size / sizeof(uint32_t));
      if (!int32 || !array_length)
         return false;

      const struct dxil_type *type = dxil_module_get_array_type(
         &ctx->mod, int32, size / sizeof(uint32_t));
      if (!type)
         return false;

      ctx->scratchvars = dxil_emit_alloca(&ctx->mod, type, int32, array_length, 4);
      if (!ctx->scratchvars)
         return false;
   }

   return true;
}

/* The validator complains about shared (groupshared) variables that no
 * instruction references, so only emit the global when shared ops exist. */
static bool
shader_has_shared_ops(struct nir_shader *s)
{
   nir_foreach_function(func, s) {
      if (!func->impl)
         continue;
      nir_foreach_block(block, func->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_load_shared_dxil:
            case nir_intrinsic_store_shared_dxil:
            case nir_intrinsic_shared_atomic_add_dxil:
            case nir_intrinsic_shared_atomic_and_dxil:
            case nir_intrinsic_shared_atomic_comp_swap_dxil:
            case nir_intrinsic_shared_atomic_exchange_dxil:
            case nir_intrinsic_shared_atomic_imax_dxil:
            case nir_intrinsic_shared_atomic_imin_dxil:
            case nir_intrinsic_shared_atomic_or_dxil:
            case nir_intrinsic_shared_atomic_umax_dxil:
            case nir_intrinsic_shared_atomic_umin_dxil:
            case nir_intrinsic_shared_atomic_xor_dxil:
               return true;
            default:
               break;
            }
         }
      }
   }
   return false;
}

static bool
emit_function(struct ntd_context *ctx, nir_function *func)
{
   assert(func->num_params == 0);
   nir_function_impl *impl = func->impl;
   if (!impl)
      return true;

   nir_metadata_require(impl, nir_metadata_block_index);

   const struct dxil_type *void_type = dxil_module_get_void_type(&ctx->mod);
   const struct dxil_type *func_type = dxil_module_add_function_type(&ctx->mod, void_type, NULL, 0);
   struct dxil_func_def *func_def = dxil_add_function_def(&ctx->mod, func->name, func_type, impl->num_blocks);
   if (!func_def)
      return false;

   if (func->is_entrypoint)
      ctx->main_func_def = func_def;
   else if (func == ctx->tess_ctrl_patch_constant_func)
      ctx->tess_ctrl_patch_constant_func_def = func_def;

   ctx->defs = rzalloc_array(ctx->ralloc_ctx, struct dxil_def, impl->ssa_alloc);
   if (!ctx->defs)
      return false;
   ctx->num_defs = impl->ssa_alloc;

   ctx->phis = _mesa_pointer_hash_table_create(ctx->ralloc_ctx);
   if (!ctx->phis)
      return false;

   prepare_phi_values(ctx, impl);

   if (!emit_scratch(ctx))
      return false;

   if (!emit_static_indexing_handles(ctx))
      return false;

   if (!emit_cf_list(ctx, &impl->body))
      return false;

   hash_table_foreach(ctx->phis, entry) {
      if (!fixup_phi(ctx, (nir_phi_instr *)entry->key,
                     (struct phi_block *)entry->data))
         return false;
   }

   if (!dxil_emit_ret_void(&ctx->mod))
      return false;

   ralloc_free(ctx->defs);
   ctx->defs = NULL;
   _mesa_hash_table_destroy(ctx->phis, NULL);
   return true;
}

static bool
emit_module(struct ntd_context *ctx, const struct nir_to_dxil_options *opts)
{
   /* The validator forces us to emit resources in a specific order:
    * CBVs, Samplers, SRVs, UAVs. While we are at it, also remove stale
    * struct uniforms; they are lowered but might not have been removed. */
   sort_uniforms_by_binding_and_remove_structs(ctx->shader);

   /* CBVs */
   if (!emit_cbvs(ctx))
      return false;

   /* Samplers */
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
      unsigned count = glsl_type_get_sampler_count(var->type);
      assert(count == 0 || glsl_type_is_bare_sampler(glsl_without_array(var->type)));
      if (count > 0 && !emit_sampler(ctx, var, count))
         return false;
   }

   /* SRVs */
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {
      if (glsl_type_is_texture(glsl_without_array(var->type)) &&
          !emit_srv(ctx, var, glsl_type_get_texture_count(var->type)))
         return false;
   }

   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      nir_foreach_image_variable(var, ctx->shader) {
         if ((var->data.access & ACCESS_NON_WRITEABLE) &&
             !emit_srv(ctx, var, glsl_type_get_image_count(var->type)))
            return false;
      }
   }

   /* Handle read-only SSBOs as SRVs */
   if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
         if ((var->data.access & ACCESS_NON_WRITEABLE) != 0) {
            unsigned count = 1;
            if (glsl_type_is_array(var->type))
               count = glsl_get_length(var->type);
            if (!emit_srv(ctx, var, count))
               return false;
         }
      }
   }

   if (ctx->shader->info.shared_size && shader_has_shared_ops(ctx->shader)) {
      const struct dxil_type *type;
      unsigned size;

      /*
       * We always allocate a u32 array, no matter the actual variable types.
       * According to the DXIL spec, the minimum load/store granularity is
       * 32-bit; anything smaller requires a read-extract or
       * read-modify-write approach. Non-atomic 64-bit accesses are allowed,
       * but the GEP(cast(gvar, u64[] *), offset) and
       * cast(GEP(gvar, offset), u64 *) sequences don't seem to be accepted
       * by the DXIL validator when the pointer is in the groupshared
       * address space, making the 32-bit -> 64-bit pointer cast impossible.
       */
      size = ALIGN_POT(ctx->shader->info.shared_size, sizeof(uint32_t));
      type = dxil_module_get_array_type(&ctx->mod,
                                        dxil_module_get_int_type(&ctx->mod, 32),
                                        size / sizeof(uint32_t));
      ctx->sharedvars = dxil_add_global_ptr_var(&ctx->mod, "shared", type,
                                                DXIL_AS_GROUPSHARED,
                                                ffs(sizeof(uint64_t)),
                                                NULL);
   }

   /* UAVs */
   if (ctx->shader->info.stage == MESA_SHADER_KERNEL) {
      if (!emit_globals(ctx, opts->num_kernel_globals))
         return false;

      ctx->consts = _mesa_pointer_hash_table_create(ctx->ralloc_ctx);
      if (!ctx->consts)
         return false;
      if (!emit_global_consts(ctx))
         return false;
   } else if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN) {
      /* Handle read/write SSBOs as UAVs */
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {
         if ((var->data.access & ACCESS_NON_WRITEABLE) == 0) {
            unsigned count = 1;
            if (glsl_type_is_array(var->type))
               count = glsl_get_length(var->type);
            if (!emit_uav(ctx, var->data.binding, var->data.descriptor_set,
                          count, DXIL_COMP_TYPE_INVALID,
                          DXIL_RESOURCE_KIND_RAW_BUFFER, var->name))
               return false;
         }
      }
   } else {
      for (unsigned i = 0; i < ctx->shader->info.num_ssbos; ++i) {
         char name[64];
         snprintf(name, sizeof(name), "__ssbo%d", i);
         if (!emit_uav(ctx, i, 0, 1, DXIL_COMP_TYPE_INVALID,
                       DXIL_RESOURCE_KIND_RAW_BUFFER, name))
            return false;
      }
      /* To work around a WARP bug, bind these descriptors a second time in
       * descriptor space 2. Space 0 will be used for static indexing, while
       * space 2 will be used for dynamic indexing. Space 0 will be
       * individual SSBOs in the DXIL shader, while space 2 will be a single
       * array.
       */
      if (ctx->shader->info.num_ssbos &&
          !emit_uav(ctx, 0, 2, ctx->shader->info.num_ssbos, DXIL_COMP_TYPE_INVALID,
                    DXIL_RESOURCE_KIND_RAW_BUFFER, "__ssbo_dynamic"))
         return false;
   }

   nir_foreach_image_variable(var, ctx->shader) {
      if (ctx->opts->environment == DXIL_ENVIRONMENT_VULKAN &&
          var && (var->data.access & ACCESS_NON_WRITEABLE))
         continue; /* already handled as an SRV above */

      if (!emit_uav_var(ctx, var, glsl_type_get_image_count(var->type)))
         return false;
   }

   ctx->mod.info.has_per_sample_input =
      BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
   if (!ctx->mod.info.has_per_sample_input && ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in | nir_var_system_value) {
         if (var->data.sample) {
            ctx->mod.info.has_per_sample_input = true;
            break;
         }
      }
   }

   unsigned input_clip_size = ctx->mod.shader_kind == DXIL_PIXEL_SHADER ?
      ctx->shader->info.clip_distance_array_size : ctx->opts->input_clip_size;
   preprocess_signatures(&ctx->mod, ctx->shader, input_clip_size);

   nir_foreach_function(func, ctx->shader) {
      if (!emit_function(ctx, func))
         return false;
   }

   if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_out) {
         if (var->data.location == FRAG_RESULT_STENCIL) {
            ctx->mod.feats.stencil_ref = true;
         }
      }
   } else if (ctx->shader->info.stage == MESA_SHADER_VERTEX ||
              ctx->shader->info.stage == MESA_SHADER_TESS_EVAL) {
      if (ctx->shader->info.outputs_written &
          (VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER))
         ctx->mod.feats.array_layer_from_vs_or_ds = true;
   }

   if (ctx->mod.feats.native_low_precision)
      ctx->mod.minor_version = MAX2(ctx->mod.minor_version, 2);

   return emit_metadata(ctx) &&
          dxil_emit_module(&ctx->mod);
}

static unsigned int
get_dxil_shader_kind(struct nir_shader *s)
{
   switch (s->info.stage) {
   case MESA_SHADER_VERTEX:
      return DXIL_VERTEX_SHADER;
   case MESA_SHADER_TESS_CTRL:
      return DXIL_HULL_SHADER;
   case MESA_SHADER_TESS_EVAL:
      return DXIL_DOMAIN_SHADER;
   case MESA_SHADER_GEOMETRY:
      return DXIL_GEOMETRY_SHADER;
   case MESA_SHADER_FRAGMENT:
      return DXIL_PIXEL_SHADER;
   case MESA_SHADER_KERNEL:
   case MESA_SHADER_COMPUTE:
      return DXIL_COMPUTE_SHADER;
   default:
      unreachable("unknown shader stage in nir_to_dxil");
      return DXIL_COMPUTE_SHADER;
   }
}

static unsigned
lower_bit_size_callback(const nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_alu)
      return 0;
   const nir_alu_instr *alu = nir_instr_as_alu(instr);

   if (nir_op_infos[alu->op].is_conversion)
      return 0;

   unsigned num_inputs = nir_op_infos[alu->op].num_inputs;
   const struct nir_to_dxil_options *opts = (const struct nir_to_dxil_options *)data;
   unsigned min_bit_size = opts->lower_int16 ? 32 : 16;

   unsigned ret = 0;
   for (unsigned i = 0; i < num_inputs; i++) {
      unsigned bit_size = nir_src_bit_size(alu->src[i].src);
      if (bit_size != 1 && bit_size < min_bit_size)
         ret = min_bit_size;
   }

   return ret;
}
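
/* For example, with lower_int16 set, an ALU op with any 8- or 16-bit
 * source is widened to 32 bits; 1-bit booleans are left untouched. */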

static void
optimize_nir(struct nir_shader *s, const struct nir_to_dxil_options *opts)
{
   bool progress;
   do {
      progress = false;
      NIR_PASS_V(s, nir_lower_vars_to_ssa);
      NIR_PASS(progress, s, nir_lower_indirect_derefs, nir_var_function_temp, UINT32_MAX);
      NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, nir_opt_copy_prop_vars);
      NIR_PASS(progress, s, nir_lower_bit_size, lower_bit_size_callback, (void *)opts);
      NIR_PASS(progress, s, dxil_nir_lower_8bit_conv);
      if (opts->lower_int16)
         NIR_PASS(progress, s, dxil_nir_lower_16bit_conv);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_if, nir_opt_if_aggressive_last_continue | nir_opt_if_optimize_phi_true_false);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, dxil_nir_lower_x2b);
      if (s->options->lower_int64_options)
         NIR_PASS(progress, s, nir_lower_int64);
      NIR_PASS(progress, s, nir_lower_alu);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_undef);
      NIR_PASS(progress, s, nir_lower_undef_to_zero);
      NIR_PASS(progress, s, nir_opt_deref);
      NIR_PASS(progress, s, dxil_nir_lower_upcast_phis, opts->lower_int16 ? 32 : 16);
      NIR_PASS(progress, s, nir_lower_64bit_phis);
      NIR_PASS_V(s, nir_lower_system_values);
   } while (progress);

   do {
      progress = false;
      NIR_PASS(progress, s, nir_opt_algebraic_late);
   } while (progress);
}

static void
dxil_fill_validation_state(struct ntd_context *ctx,
                           struct dxil_validation_state *state)
{
   unsigned resource_element_size = ctx->mod.minor_validator >= 6 ?
      sizeof(struct dxil_resource_v1) : sizeof(struct dxil_resource_v0);
   state->num_resources = ctx->resources.size / resource_element_size;
   state->resources.v0 = (struct dxil_resource_v0 *)ctx->resources.data;
   state->state.psv1.psv0.max_expected_wave_lane_count = UINT_MAX;
   state->state.psv1.shader_stage = (uint8_t)ctx->mod.shader_kind;
   state->state.psv1.sig_input_elements = (uint8_t)ctx->mod.num_sig_inputs;
   state->state.psv1.sig_output_elements = (uint8_t)ctx->mod.num_sig_outputs;
   state->state.psv1.sig_patch_const_or_prim_elements = (uint8_t)ctx->mod.num_sig_patch_consts;

   switch (ctx->mod.shader_kind) {
   case DXIL_VERTEX_SHADER:
      state->state.psv1.psv0.vs.output_position_present = ctx->mod.info.has_out_position;
      break;
   case DXIL_PIXEL_SHADER:
      /* TODO: handle depth outputs */
      state->state.psv1.psv0.ps.depth_output = ctx->mod.info.has_out_depth;
      state->state.psv1.psv0.ps.sample_frequency =
         ctx->mod.info.has_per_sample_input;
      break;
   case DXIL_COMPUTE_SHADER:
      state->state.num_threads_x = MAX2(ctx->shader->info.workgroup_size[0], 1);
      state->state.num_threads_y = MAX2(ctx->shader->info.workgroup_size[1], 1);
      state->state.num_threads_z = MAX2(ctx->shader->info.workgroup_size[2], 1);
      break;
   case DXIL_GEOMETRY_SHADER:
      state->state.psv1.max_vertex_count = ctx->shader->info.gs.vertices_out;
      state->state.psv1.psv0.gs.input_primitive = dxil_get_input_primitive(ctx->shader->info.gs.input_primitive);
      state->state.psv1.psv0.gs.output_toplology = dxil_get_primitive_topology(ctx->shader->info.gs.output_primitive);
      state->state.psv1.psv0.gs.output_stream_mask = MAX2(ctx->shader->info.gs.active_stream_mask, 1);
      state->state.psv1.psv0.gs.output_position_present = ctx->mod.info.has_out_position;
      break;
   case DXIL_HULL_SHADER:
      state->state.psv1.psv0.hs.input_control_point_count = ctx->tess_input_control_point_count;
      state->state.psv1.psv0.hs.output_control_point_count = ctx->shader->info.tess.tcs_vertices_out;
      state->state.psv1.psv0.hs.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode);
      state->state.psv1.psv0.hs.tessellator_output_primitive = get_tessellator_output_primitive(&ctx->shader->info);
      state->state.psv1.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts;
      break;
   case DXIL_DOMAIN_SHADER:
      state->state.psv1.psv0.ds.input_control_point_count = ctx->shader->info.tess.tcs_vertices_out;
      state->state.psv1.psv0.ds.tessellator_domain = get_tessellator_domain(ctx->shader->info.tess._primitive_mode);
      state->state.psv1.psv0.ds.output_position_present = ctx->mod.info.has_out_position;
      state->state.psv1.sig_patch_const_or_prim_vectors = ctx->mod.num_psv_patch_consts;
      break;
   default:
      assert(0 && "Shader type not (yet) supported");
   }
}

static nir_variable *
add_sysvalue(struct ntd_context *ctx,
             uint8_t value, char *name,
             int driver_location)
{
   nir_variable *var = rzalloc(ctx->shader, nir_variable);
   if (!var)
      return NULL;
   var->data.driver_location = driver_location;
   var->data.location = value;
   var->type = glsl_uint_type();
   var->name = name;
   var->data.mode = nir_var_system_value;
   var->data.interpolation = INTERP_MODE_FLAT;
   return var;
}

static bool
append_input_or_sysvalue(struct ntd_context *ctx,
                         int input_loc, int sv_slot,
                         char *name, int driver_location)
{
   if (input_loc >= 0) {
      /* Check whether an input variable already exists that corresponds
       * to the sysvalue. */
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
         if (var->data.location == input_loc) {
            ctx->system_value[sv_slot] = var;
            return true;
         }
      }
   }

   ctx->system_value[sv_slot] = add_sysvalue(ctx, sv_slot, name, driver_location);
   if (!ctx->system_value[sv_slot])
      return false;

   nir_shader_add_variable(ctx->shader, ctx->system_value[sv_slot]);
   return true;
}

struct sysvalue_name {
   gl_system_value value;
   int slot;
   char *name;
   gl_shader_stage only_in_shader;
} possible_sysvalues[] = {
   {SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, -1, "SV_VertexID", MESA_SHADER_NONE},
   {SYSTEM_VALUE_INSTANCE_ID, -1, "SV_InstanceID", MESA_SHADER_NONE},
   {SYSTEM_VALUE_FRONT_FACE, VARYING_SLOT_FACE, "SV_IsFrontFace", MESA_SHADER_NONE},
   {SYSTEM_VALUE_PRIMITIVE_ID, VARYING_SLOT_PRIMITIVE_ID, "SV_PrimitiveID", MESA_SHADER_GEOMETRY},
   {SYSTEM_VALUE_SAMPLE_ID, -1, "SV_SampleIndex", MESA_SHADER_NONE},
};

static bool
allocate_sysvalues(struct ntd_context *ctx)
{
   unsigned driver_location = 0;
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in)
      driver_location++;
   nir_foreach_variable_with_modes(var, ctx->shader, nir_var_system_value)
      driver_location++;

   if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT &&
       ctx->shader->info.inputs_read &&
       !BITSET_TEST(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID)) {
      bool need_sample_id = true;

      /* "var->data.sample = true" sometimes just means "I want per-sample
       * shading", which explains why we can end up with flat-interpolated
       * vars that have the per-sample bit set. If there are only such
       * variables, we need to tell DXIL that we read SV_SampleIndex to
       * make DXIL validation happy.
       */
      nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {
         if (!var->data.sample || var->data.interpolation != INTERP_MODE_FLAT) {
            need_sample_id = false;
            break;
         }
      }

      if (need_sample_id)
         BITSET_SET(ctx->shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);
   }

   for (unsigned i = 0; i < ARRAY_SIZE(possible_sysvalues); ++i) {
      struct sysvalue_name *info = &possible_sysvalues[i];
      if (info->only_in_shader != MESA_SHADER_NONE &&
          info->only_in_shader != ctx->shader->info.stage)
         continue;
      if (BITSET_TEST(ctx->shader->info.system_values_read, info->value)) {
         if (!append_input_or_sysvalue(ctx, info->slot,
                                       info->value, info->name,
                                       driver_location++))
            return false;
      }
   }
   return true;
}

static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
   return glsl_count_attribute_slots(type, false);
}

static bool
dxil_validator_can_validate_shader_model(unsigned sm_minor, unsigned val_minor)
{
   /* Currently the validators are versioned such that validator 1.x is
    * needed to validate SM 6.x. */
   return sm_minor <= val_minor;
}

static const unsigned dxil_validator_min_capable_version = DXIL_VALIDATOR_1_4;
static const unsigned dxil_validator_max_capable_version = DXIL_VALIDATOR_1_7;

bool
nir_to_dxil(struct nir_shader *s, const struct nir_to_dxil_options *opts,
            struct blob *blob)
{
   assert(opts);
   bool retval = true;
   debug_dxil = (int)debug_get_option_debug_dxil();
   blob_init(blob);

   if (opts->shader_model_max < SHADER_MODEL_6_1) {
      debug_printf("D3D12: cannot support emitting shader model 6.0 or lower\n");
      return false;
   }

   if (opts->validator_version_max != NO_DXIL_VALIDATION &&
       opts->validator_version_max < dxil_validator_min_capable_version) {
      debug_printf("D3D12: Invalid validator version %d.%d, must be 1.4 or greater\n",
                   opts->validator_version_max >> 16,
                   opts->validator_version_max & 0xffff);
      return false;
   }

   /* If there is no validation, write a blob as if it were going to be
    * validated by the newest understood validator. Same if the validator
    * is newer than we know how to write for.
    */
   uint32_t validator_version =
      opts->validator_version_max == NO_DXIL_VALIDATION ||
      opts->validator_version_max > dxil_validator_max_capable_version ?
      dxil_validator_max_capable_version : opts->validator_version_max;

   struct ntd_context *ctx = calloc(1, sizeof(*ctx));
   if (!ctx)
      return false;

   ctx->opts = opts;
   ctx->shader = s;

   ctx->ralloc_ctx = ralloc_context(NULL);
   if (!ctx->ralloc_ctx) {
      retval = false;
      goto out;
   }

   util_dynarray_init(&ctx->srv_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->uav_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->cbv_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->sampler_metadata_nodes, ctx->ralloc_ctx);
   util_dynarray_init(&ctx->resources, ctx->ralloc_ctx);
   dxil_module_init(&ctx->mod, ctx->ralloc_ctx);
   ctx->mod.shader_kind = get_dxil_shader_kind(s);
   ctx->mod.major_version = 6;
   ctx->mod.minor_version = 1;
   ctx->mod.major_validator = validator_version >> 16;
   ctx->mod.minor_validator = validator_version & 0xffff;

   if (s->info.stage <= MESA_SHADER_FRAGMENT) {
      uint64_t in_mask =
         s->info.stage == MESA_SHADER_VERTEX ?
         0 : (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER);
      uint64_t out_mask =
         s->info.stage == MESA_SHADER_FRAGMENT ?
         ((1ull << FRAG_RESULT_STENCIL) | (1ull << FRAG_RESULT_SAMPLE_MASK)) :
         (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER);

      NIR_PASS_V(s, dxil_nir_fix_io_uint_type, in_mask, out_mask);
   }

   NIR_PASS_V(s, dxil_nir_lower_fquantize2f16);
   NIR_PASS_V(s, nir_lower_frexp);
   NIR_PASS_V(s, nir_lower_flrp, 16 | 32 | 64, true);
   NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4, nir_lower_io_lower_64bit_to_32);
   NIR_PASS_V(s, dxil_nir_ensure_position_writes);
   NIR_PASS_V(s, nir_lower_pack);
   NIR_PASS_V(s, dxil_nir_lower_system_values);
   NIR_PASS_V(s, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_system_value | nir_var_shader_out);

   if (ctx->mod.shader_kind == DXIL_HULL_SHADER)
      NIR_PASS_V(s, dxil_nir_split_tess_ctrl, &ctx->tess_ctrl_patch_constant_func);

   if (ctx->mod.shader_kind == DXIL_HULL_SHADER ||
       ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) {
      /* Make sure any derefs are gone after lower_io before updating tess
       * level vars. */
      NIR_PASS_V(s, nir_opt_dce);
      NIR_PASS_V(s, dxil_nir_fixup_tess_level_for_domain);
   }

   optimize_nir(s, opts);

   NIR_PASS_V(s, nir_remove_dead_variables,
              nir_var_function_temp | nir_var_shader_temp, NULL);

   if (!allocate_sysvalues(ctx)) {
      retval = false;
      goto out;
   }

   NIR_PASS_V(s, dxil_nir_lower_sysval_to_load_input, ctx->system_value);
   NIR_PASS_V(s, nir_opt_dce);

   if (debug_dxil & DXIL_DEBUG_VERBOSE)
      nir_print_shader(s, stderr);

   if (!emit_module(ctx, opts)) {
      debug_printf("D3D12: dxil_container_add_module failed\n");
      retval = false;
      goto out;
   }

   assert(ctx->mod.major_version == 6 && ctx->mod.minor_version >= 1);
   if ((ctx->mod.major_version << 16 | ctx->mod.minor_version) > opts->shader_model_max) {
      debug_printf("D3D12: max shader model exceeded\n");
      retval = false;
      goto out;
   }

   assert(ctx->mod.major_validator == 1);
   if (!dxil_validator_can_validate_shader_model(ctx->mod.minor_version, ctx->mod.minor_validator)) {
      debug_printf("D3D12: shader model exceeds max that can be validated\n");
      retval = false;
      goto out;
   }

   if (debug_dxil & DXIL_DEBUG_DUMP_MODULE) {
      struct dxil_dumper *dumper = dxil_dump_create();
      dxil_dump_module(dumper, &ctx->mod);
      fprintf(stderr, "\n");
      dxil_dump_buf_to_file(dumper, stderr);
      fprintf(stderr, "\n\n");
      dxil_dump_free(dumper);
   }

   struct dxil_container container;
   dxil_container_init(&container);
   if (!dxil_container_add_features(&container, &ctx->mod.feats)) {
      debug_printf("D3D12: dxil_container_add_features failed\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_add_io_signature(&container,
                                        DXIL_ISG1,
                                        ctx->mod.num_sig_inputs,
                                        ctx->mod.inputs,
                                        ctx->mod.minor_validator >= 7)) {
      debug_printf("D3D12: failed to write input signature\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_add_io_signature(&container,
                                        DXIL_OSG1,
                                        ctx->mod.num_sig_outputs,
                                        ctx->mod.outputs,
                                        ctx->mod.minor_validator >= 7)) {
      debug_printf("D3D12: failed to write output signature\n");
      retval = false;
      goto out;
   }

   if ((ctx->mod.shader_kind == DXIL_HULL_SHADER ||
        ctx->mod.shader_kind == DXIL_DOMAIN_SHADER) &&
       !dxil_container_add_io_signature(&container,
                                        DXIL_PSG1,
                                        ctx->mod.num_sig_patch_consts,
                                        ctx->mod.patch_consts,
                                        ctx->mod.minor_validator >= 7)) {
      debug_printf("D3D12: failed to write patch constant signature\n");
      retval = false;
      goto out;
   }

   struct dxil_validation_state validation_state;
   memset(&validation_state, 0, sizeof(validation_state));
   dxil_fill_validation_state(ctx, &validation_state);

   if (!dxil_container_add_state_validation(&container, &ctx->mod,
                                            &validation_state)) {
      debug_printf("D3D12: failed to write state-validation\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_add_module(&container, &ctx->mod)) {
      debug_printf("D3D12: failed to write module\n");
      retval = false;
      goto out;
   }

   if (!dxil_container_write(&container, blob)) {
      debug_printf("D3D12: dxil_container_write failed\n");
      retval = false;
      goto out;
   }
   dxil_container_finish(&container);

   if (debug_dxil & DXIL_DEBUG_DUMP_BLOB) {
      static int shader_id = 0;
      char buffer[64];
      snprintf(buffer, sizeof(buffer), "shader_%s_%d.blob",
               get_shader_kind_str(ctx->mod.shader_kind), shader_id++);
      debug_printf("Trying to write blob to %s\n", buffer);
      FILE *f = fopen(buffer, "wb");
      if (f) {
         fwrite(blob->data, 1, blob->size, f);
         fclose(f);
      }
   }

out:
   dxil_module_release(&ctx->mod);
   ralloc_free(ctx->ralloc_ctx);
   free(ctx);
   return retval;
}
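
/* Example (sketch) of caller-side usage; the option values shown are
 * illustrative, not a recommendation:
 *
 *   struct nir_to_dxil_options opts = {
 *      .environment = DXIL_ENVIRONMENT_GL,
 *      .shader_model_max = SHADER_MODEL_6_1,
 *      .validator_version_max = DXIL_VALIDATOR_1_4,
 *   };
 *   struct blob dxil;
 *   if (nir_to_dxil(nir, &opts, &dxil)) {
 *      ... consume dxil.data / dxil.size ...
 *   }
 *   blob_finish(&dxil);
 */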

enum dxil_sysvalue_type
nir_var_to_dxil_sysvalue_type(nir_variable *var, uint64_t other_stage_mask)
{
   switch (var->data.location) {
   case VARYING_SLOT_FACE:
      return DXIL_GENERATED_SYSVALUE;
   case VARYING_SLOT_POS:
   case VARYING_SLOT_PRIMITIVE_ID:
   case VARYING_SLOT_CLIP_DIST0:
   case VARYING_SLOT_CLIP_DIST1:
   case VARYING_SLOT_PSIZ:
   case VARYING_SLOT_TESS_LEVEL_INNER:
   case VARYING_SLOT_TESS_LEVEL_OUTER:
   case VARYING_SLOT_VIEWPORT:
   case VARYING_SLOT_LAYER:
      if (!((1ull << var->data.location) & other_stage_mask))
         return DXIL_SYSVALUE;
      FALLTHROUGH;
   default:
      return DXIL_NO_SYSVALUE;
   }
}