1 /*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "d3d12_compiler.h"
25 #include "d3d12_context.h"
26 #include "d3d12_debug.h"
27 #include "d3d12_screen.h"
28 #include "d3d12_nir_passes.h"
29 #include "nir_to_dxil.h"
30 #include "dxil_nir.h"
31
32 #include "pipe/p_state.h"
33
34 #include "nir.h"
35 #include "nir/nir_draw_helpers.h"
36 #include "nir/tgsi_to_nir.h"
37 #include "compiler/nir/nir_builder.h"
38 #include "tgsi/tgsi_from_mesa.h"
39 #include "tgsi/tgsi_ureg.h"
40
41 #include "util/u_memory.h"
42 #include "util/u_prim.h"
43 #include "util/u_simple_shaders.h"
44 #include "util/u_dl.h"
45
46 #include <directx/d3d12.h>
47 #include <dxguids/dxguids.h>
48
49 #include <dxcapi.h>
50 #include <wrl/client.h>
51
52 extern "C" {
53 #include "tgsi/tgsi_parse.h"
54 #include "tgsi/tgsi_point_sprite.h"
55 }
56
57 using Microsoft::WRL::ComPtr;
58
/* Wraps the DXC/DXIL runtime DLL interfaces used after NIR->DXIL
 * translation: validate_and_sign() is run over every compiled blob (see
 * compile_nir()), disassemble() is a debug aid gated by D3D12_DEBUG_DISASS.
 * Created via d3d12_validator_create(); creation fails if the validator
 * interface could not be obtained. */
struct d3d12_validation_tools
{
   d3d12_validation_tools();

   /* Validates (and presumably signs) the DXIL blob in place; returns
    * false on failure — TODO confirm against the implementation, which is
    * outside this view. */
   bool validate_and_sign(struct blob *dxil);

   /* Prints a disassembly of the DXIL blob (debug only). */
   void disassemble(struct blob *dxil);

   /* Loads the DXIL DLL into dxil_module (implementation not visible here). */
   void load_dxil_dll();

   /* RAII wrapper for a util_dl_library handle: loads in load(), unloads
    * in the destructor. */
   struct HModule {
      HModule();
      ~HModule();

      bool load(LPCSTR file_name);
      operator util_dl_library *() const;
   private:
      util_dl_library *module;
   };

   HModule dxil_module;
   HModule dxc_compiler_module;
   ComPtr<IDxcCompiler> compiler;
   ComPtr<IDxcValidator> validator;   /* non-null iff creation succeeded */
   ComPtr<IDxcLibrary> library;
};
85
d3d12_validator_create()86 struct d3d12_validation_tools *d3d12_validator_create()
87 {
88 d3d12_validation_tools *tools = new d3d12_validation_tools();
89 if (tools->validator)
90 return tools;
91 delete tools;
92 return nullptr;
93 }
94
/* Frees a validator previously returned by d3d12_validator_create();
 * accepts NULL (delete on nullptr is a no-op). */
void d3d12_validator_destroy(struct d3d12_validation_tools *validator)
{
   delete validator;
}
99
100
101 const void *
d3d12_get_compiler_options(struct pipe_screen * screen,enum pipe_shader_ir ir,enum pipe_shader_type shader)102 d3d12_get_compiler_options(struct pipe_screen *screen,
103 enum pipe_shader_ir ir,
104 enum pipe_shader_type shader)
105 {
106 assert(ir == PIPE_SHADER_IR_NIR);
107 return dxil_get_nir_compiler_options();
108 }
109
110 static uint32_t
resource_dimension(enum glsl_sampler_dim dim)111 resource_dimension(enum glsl_sampler_dim dim)
112 {
113 switch (dim) {
114 case GLSL_SAMPLER_DIM_1D:
115 return RESOURCE_DIMENSION_TEXTURE1D;
116 case GLSL_SAMPLER_DIM_2D:
117 return RESOURCE_DIMENSION_TEXTURE2D;
118 case GLSL_SAMPLER_DIM_3D:
119 return RESOURCE_DIMENSION_TEXTURE3D;
120 case GLSL_SAMPLER_DIM_CUBE:
121 return RESOURCE_DIMENSION_TEXTURECUBE;
122 default:
123 return RESOURCE_DIMENSION_UNKNOWN;
124 }
125 }
126
127 static struct d3d12_shader *
compile_nir(struct d3d12_context * ctx,struct d3d12_shader_selector * sel,struct d3d12_shader_key * key,struct nir_shader * nir)128 compile_nir(struct d3d12_context *ctx, struct d3d12_shader_selector *sel,
129 struct d3d12_shader_key *key, struct nir_shader *nir)
130 {
131 struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
132 struct d3d12_shader *shader = rzalloc(sel, d3d12_shader);
133 shader->key = *key;
134 shader->nir = nir;
135 sel->current = shader;
136
137 NIR_PASS_V(nir, nir_lower_samplers);
138 NIR_PASS_V(nir, dxil_nir_create_bare_samplers);
139
140 if (key->samples_int_textures)
141 NIR_PASS_V(nir, dxil_lower_sample_to_txf_for_integer_tex,
142 key->tex_wrap_states, key->swizzle_state,
143 screen->base.get_paramf(&screen->base, PIPE_CAPF_MAX_TEXTURE_LOD_BIAS));
144
145 if (key->vs.needs_format_emulation)
146 d3d12_nir_lower_vs_vertex_conversion(nir, key->vs.format_conversion);
147
148 uint32_t num_ubos_before_lower_to_ubo = nir->info.num_ubos;
149 uint32_t num_uniforms_before_lower_to_ubo = nir->num_uniforms;
150 NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, false, false);
151 shader->has_default_ubo0 = num_uniforms_before_lower_to_ubo > 0 &&
152 nir->info.num_ubos > num_ubos_before_lower_to_ubo;
153
154 if (key->last_vertex_processing_stage) {
155 if (key->invert_depth)
156 NIR_PASS_V(nir, d3d12_nir_invert_depth);
157 NIR_PASS_V(nir, nir_lower_clip_halfz);
158 NIR_PASS_V(nir, d3d12_lower_yflip);
159 }
160 NIR_PASS_V(nir, nir_lower_packed_ubo_loads);
161 NIR_PASS_V(nir, d3d12_lower_load_first_vertex);
162 NIR_PASS_V(nir, d3d12_lower_state_vars, shader);
163 NIR_PASS_V(nir, dxil_nir_lower_bool_input);
164
165 struct nir_to_dxil_options opts = {};
166 opts.interpolate_at_vertex = screen->have_load_at_vertex;
167 opts.lower_int16 = !screen->opts4.Native16BitShaderOpsSupported;
168 opts.ubo_binding_offset = shader->has_default_ubo0 ? 0 : 1;
169 opts.provoking_vertex = key->fs.provoking_vertex;
170
171 struct blob tmp;
172 if (!nir_to_dxil(nir, &opts, &tmp)) {
173 debug_printf("D3D12: nir_to_dxil failed\n");
174 return NULL;
175 }
176
177 // Non-ubo variables
178 shader->begin_srv_binding = (UINT_MAX);
179 nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
180 auto type = glsl_without_array(var->type);
181 if (glsl_type_is_sampler(type) && glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
182 unsigned count = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
183 for (unsigned i = 0; i < count; ++i) {
184 shader->srv_bindings[var->data.binding + i].binding = var->data.binding;
185 shader->srv_bindings[var->data.binding + i].dimension = resource_dimension(glsl_get_sampler_dim(type));
186 }
187 shader->begin_srv_binding = MIN2(var->data.binding, shader->begin_srv_binding);
188 shader->end_srv_binding = MAX2(var->data.binding + count, shader->end_srv_binding);
189 }
190 }
191
192 // Ubo variables
193 if(nir->info.num_ubos) {
194 // Ignore state_vars ubo as it is bound as root constants
195 unsigned num_ubo_bindings = nir->info.num_ubos - (shader->state_vars_used ? 1 : 0);
196 for(unsigned i = opts.ubo_binding_offset; i < num_ubo_bindings; ++i) {
197 shader->cb_bindings[shader->num_cb_bindings++].binding = i;
198 }
199 }
200 if (ctx->validation_tools) {
201 ctx->validation_tools->validate_and_sign(&tmp);
202
203 if (d3d12_debug & D3D12_DEBUG_DISASS) {
204 ctx->validation_tools->disassemble(&tmp);
205 }
206 }
207
208 blob_finish_get_buffer(&tmp, &shader->bytecode, &shader->bytecode_length);
209
210 if (d3d12_debug & D3D12_DEBUG_DXIL) {
211 char buf[256];
212 static int i;
213 snprintf(buf, sizeof(buf), "dump%02d.dxil", i++);
214 FILE *fp = fopen(buf, "wb");
215 fwrite(shader->bytecode, sizeof(char), shader->bytecode_length, fp);
216 fclose(fp);
217 fprintf(stderr, "wrote '%s'...\n", buf);
218 }
219 return shader;
220 }
221
/* Per-draw scratch state used while selecting shader variants: filled from
 * the current context/draw info and consumed by d3d12_fill_shader_key()
 * and validate_geometry_shader_variant(). */
struct d3d12_selection_context {
   struct d3d12_context *ctx;
   const struct pipe_draw_info *dinfo;   /* may be NULL (see get_provoking_vertex) */
   bool needs_point_sprite_lowering;     /* result of needs_point_sprite_lowering() */
   bool needs_vertex_reordering;         /* result of needs_vertex_reordering() */
   unsigned provoking_vertex;            /* result of get_provoking_vertex() */
   bool alternate_tri;                   /* strip alternates provoking vertex (ditto) */
   unsigned fill_mode_lowered;           /* PIPE_POLYGON_MODE_* from fill_mode_lowered() */
   unsigned cull_mode_lowered;           /* PIPE_FACE_* from cull_mode_lowered() */
   bool manual_depth_range;              /* result of manual_depth_range() */
   unsigned missing_dual_src_outputs;    /* bitmask from missing_dual_src_outputs() */
   unsigned frag_result_color_lowering;  /* nr_cbufs from frag_result_color_lowering(), 0 = off */
};
235
/* When dual-source blending is enabled, returns a bitmask (bits 0/1 of
 * var->data.index) of the dual-source color outputs that the fragment
 * shader does NOT write; returns 0 when dual-source blending is off or
 * both outputs are stored. Scans every store_deref to FRAG_RESULT_COLOR /
 * FRAG_RESULT_DATA0 in the shader. */
static unsigned
missing_dual_src_outputs(struct d3d12_context *ctx)
{
   if (!ctx->gfx_pipeline_state.blend->is_dual_src)
      return 0;

   struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
   nir_shader *s = fs->initial;

   unsigned indices_seen = 0;
   nir_foreach_function(function, s) {
      if (function->impl) {
         nir_foreach_block(block, function->impl) {
            nir_foreach_instr(instr, block) {
               /* Only output stores are interesting */
               if (instr->type != nir_instr_type_intrinsic)
                  continue;

               nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
               if (intr->intrinsic != nir_intrinsic_store_deref)
                  continue;

               nir_variable *var = nir_intrinsic_get_var(intr, 0);
               if (var->data.mode != nir_var_shader_out ||
                   (var->data.location != FRAG_RESULT_COLOR &&
                    var->data.location != FRAG_RESULT_DATA0))
                  continue;

               /* data.index selects dual-source output 0 vs 1 */
               indices_seen |= 1u << var->data.index;
               if ((indices_seen & 3) == 3)
                  return 0;   /* both written: early out */
            }
         }
      }
   }

   return 3 & ~indices_seen;
}
273
274 static unsigned
frag_result_color_lowering(struct d3d12_context * ctx)275 frag_result_color_lowering(struct d3d12_context *ctx)
276 {
277 struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
278 assert(fs);
279
280 if (fs->initial->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR))
281 return ctx->fb.nr_cbufs > 1 ? ctx->fb.nr_cbufs : 0;
282
283 return 0;
284 }
285
286 static bool
manual_depth_range(struct d3d12_context * ctx)287 manual_depth_range(struct d3d12_context *ctx)
288 {
289 if (!d3d12_need_zero_one_depth_range(ctx))
290 return false;
291
292 /**
293 * If we can't use the D3D12 zero-one depth-range, we might have to apply
294 * depth-range ourselves.
295 *
296 * Because we only need to override the depth-range to zero-one range in
297 * the case where we write frag-depth, we only need to apply manual
298 * depth-range to gl_FragCoord.z.
299 *
300 * No extra care is needed to be taken in the case where gl_FragDepth is
301 * written conditionally, because the GLSL 4.60 spec states:
302 *
303 * If a shader statically assigns a value to gl_FragDepth, and there
304 * is an execution path through the shader that does not set
305 * gl_FragDepth, then the value of the fragment’s depth may be
306 * undefined for executions of the shader that take that path. That
307 * is, if the set of linked fragment shaders statically contain a
308 * write to gl_FragDepth, then it is responsible for always writing
309 * it.
310 */
311
312 struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
313 return fs && fs->initial->info.inputs_read & VARYING_BIT_POS;
314 }
315
316 static bool
needs_edge_flag_fix(enum pipe_prim_type mode)317 needs_edge_flag_fix(enum pipe_prim_type mode)
318 {
319 return (mode == PIPE_PRIM_QUADS ||
320 mode == PIPE_PRIM_QUAD_STRIP ||
321 mode == PIPE_PRIM_POLYGON);
322 }
323
324 static unsigned
fill_mode_lowered(struct d3d12_context * ctx,const struct pipe_draw_info * dinfo)325 fill_mode_lowered(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
326 {
327 struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
328
329 if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
330 !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_gs_variant) ||
331 ctx->gfx_pipeline_state.rast == NULL ||
332 (dinfo->mode != PIPE_PRIM_TRIANGLES &&
333 dinfo->mode != PIPE_PRIM_TRIANGLE_STRIP))
334 return PIPE_POLYGON_MODE_FILL;
335
336 /* D3D12 supports line mode (wireframe) but doesn't support edge flags */
337 if (((ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_LINE &&
338 ctx->gfx_pipeline_state.rast->base.cull_face != PIPE_FACE_FRONT) ||
339 (ctx->gfx_pipeline_state.rast->base.fill_back == PIPE_POLYGON_MODE_LINE &&
340 ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_FRONT)) &&
341 (vs->initial->info.outputs_written & VARYING_BIT_EDGE ||
342 needs_edge_flag_fix(ctx->initial_api_prim)))
343 return PIPE_POLYGON_MODE_LINE;
344
345 if (ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_POINT)
346 return PIPE_POLYGON_MODE_POINT;
347
348 return PIPE_POLYGON_MODE_FILL;
349 }
350
351 static bool
needs_point_sprite_lowering(struct d3d12_context * ctx,const struct pipe_draw_info * dinfo)352 needs_point_sprite_lowering(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
353 {
354 struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
355 struct d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
356
357 if (gs != NULL && !gs->is_gs_variant) {
358 /* There is an user GS; Check if it outputs points with PSIZE */
359 return (gs->initial->info.gs.output_primitive == GL_POINTS &&
360 gs->initial->info.outputs_written & VARYING_BIT_PSIZ);
361 } else {
362 /* No user GS; check if we are drawing wide points */
363 return ((dinfo->mode == PIPE_PRIM_POINTS ||
364 fill_mode_lowered(ctx, dinfo) == PIPE_POLYGON_MODE_POINT) &&
365 (ctx->gfx_pipeline_state.rast->base.point_size > 1.0 ||
366 ctx->gfx_pipeline_state.rast->base.offset_point ||
367 (ctx->gfx_pipeline_state.rast->base.point_size_per_vertex &&
368 vs->initial->info.outputs_written & VARYING_BIT_PSIZ)) &&
369 (vs->initial->info.outputs_written & VARYING_BIT_POS));
370 }
371 }
372
373 static unsigned
cull_mode_lowered(struct d3d12_context * ctx,unsigned fill_mode)374 cull_mode_lowered(struct d3d12_context *ctx, unsigned fill_mode)
375 {
376 if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
377 !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_gs_variant) ||
378 ctx->gfx_pipeline_state.rast == NULL ||
379 ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_NONE)
380 return PIPE_FACE_NONE;
381
382 return ctx->gfx_pipeline_state.rast->base.cull_face;
383 }
384
/* Returns the provoking-vertex index for the current primitive mode:
 * 0 when flatshade_first is set, otherwise the last vertex of the
 * primitive (min vertex count - 1). *alternate is set when the primitive
 * is a triangle strip (with/without adjacency) whose provoking vertex
 * alternates between triangles. */
static unsigned
get_provoking_vertex(struct d3d12_selection_context *sel_ctx, bool *alternate)
{
   struct d3d12_shader_selector *vs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_VERTEX];
   struct d3d12_shader_selector *gs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
   struct d3d12_shader_selector *last_vertex_stage = gs && !gs->is_gs_variant ? gs : vs;

   /* Make sure GL prims match Gallium prims — the GS output primitive is a
    * GL enum, cast below to pipe_prim_type */
   STATIC_ASSERT(GL_POINTS == PIPE_PRIM_POINTS);
   STATIC_ASSERT(GL_LINES == PIPE_PRIM_LINES);
   STATIC_ASSERT(GL_LINE_STRIP == PIPE_PRIM_LINE_STRIP);

   enum pipe_prim_type mode;
   switch (last_vertex_stage->stage) {
   case PIPE_SHADER_GEOMETRY:
      mode = (enum pipe_prim_type)last_vertex_stage->current->nir->info.gs.output_primitive;
      break;
   case PIPE_SHADER_VERTEX:
      /* dinfo can be NULL outside of a draw; default to triangles then */
      mode = sel_ctx->dinfo ? (enum pipe_prim_type)sel_ctx->dinfo->mode : PIPE_PRIM_TRIANGLES;
      break;
   default:
      unreachable("Tesselation shaders are not supported");
   }

   bool flatshade_first = sel_ctx->ctx->gfx_pipeline_state.rast &&
                          sel_ctx->ctx->gfx_pipeline_state.rast->base.flatshade_first;
   /* NOTE(review): the vertices_out > min check presumably excludes GSs that
    * emit a single primitive (no alternation possible) — confirm against
    * the GS-variant generation code. */
   *alternate = (mode == GL_TRIANGLE_STRIP || mode == GL_TRIANGLE_STRIP_ADJACENCY) &&
                (!gs || gs->is_gs_variant ||
                 gs->initial->info.gs.vertices_out > u_prim_vertex_count(mode)->min);
   return flatshade_first ? 0 : u_prim_vertex_count(mode)->min - 1;
}
416
417 static bool
has_flat_varyings(struct d3d12_context * ctx)418 has_flat_varyings(struct d3d12_context *ctx)
419 {
420 struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
421
422 if (!fs || !fs->current)
423 return false;
424
425 nir_foreach_variable_with_modes(input, fs->current->nir,
426 nir_var_shader_in) {
427 if (input->data.interpolation == INTERP_MODE_FLAT)
428 return true;
429 }
430
431 return false;
432 }
433
434 static bool
needs_vertex_reordering(struct d3d12_selection_context * sel_ctx)435 needs_vertex_reordering(struct d3d12_selection_context *sel_ctx)
436 {
437 struct d3d12_context *ctx = sel_ctx->ctx;
438 bool flat = has_flat_varyings(ctx);
439 bool xfb = ctx->gfx_pipeline_state.num_so_targets > 0;
440
441 if (fill_mode_lowered(ctx, sel_ctx->dinfo) != PIPE_POLYGON_MODE_FILL)
442 return false;
443
444 /* TODO add support for line primitives */
445
446 /* When flat shading a triangle and provoking vertex is not the first one, we use load_at_vertex.
447 If not available for this adapter, or if it's a triangle strip, we need to reorder the vertices */
448 if (flat && sel_ctx->provoking_vertex >= 2 && (!d3d12_screen(ctx->base.screen)->have_load_at_vertex ||
449 sel_ctx->alternate_tri))
450 return true;
451
452 /* When transform feedback is enabled and the output is alternating (triangle strip or triangle
453 strip with adjacency), we need to reorder vertices to get the order expected by OpenGL. This
454 only works when there is no flat shading involved. In that scenario, we don't care about
455 the provoking vertex. */
456 if (xfb && !flat && sel_ctx->alternate_tri) {
457 sel_ctx->provoking_vertex = 0;
458 return true;
459 }
460
461 return false;
462 }
463
464 static nir_variable *
create_varying_from_info(nir_shader * nir,struct d3d12_varying_info * info,unsigned slot,nir_variable_mode mode)465 create_varying_from_info(nir_shader *nir, struct d3d12_varying_info *info,
466 unsigned slot, nir_variable_mode mode)
467 {
468 nir_variable *var;
469 char tmp[100];
470
471 snprintf(tmp, ARRAY_SIZE(tmp),
472 mode == nir_var_shader_in ? "in_%d" : "out_%d",
473 info->vars[slot].driver_location);
474 var = nir_variable_create(nir, mode, info->vars[slot].type, tmp);
475 var->data.location = slot;
476 var->data.driver_location = info->vars[slot].driver_location;
477 var->data.interpolation = info->vars[slot].interpolation;
478
479 return var;
480 }
481
482 static void
fill_varyings(struct d3d12_varying_info * info,nir_shader * s,nir_variable_mode modes,uint64_t mask)483 fill_varyings(struct d3d12_varying_info *info, nir_shader *s,
484 nir_variable_mode modes, uint64_t mask)
485 {
486 nir_foreach_variable_with_modes(var, s, modes) {
487 unsigned slot = var->data.location;
488 uint64_t slot_bit = BITFIELD64_BIT(slot);
489
490 if (!(mask & slot_bit))
491 continue;
492 info->vars[slot].driver_location = var->data.driver_location;
493 info->vars[slot].type = var->type;
494 info->vars[slot].interpolation = var->data.interpolation;
495 info->mask |= slot_bit;
496 }
497 }
498
499 static void
fill_flat_varyings(struct d3d12_gs_variant_key * key,d3d12_shader_selector * fs)500 fill_flat_varyings(struct d3d12_gs_variant_key *key, d3d12_shader_selector *fs)
501 {
502 if (!fs || !fs->current)
503 return;
504
505 nir_foreach_variable_with_modes(input, fs->current->nir,
506 nir_var_shader_in) {
507 if (input->data.interpolation == INTERP_MODE_FLAT)
508 key->flat_varyings |= BITFIELD64_BIT(input->data.location);
509 }
510 }
511
/* Ensures the GEOMETRY stage slot holds the correct shader for this draw:
 * leaves user geometry shaders untouched, otherwise builds the GS-variant
 * key (fill/cull lowering, point sprites, or vertex reordering) and binds
 * the matching generated variant — or NULL when none is needed. */
static void
validate_geometry_shader_variant(struct d3d12_selection_context *sel_ctx)
{
   struct d3d12_context *ctx = sel_ctx->ctx;
   d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
   d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
   struct d3d12_gs_variant_key key = {0};
   bool variant_needed = false;

   d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];

   /* Nothing to do if there is a user geometry shader bound */
   if (gs != NULL && !gs->is_gs_variant)
      return;

   /* Fill the geometry shader variant key */
   if (sel_ctx->fill_mode_lowered != PIPE_POLYGON_MODE_FILL) {
      key.fill_mode = sel_ctx->fill_mode_lowered;
      key.cull_mode = sel_ctx->cull_mode_lowered;
      key.has_front_face = BITSET_TEST(fs->initial->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
      /* A negative flip_y toggles the effective winding */
      if (key.cull_mode != PIPE_FACE_NONE || key.has_front_face)
         key.front_ccw = ctx->gfx_pipeline_state.rast->base.front_ccw ^ (ctx->flip_y < 0);
      key.edge_flag_fix = needs_edge_flag_fix(ctx->initial_api_prim);
      fill_flat_varyings(&key, fs);
      if (key.flat_varyings != 0)
         key.flatshade_first = ctx->gfx_pipeline_state.rast->base.flatshade_first;
      variant_needed = true;
   } else if (sel_ctx->needs_point_sprite_lowering) {
      key.passthrough = true;
      variant_needed = true;
   } else if (sel_ctx->needs_vertex_reordering) {
      /* TODO support cases where flat shading (pv != 0) and xfb are enabled */
      key.provoking_vertex = sel_ctx->provoking_vertex;
      key.alternate_tri = sel_ctx->alternate_tri;
      variant_needed = true;
   }

   /* The generated GS consumes everything the VS writes */
   if (variant_needed) {
      fill_varyings(&key.varyings, vs->initial, nir_var_shader_out,
                    vs->initial->info.outputs_written);
   }

   /* Check if the currently bound geometry shader variant is correct */
   if (gs && memcmp(&gs->gs_key, &key, sizeof(key)) == 0)
      return;

   /* Find/create the proper variant and bind it */
   gs = variant_needed ? d3d12_get_gs_variant(ctx, &key) : NULL;
   ctx->gfx_stages[PIPE_SHADER_GEOMETRY] = gs;
}
562
563 static bool
d3d12_compare_shader_keys(const d3d12_shader_key * expect,const d3d12_shader_key * have)564 d3d12_compare_shader_keys(const d3d12_shader_key *expect, const d3d12_shader_key *have)
565 {
566 assert(expect->stage == have->stage);
567 assert(expect);
568 assert(have);
569
570 /* Because we only add varyings we check that a shader has at least the expected in-
571 * and outputs. */
572 if (memcmp(&expect->required_varying_inputs, &have->required_varying_inputs,
573 sizeof(struct d3d12_varying_info)) ||
574 memcmp(&expect->required_varying_outputs, &have->required_varying_outputs,
575 sizeof(struct d3d12_varying_info)) ||
576 (expect->next_varying_inputs != have->next_varying_inputs) ||
577 (expect->prev_varying_outputs != have->prev_varying_outputs))
578 return false;
579
580 if (expect->stage == PIPE_SHADER_GEOMETRY) {
581 if (expect->gs.writes_psize) {
582 if (!have->gs.writes_psize ||
583 expect->gs.point_pos_stream_out != have->gs.point_pos_stream_out ||
584 expect->gs.sprite_coord_enable != have->gs.sprite_coord_enable ||
585 expect->gs.sprite_origin_upper_left != have->gs.sprite_origin_upper_left ||
586 expect->gs.point_size_per_vertex != have->gs.point_size_per_vertex)
587 return false;
588 } else if (have->gs.writes_psize) {
589 return false;
590 }
591 if (expect->gs.primitive_id != have->gs.primitive_id ||
592 expect->gs.triangle_strip != have->gs.triangle_strip)
593 return false;
594 } else if (expect->stage == PIPE_SHADER_FRAGMENT) {
595 if (expect->fs.frag_result_color_lowering != have->fs.frag_result_color_lowering ||
596 expect->fs.manual_depth_range != have->fs.manual_depth_range ||
597 expect->fs.polygon_stipple != have->fs.polygon_stipple ||
598 expect->fs.cast_to_uint != have->fs.cast_to_uint ||
599 expect->fs.cast_to_int != have->fs.cast_to_int)
600 return false;
601 }
602
603 if (expect->tex_saturate_s != have->tex_saturate_s ||
604 expect->tex_saturate_r != have->tex_saturate_r ||
605 expect->tex_saturate_t != have->tex_saturate_t)
606 return false;
607
608 if (expect->samples_int_textures != have->samples_int_textures)
609 return false;
610
611 if (expect->n_texture_states != have->n_texture_states)
612 return false;
613
614 if (memcmp(expect->tex_wrap_states, have->tex_wrap_states,
615 expect->n_texture_states * sizeof(dxil_wrap_sampler_state)))
616 return false;
617
618 if (memcmp(expect->swizzle_state, have->swizzle_state,
619 expect->n_texture_states * sizeof(dxil_texture_swizzle_state)))
620 return false;
621
622 if (memcmp(expect->sampler_compare_funcs, have->sampler_compare_funcs,
623 expect->n_texture_states * sizeof(enum compare_func)))
624 return false;
625
626 if (expect->invert_depth != have->invert_depth)
627 return false;
628
629 if (expect->stage == PIPE_SHADER_VERTEX) {
630 if (expect->vs.needs_format_emulation != have->vs.needs_format_emulation)
631 return false;
632
633 if (expect->vs.needs_format_emulation) {
634 if (memcmp(expect->vs.format_conversion, have->vs.format_conversion,
635 PIPE_MAX_ATTRIBS * sizeof (enum pipe_format)))
636 return false;
637 }
638 }
639
640 if (expect->fs.provoking_vertex != have->fs.provoking_vertex)
641 return false;
642
643 return true;
644 }
645
/* Builds the variant key for stage sel given its pipeline neighbours:
 * required in/out varyings from prev/next, provoking vertex, GS
 * point-sprite / line / strip lowering flags, FS color/depth/stipple
 * lowering, texture wrap/swizzle/compare state, and VS format emulation.
 * The key is fully zero-initialized first, so unset fields compare equal
 * in d3d12_compare_shader_keys(). */
static void
d3d12_fill_shader_key(struct d3d12_selection_context *sel_ctx,
                      d3d12_shader_key *key, d3d12_shader_selector *sel,
                      d3d12_shader_selector *prev, d3d12_shader_selector *next)
{
   pipe_shader_type stage = sel->stage;

   /* Inputs generated by fixed function / earlier lowering, which the
    * previous stage does not need to provide */
   uint64_t system_generated_in_values =
      VARYING_BIT_PNTC |
      VARYING_BIT_PRIMITIVE_ID;

   /* Outputs consumed by fixed function rather than the next stage */
   uint64_t system_out_values =
      VARYING_BIT_CLIP_DIST0 |
      VARYING_BIT_CLIP_DIST1;

   memset(key, 0, sizeof(d3d12_shader_key));
   key->stage = stage;

   if (prev) {
      /* We require as inputs what the previous stage has written,
       * except certain system values */
      if (stage == PIPE_SHADER_FRAGMENT || stage == PIPE_SHADER_GEOMETRY)
         system_out_values |= VARYING_BIT_POS;
      if (stage == PIPE_SHADER_FRAGMENT)
         system_out_values |= VARYING_BIT_PSIZ;
      uint64_t mask = prev->current->nir->info.outputs_written & ~system_out_values;
      fill_varyings(&key->required_varying_inputs, prev->current->nir,
                    nir_var_shader_out, mask);
      key->prev_varying_outputs = prev->current->nir->info.outputs_written;

      /* Set the provoking vertex based on the previous shader output. Only set the
       * key value if the driver actually supports changing the provoking vertex though */
      if (stage == PIPE_SHADER_FRAGMENT && sel_ctx->ctx->gfx_pipeline_state.rast &&
          !sel_ctx->needs_vertex_reordering &&
          d3d12_screen(sel_ctx->ctx->base.screen)->have_load_at_vertex)
         key->fs.provoking_vertex = sel_ctx->provoking_vertex;
   }

   /* We require as outputs what the next stage reads,
    * except certain system values */
   if (next) {
      if (!next->is_gs_variant) {
         if (stage == PIPE_SHADER_VERTEX)
            system_generated_in_values |= VARYING_BIT_POS;
         uint64_t mask = next->current->nir->info.inputs_read & ~system_generated_in_values;
         fill_varyings(&key->required_varying_outputs, next->current->nir,
                       nir_var_shader_in, mask);
      }
      key->next_varying_inputs = next->current->nir->info.inputs_read;
   }

   /* The last stage before rasterization handles depth inversion / clip-z;
    * polygon stipple needs the position varying to reach the FS */
   if (stage == PIPE_SHADER_GEOMETRY ||
       (stage == PIPE_SHADER_VERTEX && (!next || next->stage != PIPE_SHADER_GEOMETRY))) {
      key->last_vertex_processing_stage = 1;
      key->invert_depth = sel_ctx->ctx->reverse_depth_range;
      if (sel_ctx->ctx->pstipple.enabled)
         key->next_varying_inputs |= VARYING_BIT_POS;
   }

   if (stage == PIPE_SHADER_GEOMETRY && sel_ctx->ctx->gfx_pipeline_state.rast) {
      struct pipe_rasterizer_state *rast = &sel_ctx->ctx->gfx_pipeline_state.rast->base;
      if (sel_ctx->needs_point_sprite_lowering) {
         key->gs.writes_psize = 1;
         key->gs.point_size_per_vertex = rast->point_size_per_vertex;
         key->gs.sprite_coord_enable = rast->sprite_coord_enable;
         key->gs.sprite_origin_upper_left = (rast->sprite_coord_mode != PIPE_SPRITE_COORD_LOWER_LEFT);
         /* A flipped framebuffer inverts the sprite origin */
         if (sel_ctx->ctx->flip_y < 0)
            key->gs.sprite_origin_upper_left = !key->gs.sprite_origin_upper_left;
         key->gs.aa_point = rast->point_smooth;
         /* NOTE(review): factors 6/2 presumably reflect vertices emitted per
          * input primitive by the lowering GS — confirm in the GS variants */
         key->gs.stream_output_factor = 6;
      } else if (sel_ctx->fill_mode_lowered == PIPE_POLYGON_MODE_LINE) {
         key->gs.stream_output_factor = 2;
      } else if (sel_ctx->needs_vertex_reordering && !sel->is_gs_variant) {
         key->gs.triangle_strip = 1;
      }

      if (sel->is_gs_variant && next && next->initial->info.inputs_read & VARYING_BIT_PRIMITIVE_ID)
         key->gs.primitive_id = 1;
   } else if (stage == PIPE_SHADER_FRAGMENT) {
      key->fs.missing_dual_src_outputs = sel_ctx->missing_dual_src_outputs;
      key->fs.frag_result_color_lowering = sel_ctx->frag_result_color_lowering;
      key->fs.manual_depth_range = sel_ctx->manual_depth_range;
      key->fs.polygon_stipple = sel_ctx->ctx->pstipple.enabled;
      /* Logic ops on non-float RTs need integer casts of the FS outputs */
      if (sel_ctx->ctx->gfx_pipeline_state.blend &&
          sel_ctx->ctx->gfx_pipeline_state.blend->desc.RenderTarget[0].LogicOpEnable &&
          !sel_ctx->ctx->gfx_pipeline_state.has_float_rtv) {
         key->fs.cast_to_uint = util_format_is_unorm(sel_ctx->ctx->fb.cbufs[0]->format);
         key->fs.cast_to_int = !key->fs.cast_to_uint;
      }
   }

   if (sel->samples_int_textures) {
      key->samples_int_textures = sel->samples_int_textures;
      key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
      /* Copy only states with integer textures */
      for(int i = 0; i < key->n_texture_states; ++i) {
         auto& wrap_state = sel_ctx->ctx->tex_wrap_states[stage][i];
         if (wrap_state.is_int_sampler) {
            memcpy(&key->tex_wrap_states[i], &wrap_state, sizeof(wrap_state));
            key->swizzle_state[i] = sel_ctx->ctx->tex_swizzle_state[stage][i];
         }
      }
   }

   /* Record which non-nearest samplers clamp, per coordinate */
   for (unsigned i = 0; i < sel_ctx->ctx->num_samplers[stage]; ++i) {
      if (!sel_ctx->ctx->samplers[stage][i] ||
          sel_ctx->ctx->samplers[stage][i]->filter == PIPE_TEX_FILTER_NEAREST)
         continue;

      if (sel_ctx->ctx->samplers[stage][i]->wrap_r == PIPE_TEX_WRAP_CLAMP)
         key->tex_saturate_r |= 1 << i;
      if (sel_ctx->ctx->samplers[stage][i]->wrap_s == PIPE_TEX_WRAP_CLAMP)
         key->tex_saturate_s |= 1 << i;
      if (sel_ctx->ctx->samplers[stage][i]->wrap_t == PIPE_TEX_WRAP_CLAMP)
         key->tex_saturate_t |= 1 << i;
   }

   if (sel->compare_with_lod_bias_grad) {
      key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
      memcpy(key->sampler_compare_funcs, sel_ctx->ctx->tex_compare_func[stage],
             key->n_texture_states * sizeof(enum compare_func));
      memcpy(key->swizzle_state, sel_ctx->ctx->tex_swizzle_state[stage],
             key->n_texture_states * sizeof(dxil_texture_swizzle_state));
   }

   if (stage == PIPE_SHADER_VERTEX && sel_ctx->ctx->gfx_pipeline_state.ves) {
      key->vs.needs_format_emulation = sel_ctx->ctx->gfx_pipeline_state.ves->needs_format_emulation;
      if (key->vs.needs_format_emulation) {
         memcpy(key->vs.format_conversion, sel_ctx->ctx->gfx_pipeline_state.ves->format_conversion,
                sel_ctx->ctx->gfx_pipeline_state.ves->num_elements * sizeof(enum pipe_format));
      }
   }

   /* A lowering GS that computes front-face needs the FS to read it from a
    * varying instead of the system value */
   if (stage == PIPE_SHADER_FRAGMENT &&
       sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY] &&
       sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_gs_variant &&
       sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->gs_key.has_front_face) {
      key->fs.remap_front_facing = 1;
   }
}
786
/* Select (or build) the compiled variant of 'sel' that matches the current
 * pipeline state described by sel_ctx, and store it in sel->current.
 *
 * The shader key is derived from the context plus the adjacent pipeline
 * stages; if a previously compiled variant has an equal key it is reused,
 * otherwise the stored "initial" NIR is cloned, state-dependent lowering
 * passes are applied, and the result is compiled and prepended to the
 * selector's variant list.
 *
 * NOTE(review): on compile failure compile_nir() presumably returns NULL;
 * here that is only caught by an assert — confirm release-build behavior.
 */
static void
select_shader_variant(struct d3d12_selection_context *sel_ctx, d3d12_shader_selector *sel,
                      d3d12_shader_selector *prev, d3d12_shader_selector *next)
{
   struct d3d12_context *ctx = sel_ctx->ctx;
   d3d12_shader_key key;
   nir_shader *new_nir_variant;
   unsigned pstipple_binding = UINT32_MAX;

   d3d12_fill_shader_key(sel_ctx, &key, sel, prev, next);

   /* Check for an existing variant */
   for (d3d12_shader *variant = sel->first; variant;
        variant = variant->next_variant) {

      if (d3d12_compare_shader_keys(&key, &variant->key)) {
         sel->current = variant;
         return;
      }
   }

   /* Clone the NIR shader */
   new_nir_variant = nir_shader_clone(sel, sel->initial);

   /* Apply any needed lowering passes.  Passes that add or remove shader
    * I/O re-run nir_shader_gather_info() so that shader_info stays in sync
    * for the later varying fix-up below. */
   if (key.gs.writes_psize) {
      NIR_PASS_V(new_nir_variant, d3d12_lower_point_sprite,
                 !key.gs.sprite_origin_upper_left,
                 key.gs.point_size_per_vertex,
                 key.gs.sprite_coord_enable,
                 key.next_varying_inputs);

      nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
      nir_shader_gather_info(new_nir_variant, impl);
   }

   if (key.gs.primitive_id) {
      NIR_PASS_V(new_nir_variant, d3d12_lower_primitive_id);

      nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
      nir_shader_gather_info(new_nir_variant, impl);
   }

   if (key.gs.triangle_strip)
      NIR_PASS_V(new_nir_variant, d3d12_lower_triangle_strip);

   if (key.fs.polygon_stipple) {
      /* pstipple_binding receives the texture binding the pass allocated;
       * it is stored on the variant below so the state code can bind the
       * stipple pattern texture. */
      NIR_PASS_V(new_nir_variant, nir_lower_pstipple_fs,
                 &pstipple_binding, 0, false);

      nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
      nir_shader_gather_info(new_nir_variant, impl);
   }

   if (key.fs.remap_front_facing) {
      d3d12_forward_front_face(new_nir_variant);

      nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
      nir_shader_gather_info(new_nir_variant, impl);
   }

   if (key.fs.missing_dual_src_outputs) {
      NIR_PASS_V(new_nir_variant, d3d12_add_missing_dual_src_target,
                 key.fs.missing_dual_src_outputs);
   } else if (key.fs.frag_result_color_lowering) {
      NIR_PASS_V(new_nir_variant, nir_lower_fragcolor,
                 key.fs.frag_result_color_lowering);
   }

   if (key.fs.manual_depth_range)
      NIR_PASS_V(new_nir_variant, d3d12_lower_depth_range);

   /* Shadow-compare sampling with LOD/bias/gradient has no direct DXIL
    * equivalent, so it is emulated per sampler-compare state. */
   if (sel->compare_with_lod_bias_grad)
      NIR_PASS_V(new_nir_variant, d3d12_lower_sample_tex_compare, key.n_texture_states,
                 key.sampler_compare_funcs, key.swizzle_state);

   if (key.fs.cast_to_uint)
      NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, false);
   if (key.fs.cast_to_int)
      NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, true);

   {
      struct nir_lower_tex_options tex_options = { };
      tex_options.lower_txp = ~0u; /* No equivalent for textureProj */
      tex_options.lower_rect = true;
      tex_options.lower_rect_offset = true;
      tex_options.saturate_s = key.tex_saturate_s;
      tex_options.saturate_r = key.tex_saturate_r;
      tex_options.saturate_t = key.tex_saturate_t;

      NIR_PASS_V(new_nir_variant, nir_lower_tex, &tex_options);
   }

   /* Add the needed in and outputs, and re-sort */
   uint64_t mask = key.required_varying_inputs.mask & ~new_nir_variant->info.inputs_read;

   if (prev) {
      /* Create stub inputs for varyings the previous stage writes but this
       * shader never read, then renumber driver locations to match. */
      while (mask) {
         int slot = u_bit_scan64(&mask);
         create_varying_from_info(new_nir_variant, &key.required_varying_inputs, slot, nir_var_shader_in);
      }
      dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_in,
                                     key.prev_varying_outputs);
   }

   mask = key.required_varying_outputs.mask & ~new_nir_variant->info.outputs_written;

   if (next) {
      /* Same as above for outputs the next stage reads. */
      while (mask) {
         int slot = u_bit_scan64(&mask);
         create_varying_from_info(new_nir_variant, &key.required_varying_outputs, slot, nir_var_shader_out);
      }
      dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_out,
                                     key.next_varying_inputs);
   }

   d3d12_shader *new_variant = compile_nir(ctx, sel, &key, new_nir_variant);
   assert(new_variant);

   /* keep track of polygon stipple texture binding */
   new_variant->pstipple_binding = pstipple_binding;

   /* prepend the new shader in the selector chain and pick it */
   new_variant->next_variant = sel->first;
   sel->current = sel->first = new_variant;
}
913
914 static d3d12_shader_selector *
get_prev_shader(struct d3d12_context * ctx,pipe_shader_type current)915 get_prev_shader(struct d3d12_context *ctx, pipe_shader_type current)
916 {
917 /* No TESS_CTRL or TESS_EVAL yet */
918
919 switch (current) {
920 case PIPE_SHADER_VERTEX:
921 return NULL;
922 case PIPE_SHADER_FRAGMENT:
923 if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
924 return ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
925 FALLTHROUGH;
926 case PIPE_SHADER_GEOMETRY:
927 return ctx->gfx_stages[PIPE_SHADER_VERTEX];
928 default:
929 unreachable("shader type not supported");
930 }
931 }
932
933 static d3d12_shader_selector *
get_next_shader(struct d3d12_context * ctx,pipe_shader_type current)934 get_next_shader(struct d3d12_context *ctx, pipe_shader_type current)
935 {
936 /* No TESS_CTRL or TESS_EVAL yet */
937
938 switch (current) {
939 case PIPE_SHADER_VERTEX:
940 if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
941 return ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
942 FALLTHROUGH;
943 case PIPE_SHADER_GEOMETRY:
944 return ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
945 case PIPE_SHADER_FRAGMENT:
946 return NULL;
947 default:
948 unreachable("shader type not supported");
949 }
950 }
951
/* Bitmask of texture-usage properties collected by scan_texture_use();
 * used to decide which state-dependent lowering a shader will need. */
enum tex_scan_flags {
   TEX_SAMPLE_INTEGER_TEXTURE = 1 << 0,  /* samples an int/uint texture */
   TEX_CMP_WITH_LOD_BIAS_GRAD = 1 << 1,  /* shadow compare with txb/txl/txd */
   TEX_SCAN_ALL_FLAGS = (1 << 2) - 1     /* union of all flags above */
};
957
958 static unsigned
scan_texture_use(nir_shader * nir)959 scan_texture_use(nir_shader *nir)
960 {
961 unsigned result = 0;
962 nir_foreach_function(func, nir) {
963 nir_foreach_block(block, func->impl) {
964 nir_foreach_instr(instr, block) {
965 if (instr->type == nir_instr_type_tex) {
966 auto tex = nir_instr_as_tex(instr);
967 switch (tex->op) {
968 case nir_texop_txb:
969 case nir_texop_txl:
970 case nir_texop_txd:
971 if (tex->is_shadow)
972 result |= TEX_CMP_WITH_LOD_BIAS_GRAD;
973 FALLTHROUGH;
974 case nir_texop_tex:
975 if (tex->dest_type & (nir_type_int | nir_type_uint))
976 result |= TEX_SAMPLE_INTEGER_TEXTURE;
977 default:
978 ;
979 }
980 }
981 if (TEX_SCAN_ALL_FLAGS == result)
982 return result;
983 }
984 }
985 }
986 return result;
987 }
988
989 static uint64_t
update_so_info(struct pipe_stream_output_info * so_info,uint64_t outputs_written)990 update_so_info(struct pipe_stream_output_info *so_info,
991 uint64_t outputs_written)
992 {
993 uint64_t so_outputs = 0;
994 uint8_t reverse_map[64] = {0};
995 unsigned slot = 0;
996
997 while (outputs_written)
998 reverse_map[slot++] = u_bit_scan64(&outputs_written);
999
1000 for (unsigned i = 0; i < so_info->num_outputs; i++) {
1001 struct pipe_stream_output *output = &so_info->output[i];
1002
1003 /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
1004 output->register_index = reverse_map[output->register_index];
1005
1006 so_outputs |= 1ull << output->register_index;
1007 }
1008
1009 return so_outputs;
1010 }
1011
/* Create a shader selector for a new gallium shader CSO.
 *
 * Converts TGSI to NIR if necessary, scans texture usage, normalizes the
 * shader's I/O layout against the currently bound adjacent stages, and
 * compiles an initial variant so that sel->current is always valid.
 * Returns NULL (and frees the selector) if no variant could be compiled.
 */
struct d3d12_shader_selector *
d3d12_create_shader(struct d3d12_context *ctx,
                    pipe_shader_type stage,
                    const struct pipe_shader_state *shader)
{
   struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector);
   sel->stage = stage;

   struct nir_shader *nir = NULL;

   if (shader->type == PIPE_SHADER_IR_NIR) {
      /* Takes over the state tracker's NIR shader directly. */
      nir = (nir_shader *)shader->ir.nir;
   } else {
      assert(shader->type == PIPE_SHADER_IR_TGSI);
      nir = tgsi_to_nir(shader->tokens, ctx->base.screen, false);
   }

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   /* Record which texture ops need state-dependent emulation later. */
   unsigned tex_scan_result = scan_texture_use(nir);
   sel->samples_int_textures = (tex_scan_result & TEX_SAMPLE_INTEGER_TEXTURE) != 0;
   sel->compare_with_lod_bias_grad = (tex_scan_result & TEX_CMP_WITH_LOD_BIAS_GRAD) != 0;

   memcpy(&sel->so_info, &shader->stream_output, sizeof(sel->so_info));
   update_so_info(&sel->so_info, nir->info.outputs_written);

   /* NOTE(review): nir was already dereferenced above, so this assert is
    * redundant at this point. */
   assert(nir != NULL);
   d3d12_shader_selector *prev = get_prev_shader(ctx, sel->stage);
   d3d12_shader_selector *next = get_next_shader(ctx, sel->stage);

   /* Slots that must keep their signedness even if DXIL would prefer a
    * different type: system-generated values and the stencil output. */
   uint64_t in_mask = nir->info.stage == MESA_SHADER_VERTEX ?
                      0 : VARYING_BIT_PRIMITIVE_ID;

   uint64_t out_mask = nir->info.stage == MESA_SHADER_FRAGMENT ?
                       (1ull << FRAG_RESULT_STENCIL) :
                       VARYING_BIT_PRIMITIVE_ID;

   d3d12_fix_io_uint_type(nir, in_mask, out_mask);
   NIR_PASS_V(nir, dxil_nir_split_clip_cull_distance);

   /* Align this stage's driver locations with the adjacent stages that are
    * bound right now; vertex inputs / fragment outputs are only sorted. */
   if (nir->info.stage != MESA_SHADER_VERTEX)
      nir->info.inputs_read =
            dxil_reassign_driver_locations(nir, nir_var_shader_in,
                                           prev ? prev->current->nir->info.outputs_written : 0);
   else
      nir->info.inputs_read = dxil_sort_by_driver_location(nir, nir_var_shader_in);

   if (nir->info.stage != MESA_SHADER_FRAGMENT) {
      nir->info.outputs_written =
            dxil_reassign_driver_locations(nir, nir_var_shader_out,
                                           next ? next->current->nir->info.inputs_read : 0);
   } else {
      NIR_PASS_V(nir, nir_lower_fragcoord_wtrans);
      dxil_sort_ps_outputs(nir);
   }

   /* Integer cube maps are not supported in DirectX because sampling is not supported
    * on integer textures and TextureLoad is not supported for cube maps, so we have to
    * lower integer cube maps to be handled like 2D textures arrays*/
   NIR_PASS_V(nir, d3d12_lower_int_cubmap_to_array);

   /* Keep this initial shader as the blue print for possible variants */
   sel->initial = nir;

   /*
    * We must compile some shader here, because if the previous or a next shaders exists later
    * when the shaders are bound, then the key evaluation in the shader selector will access
    * the current variant of these prev and next shader, and we can only assign
    * a current variant when it has been successfully compiled.
    *
    * For shaders that require lowering because certain instructions are not available
    * and their emulation is state depended (like sampling an integer texture that must be
    * emulated and needs handling of boundary conditions, or shadow compare sampling with LOD),
    * we must go through the shader selector here to create a compilable variant.
    * For shaders that are not depended on the state this is just compiling the original
    * shader.
    *
    * TODO: get rid of having to compiling the shader here if it can be forseen that it will
    * be thrown away (i.e. it depends on states that are likely to change before the shader is
    * used for the first time)
    */
   struct d3d12_selection_context sel_ctx = {0};
   sel_ctx.ctx = ctx;
   select_shader_variant(&sel_ctx, sel, prev, next);

   if (!sel->current) {
      ralloc_free(sel);
      return NULL;
   }

   return sel;
}
1104
1105 void
d3d12_select_shader_variants(struct d3d12_context * ctx,const struct pipe_draw_info * dinfo)1106 d3d12_select_shader_variants(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
1107 {
1108 static unsigned order[] = {PIPE_SHADER_VERTEX, PIPE_SHADER_GEOMETRY, PIPE_SHADER_FRAGMENT};
1109 struct d3d12_selection_context sel_ctx;
1110
1111 sel_ctx.ctx = ctx;
1112 sel_ctx.dinfo = dinfo;
1113 sel_ctx.needs_point_sprite_lowering = needs_point_sprite_lowering(ctx, dinfo);
1114 sel_ctx.fill_mode_lowered = fill_mode_lowered(ctx, dinfo);
1115 sel_ctx.cull_mode_lowered = cull_mode_lowered(ctx, sel_ctx.fill_mode_lowered);
1116 sel_ctx.provoking_vertex = get_provoking_vertex(&sel_ctx, &sel_ctx.alternate_tri);
1117 sel_ctx.needs_vertex_reordering = needs_vertex_reordering(&sel_ctx);
1118 sel_ctx.missing_dual_src_outputs = missing_dual_src_outputs(ctx);
1119 sel_ctx.frag_result_color_lowering = frag_result_color_lowering(ctx);
1120 sel_ctx.manual_depth_range = manual_depth_range(ctx);
1121
1122 validate_geometry_shader_variant(&sel_ctx);
1123
1124 for (unsigned i = 0; i < ARRAY_SIZE(order); ++i) {
1125 auto sel = ctx->gfx_stages[order[i]];
1126 if (!sel)
1127 continue;
1128
1129 d3d12_shader_selector *prev = get_prev_shader(ctx, sel->stage);
1130 d3d12_shader_selector *next = get_next_shader(ctx, sel->stage);
1131
1132 select_shader_variant(&sel_ctx, sel, prev, next);
1133 }
1134 }
1135
1136 void
d3d12_shader_free(struct d3d12_shader_selector * sel)1137 d3d12_shader_free(struct d3d12_shader_selector *sel)
1138 {
1139 auto shader = sel->first;
1140 while (shader) {
1141 free(shader->bytecode);
1142 shader = shader->next_variant;
1143 }
1144 ralloc_free(sel->initial);
1145 ralloc_free(sel);
1146 }
1147
1148 #ifdef _WIN32
1149 // Used to get path to self
1150 extern "C" extern IMAGE_DOS_HEADER __ImageBase;
1151 #endif
1152
load_dxil_dll()1153 void d3d12_validation_tools::load_dxil_dll()
1154 {
1155 if (!dxil_module.load(UTIL_DL_PREFIX "dxil" UTIL_DL_EXT)) {
1156 #ifdef _WIN32
1157 char selfPath[MAX_PATH] = "";
1158 uint32_t pathSize = GetModuleFileNameA((HINSTANCE)&__ImageBase, selfPath, sizeof(selfPath));
1159 if (pathSize == 0 || pathSize == sizeof(selfPath)) {
1160 debug_printf("D3D12: Unable to get path to self");
1161 return;
1162 }
1163
1164 auto lastSlash = strrchr(selfPath, '\\');
1165 if (!lastSlash) {
1166 debug_printf("D3D12: Unable to get path to self");
1167 return;
1168 }
1169
1170 *(lastSlash + 1) = '\0';
1171 if (strcat_s(selfPath, "dxil.dll") != 0) {
1172 debug_printf("D3D12: Unable to get path to dxil.dll next to self");
1173 return;
1174 }
1175
1176 dxil_module.load(selfPath);
1177 #endif
1178 }
1179 }
1180
d3d12_validation_tools()1181 d3d12_validation_tools::d3d12_validation_tools()
1182 {
1183 load_dxil_dll();
1184 DxcCreateInstanceProc dxil_create_func = (DxcCreateInstanceProc)util_dl_get_proc_address(dxil_module, "DxcCreateInstance");
1185
1186 if (dxil_create_func) {
1187 HRESULT hr = dxil_create_func(CLSID_DxcValidator, IID_PPV_ARGS(&validator));
1188 if (FAILED(hr)) {
1189 debug_printf("D3D12: Unable to create validator\n");
1190 }
1191 }
1192 #ifdef _WIN32
1193 else if (!(d3d12_debug & D3D12_DEBUG_EXPERIMENTAL)) {
1194 debug_printf("D3D12: Unable to load DXIL.dll\n");
1195 }
1196 #endif
1197
1198 DxcCreateInstanceProc compiler_create_func = nullptr;
1199 if(dxc_compiler_module.load("dxcompiler.dll"))
1200 compiler_create_func = (DxcCreateInstanceProc)util_dl_get_proc_address(dxc_compiler_module, "DxcCreateInstance");
1201
1202 if (compiler_create_func) {
1203 HRESULT hr = compiler_create_func(CLSID_DxcLibrary, IID_PPV_ARGS(&library));
1204 if (FAILED(hr)) {
1205 debug_printf("D3D12: Unable to create library instance: %x\n", hr);
1206 }
1207
1208 if (d3d12_debug & D3D12_DEBUG_DISASS) {
1209 hr = compiler_create_func(CLSID_DxcCompiler, IID_PPV_ARGS(&compiler));
1210 if (FAILED(hr)) {
1211 debug_printf("D3D12: Unable to create compiler instance\n");
1212 }
1213 }
1214 } else if (d3d12_debug & D3D12_DEBUG_DISASS) {
1215 debug_printf("D3D12: Disassembly requested but compiler couldn't be loaded\n");
1216 }
1217 }
1218
HModule()1219 d3d12_validation_tools::HModule::HModule():
1220 module(0)
1221 {
1222 }
1223
~HModule()1224 d3d12_validation_tools::HModule::~HModule()
1225 {
1226 if (module)
1227 util_dl_close(module);
1228 }
1229
/* Implicit conversion so an HModule can be passed directly to the
 * util_dl_* functions; returns nullptr when no module is loaded. */
inline
d3d12_validation_tools::HModule::operator util_dl_library * () const
{
   return module;
}
1235
1236 bool
load(LPCSTR file_name)1237 d3d12_validation_tools::HModule::load(LPCSTR file_name)
1238 {
1239 module = util_dl_open(file_name);
1240 return module != nullptr;
1241 }
1242
1243
1244 class ShaderBlob : public IDxcBlob {
1245 public:
ShaderBlob(blob * data)1246 ShaderBlob(blob* data) : m_data(data) {}
1247
GetBufferPointer(void)1248 LPVOID STDMETHODCALLTYPE GetBufferPointer(void) override { return m_data->data; }
1249
GetBufferSize()1250 SIZE_T STDMETHODCALLTYPE GetBufferSize() override { return m_data->size; }
1251
QueryInterface(REFIID,void **)1252 HRESULT STDMETHODCALLTYPE QueryInterface(REFIID, void**) override { return E_NOINTERFACE; }
1253
AddRef()1254 ULONG STDMETHODCALLTYPE AddRef() override { return 1; }
1255
Release()1256 ULONG STDMETHODCALLTYPE Release() override { return 0; }
1257
1258 blob* m_data;
1259 };
1260
validate_and_sign(struct blob * dxil)1261 bool d3d12_validation_tools::validate_and_sign(struct blob *dxil)
1262 {
1263 ShaderBlob source(dxil);
1264
1265 ComPtr<IDxcOperationResult> result;
1266
1267 validator->Validate(&source, DxcValidatorFlags_InPlaceEdit, &result);
1268 HRESULT validationStatus;
1269 result->GetStatus(&validationStatus);
1270 if (FAILED(validationStatus) && library) {
1271 ComPtr<IDxcBlobEncoding> printBlob, printBlobUtf8;
1272 result->GetErrorBuffer(&printBlob);
1273 library->GetBlobAsUtf8(printBlob.Get(), printBlobUtf8.GetAddressOf());
1274
1275 char *errorString;
1276 if (printBlobUtf8) {
1277 errorString = reinterpret_cast<char*>(printBlobUtf8->GetBufferPointer());
1278
1279 errorString[printBlobUtf8->GetBufferSize() - 1] = 0;
1280 debug_printf("== VALIDATION ERROR =============================================\n%s\n"
1281 "== END ==========================================================\n",
1282 errorString);
1283 }
1284
1285 return false;
1286 }
1287 return true;
1288
1289 }
1290
disassemble(struct blob * dxil)1291 void d3d12_validation_tools::disassemble(struct blob *dxil)
1292 {
1293 if (!compiler) {
1294 fprintf(stderr, "D3D12: No Disassembler\n");
1295 return;
1296 }
1297 ShaderBlob source(dxil);
1298 IDxcBlobEncoding* pDisassembly = nullptr;
1299
1300 if (FAILED(compiler->Disassemble(&source, &pDisassembly))) {
1301 fprintf(stderr, "D3D12: Disassembler failed\n");
1302 return;
1303 }
1304
1305 ComPtr<IDxcBlobEncoding> dissassably(pDisassembly);
1306 ComPtr<IDxcBlobEncoding> blobUtf8;
1307 library->GetBlobAsUtf8(pDisassembly, blobUtf8.GetAddressOf());
1308 if (!blobUtf8) {
1309 fprintf(stderr, "D3D12: Unable to get utf8 encoding\n");
1310 return;
1311 }
1312
1313 char *disassembly = reinterpret_cast<char*>(blobUtf8->GetBufferPointer());
1314 disassembly[blobUtf8->GetBufferSize() - 1] = 0;
1315
1316 fprintf(stderr, "== BEGIN SHADER ============================================\n"
1317 "%s\n"
1318 "== END SHADER ==============================================\n",
1319 disassembly);
1320 }
1321