1 /*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "d3d12_compiler.h"
25 #include "d3d12_context.h"
26 #include "d3d12_debug.h"
27 #include "d3d12_screen.h"
28 #include "d3d12_nir_passes.h"
29 #include "nir_to_dxil.h"
30 #include "dxil_nir.h"
31 #include "dxil_nir_lower_int_cubemaps.h"
32
33 #include "pipe/p_state.h"
34
35 #include "nir.h"
36 #include "nir/nir_draw_helpers.h"
37 #include "nir/tgsi_to_nir.h"
38 #include "compiler/nir/nir_builder.h"
39
40 #include "util/hash_table.h"
41 #include "util/u_memory.h"
42 #include "util/u_prim.h"
43 #include "util/u_simple_shaders.h"
44 #include "util/u_dl.h"
45
46 #include <dxguids/dxguids.h>
47
48 #ifdef _WIN32
49 #include "dxil_validator.h"
50 #endif
51
52 const void *
d3d12_get_compiler_options(struct pipe_screen * screen,enum pipe_shader_ir ir,enum pipe_shader_type shader)53 d3d12_get_compiler_options(struct pipe_screen *screen,
54 enum pipe_shader_ir ir,
55 enum pipe_shader_type shader)
56 {
57 assert(ir == PIPE_SHADER_IR_NIR);
58 return &d3d12_screen(screen)->nir_options;
59 }
60
61 static uint32_t
resource_dimension(enum glsl_sampler_dim dim)62 resource_dimension(enum glsl_sampler_dim dim)
63 {
64 switch (dim) {
65 case GLSL_SAMPLER_DIM_1D:
66 return RESOURCE_DIMENSION_TEXTURE1D;
67 case GLSL_SAMPLER_DIM_2D:
68 return RESOURCE_DIMENSION_TEXTURE2D;
69 case GLSL_SAMPLER_DIM_3D:
70 return RESOURCE_DIMENSION_TEXTURE3D;
71 case GLSL_SAMPLER_DIM_CUBE:
72 return RESOURCE_DIMENSION_TEXTURECUBE;
73 default:
74 return RESOURCE_DIMENSION_UNKNOWN;
75 }
76 }
77
78 static bool
can_remove_dead_sampler(nir_variable * var,void * data)79 can_remove_dead_sampler(nir_variable *var, void *data)
80 {
81 const struct glsl_type *base_type = glsl_without_array(var->type);
82 return glsl_type_is_sampler(base_type) && !glsl_type_is_bare_sampler(base_type);
83 }
84
85 static struct d3d12_shader *
compile_nir(struct d3d12_context * ctx,struct d3d12_shader_selector * sel,struct d3d12_shader_key * key,struct nir_shader * nir)86 compile_nir(struct d3d12_context *ctx, struct d3d12_shader_selector *sel,
87 struct d3d12_shader_key *key, struct nir_shader *nir)
88 {
89 struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
90 struct d3d12_shader *shader = rzalloc(sel, d3d12_shader);
91 shader->key = *key;
92
93 if (shader->key.n_texture_states > 0) {
94 shader->key.tex_wrap_states = (dxil_wrap_sampler_state*)ralloc_size(sel, sizeof(dxil_wrap_sampler_state) * shader->key.n_texture_states);
95 memcpy(shader->key.tex_wrap_states, key->tex_wrap_states, sizeof(dxil_wrap_sampler_state) * shader->key.n_texture_states);
96 }
97 else
98 shader->key.tex_wrap_states = nullptr;
99
100 shader->nir = nir;
101 sel->current = shader;
102
103 NIR_PASS_V(nir, nir_lower_samplers);
104 NIR_PASS_V(nir, dxil_nir_split_typed_samplers);
105
106 NIR_PASS_V(nir, nir_opt_dce);
107 struct nir_remove_dead_variables_options dead_var_opts = {};
108 dead_var_opts.can_remove_var = can_remove_dead_sampler;
109 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform, &dead_var_opts);
110
111 if (key->samples_int_textures)
112 NIR_PASS_V(nir, dxil_lower_sample_to_txf_for_integer_tex,
113 key->n_texture_states, key->tex_wrap_states, key->swizzle_state,
114 screen->base.caps.max_texture_lod_bias);
115
116 if (key->stage == PIPE_SHADER_VERTEX && key->vs.needs_format_emulation)
117 dxil_nir_lower_vs_vertex_conversion(nir, key->vs.format_conversion);
118
119 if (key->last_vertex_processing_stage) {
120 if (key->invert_depth)
121 NIR_PASS_V(nir, d3d12_nir_invert_depth, key->invert_depth, key->halfz);
122 if (!key->halfz)
123 NIR_PASS_V(nir, nir_lower_clip_halfz);
124 NIR_PASS_V(nir, d3d12_lower_yflip);
125 }
126
127 NIR_PASS_V(nir, d3d12_lower_state_vars, shader);
128
129 const struct dxil_nir_lower_loads_stores_options loads_stores_options = {};
130 NIR_PASS_V(nir, dxil_nir_lower_loads_stores_to_dxil, &loads_stores_options);
131
132 if (key->stage == PIPE_SHADER_FRAGMENT && key->fs.multisample_disabled)
133 NIR_PASS_V(nir, d3d12_disable_multisampling);
134
135 struct nir_to_dxil_options opts = {};
136 opts.interpolate_at_vertex = screen->have_load_at_vertex;
137 opts.lower_int16 = !screen->opts4.Native16BitShaderOpsSupported;
138 opts.last_ubo_is_not_arrayed = shader->num_state_vars > 0;
139 if (key->stage == PIPE_SHADER_FRAGMENT)
140 opts.provoking_vertex = key->fs.provoking_vertex;
141 opts.input_clip_size = key->input_clip_size;
142 opts.environment = DXIL_ENVIRONMENT_GL;
143 opts.shader_model_max = screen->max_shader_model;
144 #ifdef _WIN32
145 opts.validator_version_max = dxil_get_validator_version(ctx->dxil_validator);
146 #endif
147
148 struct blob tmp;
149 if (!nir_to_dxil(nir, &opts, NULL, &tmp)) {
150 debug_printf("D3D12: nir_to_dxil failed\n");
151 return NULL;
152 }
153
154 // Non-ubo variables
155 shader->begin_srv_binding = (UINT_MAX);
156 nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
157 auto type_no_array = glsl_without_array(var->type);
158 if (glsl_type_is_texture(type_no_array)) {
159 unsigned count = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
160 for (unsigned i = 0; i < count; ++i) {
161 shader->srv_bindings[var->data.binding + i].dimension = resource_dimension(glsl_get_sampler_dim(type_no_array));
162 }
163 shader->begin_srv_binding = MIN2(var->data.binding, shader->begin_srv_binding);
164 shader->end_srv_binding = MAX2(var->data.binding + count, shader->end_srv_binding);
165 }
166 }
167
168 nir_foreach_image_variable(var, nir) {
169 auto type_no_array = glsl_without_array(var->type);
170 unsigned count = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
171 for (unsigned i = 0; i < count; ++i) {
172 shader->uav_bindings[var->data.driver_location + i].dimension = resource_dimension(glsl_get_sampler_dim(type_no_array));
173 }
174 }
175
176 // Ubo variables
177 if(nir->info.num_ubos) {
178 shader->begin_ubo_binding = shader->nir->num_uniforms > 0 || !shader->nir->info.first_ubo_is_default_ubo ? 0 : 1;
179 // Ignore state_vars ubo as it is bound as root constants
180 shader->end_ubo_binding = nir->info.num_ubos - (shader->state_vars_used ? 1 : 0);
181 }
182
183 #ifdef _WIN32
184 if (ctx->dxil_validator) {
185 if (!(d3d12_debug & D3D12_DEBUG_EXPERIMENTAL)) {
186 char *err;
187 if (!dxil_validate_module(ctx->dxil_validator, tmp.data,
188 tmp.size, &err) && err) {
189 debug_printf(
190 "== VALIDATION ERROR =============================================\n"
191 "%s\n"
192 "== END ==========================================================\n",
193 err);
194 ralloc_free(err);
195 }
196 }
197
198 if (d3d12_debug & D3D12_DEBUG_DISASS) {
199 char *str = dxil_disasm_module(ctx->dxil_validator, tmp.data,
200 tmp.size);
201 fprintf(stderr,
202 "== BEGIN SHADER ============================================\n"
203 "%s\n"
204 "== END SHADER ==============================================\n",
205 str);
206 ralloc_free(str);
207 }
208 }
209 #endif
210
211 blob_finish_get_buffer(&tmp, &shader->bytecode, &shader->bytecode_length);
212
213 if (d3d12_debug & D3D12_DEBUG_DXIL) {
214 char buf[256];
215 static int i;
216 snprintf(buf, sizeof(buf), "dump%02d.dxil", i++);
217 FILE *fp = fopen(buf, "wb");
218 fwrite(shader->bytecode, sizeof(char), shader->bytecode_length, fp);
219 fclose(fp);
220 fprintf(stderr, "wrote '%s'...\n", buf);
221 }
222 return shader;
223 }
224
/* Transient per-draw state gathered while selecting and validating the
 * shader variants to bind; filled once and consulted by the key-filling
 * and variant-validation helpers below. */
struct d3d12_selection_context {
   struct d3d12_context *ctx;
   /* Wide/attribute points need a GS variant to expand them. */
   bool needs_point_sprite_lowering;
   /* Triangle order must be rewritten (flat shading / XFB on strips). */
   bool needs_vertex_reordering;
   /* Index of the provoking vertex within the primitive (0-based). */
   unsigned provoking_vertex;
   /* True for strip topologies where winding alternates per triangle. */
   bool alternate_tri;
   /* PIPE_POLYGON_MODE_* to emulate in a GS variant (FILL = none). */
   unsigned fill_mode_lowered;
   /* PIPE_FACE_* culling to apply in the GS variant (NONE = none). */
   unsigned cull_mode_lowered;
   /* Depth range must be applied manually in the FS. */
   bool manual_depth_range;
   /* Bitmask of dual-source outputs the FS doesn't write. */
   unsigned missing_dual_src_outputs;
   /* Number of cbufs to broadcast gl_FragColor to (0 = no lowering). */
   unsigned frag_result_color_lowering;
   /* Non-NULL when a compute variant uses a variable workgroup size. */
   const unsigned *variable_workgroup_size;
};
238
239 unsigned
missing_dual_src_outputs(struct d3d12_context * ctx)240 missing_dual_src_outputs(struct d3d12_context *ctx)
241 {
242 if (!ctx->gfx_pipeline_state.blend || !ctx->gfx_pipeline_state.blend->is_dual_src)
243 return 0;
244
245 struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
246 if (!fs)
247 return 0;
248
249 const nir_shader *s = fs->initial;
250
251 unsigned indices_seen = 0;
252 nir_foreach_function_impl(impl, s) {
253 nir_foreach_block(block, impl) {
254 nir_foreach_instr(instr, block) {
255 if (instr->type != nir_instr_type_intrinsic)
256 continue;
257
258 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
259 if (intr->intrinsic != nir_intrinsic_store_deref)
260 continue;
261
262 nir_variable *var = nir_intrinsic_get_var(intr, 0);
263 if (var->data.mode != nir_var_shader_out)
264 continue;
265
266 unsigned index = var->data.index;
267 if (var->data.location > FRAG_RESULT_DATA0)
268 index = var->data.location - FRAG_RESULT_DATA0;
269 else if (var->data.location != FRAG_RESULT_COLOR &&
270 var->data.location != FRAG_RESULT_DATA0)
271 continue;
272
273 indices_seen |= 1u << index;
274 if ((indices_seen & 3) == 3)
275 return 0;
276 }
277 }
278 }
279
280 return 3 & ~indices_seen;
281 }
282
283 static unsigned
frag_result_color_lowering(struct d3d12_context * ctx)284 frag_result_color_lowering(struct d3d12_context *ctx)
285 {
286 struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
287 assert(fs);
288
289 if (fs->initial->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR))
290 return ctx->fb.nr_cbufs > 1 ? ctx->fb.nr_cbufs : 0;
291
292 return 0;
293 }
294
295 bool
manual_depth_range(struct d3d12_context * ctx)296 manual_depth_range(struct d3d12_context *ctx)
297 {
298 if (!d3d12_need_zero_one_depth_range(ctx))
299 return false;
300
301 /**
302 * If we can't use the D3D12 zero-one depth-range, we might have to apply
303 * depth-range ourselves.
304 *
305 * Because we only need to override the depth-range to zero-one range in
306 * the case where we write frag-depth, we only need to apply manual
307 * depth-range to gl_FragCoord.z.
308 *
309 * No extra care is needed to be taken in the case where gl_FragDepth is
310 * written conditionally, because the GLSL 4.60 spec states:
311 *
312 * If a shader statically assigns a value to gl_FragDepth, and there
313 * is an execution path through the shader that does not set
314 * gl_FragDepth, then the value of the fragment’s depth may be
315 * undefined for executions of the shader that take that path. That
316 * is, if the set of linked fragment shaders statically contain a
317 * write to gl_FragDepth, then it is responsible for always writing
318 * it.
319 */
320
321 struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
322 return fs && fs->initial->info.inputs_read & VARYING_BIT_POS;
323 }
324
325 static bool
needs_edge_flag_fix(enum mesa_prim mode)326 needs_edge_flag_fix(enum mesa_prim mode)
327 {
328 return (mode == MESA_PRIM_QUADS ||
329 mode == MESA_PRIM_QUAD_STRIP ||
330 mode == MESA_PRIM_POLYGON);
331 }
332
333 static unsigned
fill_mode_lowered(struct d3d12_context * ctx,const struct pipe_draw_info * dinfo)334 fill_mode_lowered(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
335 {
336 struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
337
338 if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
339 !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant) ||
340 ctx->gfx_pipeline_state.rast == NULL ||
341 (dinfo->mode != MESA_PRIM_TRIANGLES &&
342 dinfo->mode != MESA_PRIM_TRIANGLE_STRIP))
343 return PIPE_POLYGON_MODE_FILL;
344
345 /* D3D12 supports line mode (wireframe) but doesn't support edge flags */
346 if (((ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_LINE &&
347 ctx->gfx_pipeline_state.rast->base.cull_face != PIPE_FACE_FRONT) ||
348 (ctx->gfx_pipeline_state.rast->base.fill_back == PIPE_POLYGON_MODE_LINE &&
349 ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_FRONT)) &&
350 (vs->initial->info.outputs_written & VARYING_BIT_EDGE ||
351 needs_edge_flag_fix(ctx->initial_api_prim)))
352 return PIPE_POLYGON_MODE_LINE;
353
354 if (ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_POINT)
355 return PIPE_POLYGON_MODE_POINT;
356
357 return PIPE_POLYGON_MODE_FILL;
358 }
359
360 static bool
has_stream_out_for_streams(struct d3d12_context * ctx)361 has_stream_out_for_streams(struct d3d12_context *ctx)
362 {
363 unsigned mask = ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->initial->info.gs.active_stream_mask & ~1;
364 for (unsigned i = 0; i < ctx->gfx_pipeline_state.so_info.num_outputs; ++i) {
365 unsigned stream = ctx->gfx_pipeline_state.so_info.output[i].stream;
366 if (((1 << stream) & mask) &&
367 ctx->so_buffer_views[stream].SizeInBytes)
368 return true;
369 }
370 return false;
371 }
372
373 static bool
needs_point_sprite_lowering(struct d3d12_context * ctx,const struct pipe_draw_info * dinfo)374 needs_point_sprite_lowering(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
375 {
376 struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
377 struct d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
378
379 if (gs != NULL && !gs->is_variant) {
380 /* There is an user GS; Check if it outputs points with PSIZE */
381 return (gs->initial->info.gs.output_primitive == MESA_PRIM_POINTS &&
382 (gs->initial->info.outputs_written & VARYING_BIT_PSIZ ||
383 ctx->gfx_pipeline_state.rast->base.point_size > 1.0) &&
384 (gs->initial->info.gs.active_stream_mask == 1 ||
385 !has_stream_out_for_streams(ctx)));
386 } else {
387 /* No user GS; check if we are drawing wide points */
388 return ((dinfo->mode == MESA_PRIM_POINTS ||
389 fill_mode_lowered(ctx, dinfo) == PIPE_POLYGON_MODE_POINT) &&
390 (ctx->gfx_pipeline_state.rast->base.point_size > 1.0 ||
391 ctx->gfx_pipeline_state.rast->base.offset_point ||
392 (ctx->gfx_pipeline_state.rast->base.point_size_per_vertex &&
393 vs->initial->info.outputs_written & VARYING_BIT_PSIZ)) &&
394 (vs->initial->info.outputs_written & VARYING_BIT_POS));
395 }
396 }
397
398 static unsigned
cull_mode_lowered(struct d3d12_context * ctx,unsigned fill_mode)399 cull_mode_lowered(struct d3d12_context *ctx, unsigned fill_mode)
400 {
401 if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
402 !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant) ||
403 ctx->gfx_pipeline_state.rast == NULL ||
404 ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_NONE)
405 return PIPE_FACE_NONE;
406
407 return ctx->gfx_pipeline_state.rast->base.cull_face;
408 }
409
410 static unsigned
get_provoking_vertex(struct d3d12_selection_context * sel_ctx,bool * alternate,const struct pipe_draw_info * dinfo)411 get_provoking_vertex(struct d3d12_selection_context *sel_ctx, bool *alternate, const struct pipe_draw_info *dinfo)
412 {
413 if (dinfo->mode == GL_PATCHES) {
414 *alternate = false;
415 return 0;
416 }
417
418 struct d3d12_shader_selector *vs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_VERTEX];
419 struct d3d12_shader_selector *gs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
420 struct d3d12_shader_selector *last_vertex_stage = gs && !gs->is_variant ? gs : vs;
421
422 enum mesa_prim mode;
423 switch (last_vertex_stage->stage) {
424 case PIPE_SHADER_GEOMETRY:
425 mode = (enum mesa_prim)last_vertex_stage->initial->info.gs.output_primitive;
426 break;
427 case PIPE_SHADER_VERTEX:
428 mode = (enum mesa_prim)dinfo->mode;
429 break;
430 default:
431 unreachable("Tesselation shaders are not supported");
432 }
433
434 bool flatshade_first = sel_ctx->ctx->gfx_pipeline_state.rast &&
435 sel_ctx->ctx->gfx_pipeline_state.rast->base.flatshade_first;
436 *alternate = (mode == GL_TRIANGLE_STRIP || mode == GL_TRIANGLE_STRIP_ADJACENCY) &&
437 (!gs || gs->is_variant ||
438 gs->initial->info.gs.vertices_out > u_prim_vertex_count(mode)->min);
439 return flatshade_first ? 0 : u_prim_vertex_count(mode)->min - 1;
440 }
441
442 bool
has_flat_varyings(struct d3d12_context * ctx)443 has_flat_varyings(struct d3d12_context *ctx)
444 {
445 struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
446
447 if (!fs)
448 return false;
449
450 nir_foreach_variable_with_modes(input, fs->initial,
451 nir_var_shader_in) {
452 if (input->data.interpolation == INTERP_MODE_FLAT &&
453 /* Disregard sysvals */
454 (input->data.location >= VARYING_SLOT_VAR0 ||
455 input->data.location <= VARYING_SLOT_TEX7))
456 return true;
457 }
458
459 return false;
460 }
461
462 static bool
needs_vertex_reordering(struct d3d12_selection_context * sel_ctx,const struct pipe_draw_info * dinfo)463 needs_vertex_reordering(struct d3d12_selection_context *sel_ctx, const struct pipe_draw_info *dinfo)
464 {
465 struct d3d12_context *ctx = sel_ctx->ctx;
466 bool flat = ctx->has_flat_varyings;
467 bool xfb = ctx->gfx_pipeline_state.num_so_targets > 0;
468
469 if (fill_mode_lowered(ctx, dinfo) != PIPE_POLYGON_MODE_FILL)
470 return false;
471
472 /* TODO add support for line primitives */
473 if (u_reduced_prim((mesa_prim)dinfo->mode) == MESA_PRIM_LINES)
474 return false;
475
476 /* When flat shading a triangle and provoking vertex is not the first one, we use load_at_vertex.
477 If not available for this adapter, or if it's a triangle strip, we need to reorder the vertices */
478 if (flat && sel_ctx->provoking_vertex >= 2 && (!d3d12_screen(ctx->base.screen)->have_load_at_vertex ||
479 sel_ctx->alternate_tri))
480 return true;
481
482 /* When transform feedback is enabled and the output is alternating (triangle strip or triangle
483 strip with adjacency), we need to reorder vertices to get the order expected by OpenGL. This
484 only works when there is no flat shading involved. In that scenario, we don't care about
485 the provoking vertex. */
486 if (xfb && !flat && sel_ctx->alternate_tri) {
487 sel_ctx->provoking_vertex = 0;
488 return true;
489 }
490
491 return false;
492 }
493
494 static d3d12_varying_info*
fill_varyings(struct d3d12_context * ctx,const nir_shader * s,nir_variable_mode modes,uint64_t mask,bool patch)495 fill_varyings(struct d3d12_context *ctx, const nir_shader *s,
496 nir_variable_mode modes, uint64_t mask, bool patch)
497 {
498 struct d3d12_varying_info info;
499
500 info.max = 0;
501 info.mask = 0;
502 info.hash = 0;
503
504 nir_foreach_variable_with_modes(var, s, modes) {
505 unsigned slot = var->data.location;
506 bool is_generic_patch = slot >= VARYING_SLOT_PATCH0;
507 if (patch ^ is_generic_patch)
508 continue;
509 if (is_generic_patch)
510 slot -= VARYING_SLOT_PATCH0;
511 uint64_t slot_bit = BITFIELD64_BIT(slot);
512
513 if (!(mask & slot_bit))
514 continue;
515
516 if ((info.mask & slot_bit) == 0) {
517 memset(info.slots + slot, 0, sizeof(info.slots[0]));
518 info.max = MAX2(info.max, slot);
519 }
520
521 const struct glsl_type *type = var->type;
522 if (nir_is_arrayed_io(var, s->info.stage))
523 type = glsl_get_array_element(type);
524 info.slots[slot].types[var->data.location_frac] = type;
525
526 info.slots[slot].patch = var->data.patch;
527 auto& var_slot = info.slots[slot].vars[var->data.location_frac];
528 var_slot.driver_location = var->data.driver_location;
529 var_slot.interpolation = var->data.interpolation;
530 var_slot.compact = var->data.compact;
531 var_slot.always_active_io = var->data.always_active_io;
532 info.mask |= slot_bit;
533 info.slots[slot].location_frac_mask |= (1 << var->data.location_frac);
534 }
535
536 for (uint32_t i = 0; i <= info.max; ++i) {
537 if (((1llu << i) & info.mask) == 0)
538 memset(info.slots + i, 0, sizeof(info.slots[0]));
539 else
540 info.hash = _mesa_hash_data_with_seed(info.slots + i, sizeof(info.slots[0]), info.hash);
541 }
542 info.hash = _mesa_hash_data_with_seed(&info.mask, sizeof(info.mask), info.hash);
543
544 struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
545
546 mtx_lock(&screen->varying_info_mutex);
547 set_entry *pentry = _mesa_set_search_pre_hashed(screen->varying_info_set, info.hash, &info);
548 if (pentry != nullptr) {
549 mtx_unlock(&screen->varying_info_mutex);
550 return (d3d12_varying_info*)pentry->key;
551 }
552 else {
553 d3d12_varying_info *key = MALLOC_STRUCT(d3d12_varying_info);
554 *key = info;
555
556 _mesa_set_add_pre_hashed(screen->varying_info_set, info.hash, key);
557
558 mtx_unlock(&screen->varying_info_mutex);
559 return key;
560 }
561 }
562
563 static void
fill_flat_varyings(struct d3d12_gs_variant_key * key,d3d12_shader_selector * fs)564 fill_flat_varyings(struct d3d12_gs_variant_key *key, d3d12_shader_selector *fs)
565 {
566 if (!fs)
567 return;
568
569 nir_foreach_variable_with_modes(input, fs->initial,
570 nir_var_shader_in) {
571 if (input->data.interpolation == INTERP_MODE_FLAT)
572 key->flat_varyings |= BITFIELD64_BIT(input->data.location);
573 }
574 }
575
576 bool
d3d12_compare_varying_info(const d3d12_varying_info * expect,const d3d12_varying_info * have)577 d3d12_compare_varying_info(const d3d12_varying_info *expect, const d3d12_varying_info *have)
578 {
579 if (expect == have)
580 return true;
581
582 if (expect == nullptr || have == nullptr)
583 return false;
584
585 if (expect->mask != have->mask
586 || expect->max != have->max)
587 return false;
588
589 if (!expect->mask)
590 return true;
591
592 /* 6 is a rough (wild) guess for a bulk memcmp cross-over point. When there
593 * are a small number of slots present, individual is much faster. */
594 if (util_bitcount64(expect->mask) < 6) {
595 uint64_t mask = expect->mask;
596 while (mask) {
597 int slot = u_bit_scan64(&mask);
598 if (memcmp(&expect->slots[slot], &have->slots[slot], sizeof(have->slots[slot])))
599 return false;
600 }
601
602 return true;
603 }
604
605 return !memcmp(expect->slots, have->slots, sizeof(expect->slots[0]) * expect->max);
606 }
607
608
varying_info_hash(const void * info)609 uint32_t varying_info_hash(const void *info) {
610 return ((d3d12_varying_info*)info)->hash;
611 }
varying_info_compare(const void * a,const void * b)612 bool varying_info_compare(const void *a, const void *b) {
613 return d3d12_compare_varying_info((d3d12_varying_info*)a, (d3d12_varying_info*)b);
614 }
varying_info_entry_destroy(set_entry * entry)615 void varying_info_entry_destroy(set_entry *entry) {
616 if (entry->key)
617 free((void*)entry->key);
618 }
619
620 void
d3d12_varying_cache_init(struct d3d12_screen * screen)621 d3d12_varying_cache_init(struct d3d12_screen *screen) {
622 screen->varying_info_set = _mesa_set_create(nullptr, varying_info_hash, varying_info_compare);
623 }
624
625 void
d3d12_varying_cache_destroy(struct d3d12_screen * screen)626 d3d12_varying_cache_destroy(struct d3d12_screen *screen) {
627 _mesa_set_destroy(screen->varying_info_set, varying_info_entry_destroy);
628 }
629
630
631 static void
validate_geometry_shader_variant(struct d3d12_selection_context * sel_ctx)632 validate_geometry_shader_variant(struct d3d12_selection_context *sel_ctx)
633 {
634 struct d3d12_context *ctx = sel_ctx->ctx;
635 d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
636
637 /* Nothing to do if there is a user geometry shader bound */
638 if (gs != NULL && !gs->is_variant)
639 return;
640
641 d3d12_shader_selector* vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
642 d3d12_shader_selector* fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
643
644 struct d3d12_gs_variant_key key;
645 key.all = 0;
646 key.flat_varyings = 0;
647
648 /* Fill the geometry shader variant key */
649 if (sel_ctx->fill_mode_lowered != PIPE_POLYGON_MODE_FILL) {
650 key.fill_mode = sel_ctx->fill_mode_lowered;
651 key.cull_mode = sel_ctx->cull_mode_lowered;
652 key.has_front_face = (fs->initial->info.inputs_read & VARYING_BIT_FACE) != 0;
653 if (key.cull_mode != PIPE_FACE_NONE || key.has_front_face)
654 key.front_ccw = ctx->gfx_pipeline_state.rast->base.front_ccw ^ (ctx->flip_y < 0);
655 key.edge_flag_fix = needs_edge_flag_fix(ctx->initial_api_prim);
656 fill_flat_varyings(&key, fs);
657 if (key.flat_varyings != 0)
658 key.flatshade_first = ctx->gfx_pipeline_state.rast->base.flatshade_first;
659 } else if (sel_ctx->needs_point_sprite_lowering) {
660 key.passthrough = true;
661 } else if (sel_ctx->needs_vertex_reordering) {
662 /* TODO support cases where flat shading (pv != 0) and xfb are enabled, or lines */
663 key.provoking_vertex = sel_ctx->provoking_vertex;
664 key.alternate_tri = sel_ctx->alternate_tri;
665 }
666
667 if (vs->initial_output_vars == nullptr) {
668 vs->initial_output_vars = fill_varyings(sel_ctx->ctx, vs->initial, nir_var_shader_out,
669 vs->initial->info.outputs_written, false);
670 }
671 key.varyings = vs->initial_output_vars;
672 gs = d3d12_get_gs_variant(ctx, &key);
673 ctx->gfx_stages[PIPE_SHADER_GEOMETRY] = gs;
674 }
675
676 static void
validate_tess_ctrl_shader_variant(struct d3d12_selection_context * sel_ctx)677 validate_tess_ctrl_shader_variant(struct d3d12_selection_context *sel_ctx)
678 {
679 struct d3d12_context *ctx = sel_ctx->ctx;
680 d3d12_shader_selector *tcs = ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];
681
682 /* Nothing to do if there is a user tess ctrl shader bound */
683 if (tcs != NULL && !tcs->is_variant)
684 return;
685
686 d3d12_shader_selector *tes = ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
687 struct d3d12_tcs_variant_key key = {0};
688
689 bool variant_needed = tes != nullptr;
690
691 /* Fill the variant key */
692 if (variant_needed) {
693 if (tes->initial_input_vars == nullptr) {
694 tes->initial_input_vars = fill_varyings(sel_ctx->ctx, tes->initial, nir_var_shader_in,
695 tes->initial->info.inputs_read & ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER),
696 false);
697 }
698 key.varyings = tes->initial_input_vars;
699 key.vertices_out = ctx->patch_vertices;
700 }
701
702 /* Find/create the proper variant and bind it */
703 tcs = variant_needed ? d3d12_get_tcs_variant(ctx, &key) : NULL;
704 ctx->gfx_stages[PIPE_SHADER_TESS_CTRL] = tcs;
705 }
706
707 static bool
d3d12_compare_shader_keys(struct d3d12_selection_context * sel_ctx,const d3d12_shader_key * expect,const d3d12_shader_key * have)708 d3d12_compare_shader_keys(struct d3d12_selection_context* sel_ctx, const d3d12_shader_key *expect, const d3d12_shader_key *have)
709 {
710 assert(expect->stage == have->stage);
711 assert(expect);
712 assert(have);
713
714 if (expect->hash != have->hash)
715 return false;
716
717 switch (expect->stage) {
718 case PIPE_SHADER_VERTEX:
719 if (expect->vs.needs_format_emulation != have->vs.needs_format_emulation)
720 return false;
721
722 if (expect->vs.needs_format_emulation) {
723 if (memcmp(expect->vs.format_conversion, have->vs.format_conversion,
724 sel_ctx->ctx->gfx_pipeline_state.ves->num_elements * sizeof(enum pipe_format)))
725 return false;
726 }
727 break;
728 case PIPE_SHADER_GEOMETRY:
729 if (expect->gs.all != have->gs.all)
730 return false;
731 break;
732 case PIPE_SHADER_TESS_CTRL:
733 if (expect->hs.all != have->hs.all)
734 return false;
735 break;
736 case PIPE_SHADER_TESS_EVAL:
737 if (expect->ds.tcs_vertices_out != have->ds.tcs_vertices_out ||
738 expect->ds.prev_patch_outputs != have->ds.prev_patch_outputs)
739 return false;
740 break;
741 case PIPE_SHADER_FRAGMENT:
742 if (expect->fs.all != have->fs.all)
743 return false;
744 break;
745 case PIPE_SHADER_COMPUTE:
746 if (memcmp(expect->cs.workgroup_size, have->cs.workgroup_size,
747 sizeof(have->cs.workgroup_size)))
748 return false;
749 break;
750 default:
751 unreachable("invalid stage");
752 }
753
754 if (expect->n_texture_states != have->n_texture_states)
755 return false;
756
757 if (expect->n_images != have->n_images)
758 return false;
759
760 if (expect->n_texture_states > 0 &&
761 memcmp(expect->tex_wrap_states, have->tex_wrap_states,
762 expect->n_texture_states * sizeof(dxil_wrap_sampler_state)))
763 return false;
764
765 if (memcmp(expect->swizzle_state, have->swizzle_state,
766 expect->n_texture_states * sizeof(dxil_texture_swizzle_state)))
767 return false;
768
769 if (memcmp(expect->sampler_compare_funcs, have->sampler_compare_funcs,
770 expect->n_texture_states * sizeof(enum compare_func)))
771 return false;
772
773 if (memcmp(expect->image_format_conversion, have->image_format_conversion,
774 expect->n_images * sizeof(struct d3d12_image_format_conversion_info)))
775 return false;
776
777 if (!(expect->next_varying_inputs == have->next_varying_inputs &&
778 expect->prev_varying_outputs == have->prev_varying_outputs &&
779 expect->common_all == have->common_all &&
780 expect->tex_saturate_s == have->tex_saturate_s &&
781 expect->tex_saturate_r == have->tex_saturate_r &&
782 expect->tex_saturate_t == have->tex_saturate_t))
783 return false;
784
785 if (expect->next_has_frac_inputs &&
786 expect->next_varying_frac_inputs != have->next_varying_frac_inputs &&
787 memcmp(expect->next_varying_frac_inputs, have->next_varying_frac_inputs, sizeof(d3d12_shader_selector::varying_frac_inputs)))
788 return false;
789 if (expect->prev_has_frac_outputs &&
790 expect->prev_varying_frac_outputs != have->prev_varying_frac_outputs &&
791 memcmp(expect->prev_varying_frac_outputs, have->prev_varying_frac_outputs, sizeof(d3d12_shader_selector::varying_frac_outputs)))
792 return false;
793 return true;
794 }
795
796 static uint32_t
d3d12_shader_key_hash(const d3d12_shader_key * key)797 d3d12_shader_key_hash(const d3d12_shader_key *key)
798 {
799 uint32_t hash;
800
801 hash = (uint32_t)key->stage;
802
803 hash += static_cast<uint32_t>(key->next_varying_inputs);
804 hash += static_cast<uint32_t>(key->prev_varying_outputs);
805 hash += key->common_all;
806 if (key->next_has_frac_inputs)
807 hash = _mesa_hash_data_with_seed(key->next_varying_frac_inputs, sizeof(d3d12_shader_selector::varying_frac_inputs), hash);
808 if (key->prev_has_frac_outputs)
809 hash = _mesa_hash_data_with_seed(key->prev_varying_frac_outputs, sizeof(d3d12_shader_selector::varying_frac_outputs), hash);
810 switch (key->stage) {
811 case PIPE_SHADER_VERTEX:
812 /* (Probably) not worth the bit extraction for needs_format_emulation and
813 * the rest of the the format_conversion data is large. Don't bother
814 * hashing for now until this is shown to be worthwhile. */
815 break;
816 case PIPE_SHADER_GEOMETRY:
817 hash += static_cast<uint32_t>(key->gs.all);
818 break;
819 case PIPE_SHADER_FRAGMENT:
820 hash += key->fs.all;
821 break;
822 case PIPE_SHADER_COMPUTE:
823 hash = _mesa_hash_data_with_seed(&key->cs, sizeof(key->cs), hash);
824 break;
825 case PIPE_SHADER_TESS_CTRL:
826 hash += static_cast<uint32_t>(key->hs.all);
827 break;
828 case PIPE_SHADER_TESS_EVAL:
829 hash += key->ds.tcs_vertices_out;
830 hash += key->ds.prev_patch_outputs;
831 break;
832 default:
833 /* No type specific information to hash for other stages. */
834 break;
835 }
836
837 hash += key->n_texture_states;
838 hash += key->n_images;
839 return hash;
840 }
841
842 static void
d3d12_fill_shader_key(struct d3d12_selection_context * sel_ctx,d3d12_shader_key * key,d3d12_shader_selector * sel,d3d12_shader_selector * prev,d3d12_shader_selector * next)843 d3d12_fill_shader_key(struct d3d12_selection_context *sel_ctx,
844 d3d12_shader_key *key, d3d12_shader_selector *sel,
845 d3d12_shader_selector *prev, d3d12_shader_selector *next)
846 {
847 pipe_shader_type stage = sel->stage;
848
849 memset(key, 0, offsetof(d3d12_shader_key, vs));
850 key->stage = stage;
851
852 switch (stage)
853 {
854 case PIPE_SHADER_VERTEX:
855 key->vs.needs_format_emulation = 0;
856 break;
857 case PIPE_SHADER_FRAGMENT:
858 key->fs.all = 0;
859 break;
860 case PIPE_SHADER_GEOMETRY:
861 key->gs.all = 0;
862 break;
863 case PIPE_SHADER_TESS_CTRL:
864 key->hs.all = 0;
865 break;
866 case PIPE_SHADER_TESS_EVAL:
867 key->ds.tcs_vertices_out = 0;
868 key->ds.prev_patch_outputs = 0;
869 break;
870 case PIPE_SHADER_COMPUTE:
871 memset(key->cs.workgroup_size, 0, sizeof(key->cs.workgroup_size));
872 break;
873 default: unreachable("Invalid stage type");
874 }
875
876 key->n_texture_states = 0;
877 key->tex_wrap_states = sel_ctx->ctx->tex_wrap_states_shader_key;
878 key->n_images = 0;
879
880 if (prev) {
881 key->prev_varying_outputs = prev->initial->info.outputs_written;
882 key->prev_has_frac_outputs = prev->has_frac_outputs;
883 key->prev_varying_frac_outputs = prev->varying_frac_outputs;
884
885 if (stage == PIPE_SHADER_TESS_EVAL)
886 key->ds.prev_patch_outputs = prev->initial->info.patch_outputs_written;
887
888 /* Set the provoking vertex based on the previous shader output. Only set the
889 * key value if the driver actually supports changing the provoking vertex though */
890 if (stage == PIPE_SHADER_FRAGMENT && sel_ctx->ctx->gfx_pipeline_state.rast &&
891 !sel_ctx->needs_vertex_reordering &&
892 d3d12_screen(sel_ctx->ctx->base.screen)->have_load_at_vertex)
893 key->fs.provoking_vertex = sel_ctx->provoking_vertex;
894
895 /* Get the input clip distance size. The info's clip_distance_array_size corresponds
896 * to the output, and in cases of TES or GS you could have differently-sized inputs
897 * and outputs. For FS, there is no output, so it's repurposed to mean input.
898 */
899 if (stage != PIPE_SHADER_FRAGMENT)
900 key->input_clip_size = prev->initial->info.clip_distance_array_size;
901 }
902
903 if (next) {
904 if (stage == PIPE_SHADER_TESS_CTRL)
905 key->hs.next_patch_inputs = next->initial->info.patch_outputs_read;
906 key->next_varying_inputs = next->initial->info.inputs_read;
907 if (BITSET_TEST(next->initial->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID))
908 key->next_varying_inputs |= VARYING_SLOT_PRIMITIVE_ID;
909 key->next_has_frac_inputs = next->has_frac_inputs;
910 key->next_varying_frac_inputs = next->varying_frac_inputs;
911 }
912
913 if (stage == PIPE_SHADER_GEOMETRY ||
914 ((stage == PIPE_SHADER_VERTEX || stage == PIPE_SHADER_TESS_EVAL) &&
915 (!next || next->stage == PIPE_SHADER_FRAGMENT))) {
916 key->last_vertex_processing_stage = 1;
917 key->invert_depth = sel_ctx->ctx->reverse_depth_range;
918 key->halfz = sel_ctx->ctx->gfx_pipeline_state.rast ?
919 sel_ctx->ctx->gfx_pipeline_state.rast->base.clip_halfz : false;
920 if (sel_ctx->ctx->pstipple.enabled &&
921 sel_ctx->ctx->gfx_pipeline_state.rast->base.poly_stipple_enable)
922 key->next_varying_inputs |= VARYING_BIT_POS;
923 }
924
925 if (stage == PIPE_SHADER_GEOMETRY && sel_ctx->ctx->gfx_pipeline_state.rast) {
926 struct pipe_rasterizer_state *rast = &sel_ctx->ctx->gfx_pipeline_state.rast->base;
927 if (sel_ctx->needs_point_sprite_lowering) {
928 key->gs.writes_psize = 1;
929 key->gs.point_size_per_vertex = rast->point_size_per_vertex;
930 key->gs.sprite_coord_enable = rast->sprite_coord_enable;
931 key->gs.sprite_origin_upper_left = (rast->sprite_coord_mode != PIPE_SPRITE_COORD_LOWER_LEFT);
932 if (sel_ctx->ctx->flip_y < 0)
933 key->gs.sprite_origin_upper_left = !key->gs.sprite_origin_upper_left;
934 key->gs.aa_point = rast->point_smooth;
935 key->gs.stream_output_factor = 6;
936 } else if (sel_ctx->fill_mode_lowered == PIPE_POLYGON_MODE_LINE) {
937 key->gs.stream_output_factor = 2;
938 } else if (sel_ctx->needs_vertex_reordering && !sel->is_variant) {
939 key->gs.triangle_strip = 1;
940 }
941
942 if (sel->is_variant && next) {
943 if (next->initial->info.inputs_read & VARYING_BIT_FACE)
944 key->next_varying_inputs = (key->next_varying_inputs | VARYING_BIT_VAR(12)) & ~VARYING_BIT_FACE;
945 if (next->initial->info.inputs_read & VARYING_BIT_PRIMITIVE_ID)
946 key->gs.primitive_id = 1;
947 }
948 } else if (stage == PIPE_SHADER_FRAGMENT) {
949 key->fs.missing_dual_src_outputs = sel_ctx->missing_dual_src_outputs;
950 key->fs.frag_result_color_lowering = sel_ctx->frag_result_color_lowering;
951 key->fs.manual_depth_range = sel_ctx->manual_depth_range;
952 key->fs.polygon_stipple = sel_ctx->ctx->pstipple.enabled &&
953 sel_ctx->ctx->gfx_pipeline_state.rast->base.poly_stipple_enable;
954 key->fs.multisample_disabled = sel_ctx->ctx->gfx_pipeline_state.rast &&
955 !sel_ctx->ctx->gfx_pipeline_state.rast->desc.MultisampleEnable;
956 if (sel_ctx->ctx->gfx_pipeline_state.blend &&
957 sel_ctx->ctx->gfx_pipeline_state.blend->desc.RenderTarget[0].LogicOpEnable &&
958 !sel_ctx->ctx->gfx_pipeline_state.has_float_rtv) {
959 key->fs.cast_to_uint = util_format_is_unorm(sel_ctx->ctx->fb.cbufs[0]->format);
960 key->fs.cast_to_int = !key->fs.cast_to_uint;
961 }
962 if (sel_ctx->needs_point_sprite_lowering) {
963 if (sel->initial->info.inputs_read & VARYING_BIT_FACE)
964 key->prev_varying_outputs = (key->prev_varying_outputs | VARYING_BIT_VAR(12)) & ~VARYING_BIT_FACE;
965 key->prev_varying_outputs |= sel->initial->info.inputs_read & (VARYING_BIT_PNTC | BITFIELD64_RANGE(VARYING_SLOT_TEX0, 8));
966 }
967 } else if (stage == PIPE_SHADER_TESS_CTRL) {
968 if (next && next->initial->info.stage == MESA_SHADER_TESS_EVAL) {
969 key->hs.primitive_mode = next->initial->info.tess._primitive_mode;
970 key->hs.ccw = next->initial->info.tess.ccw;
971 key->hs.point_mode = next->initial->info.tess.point_mode;
972 key->hs.spacing = next->initial->info.tess.spacing;
973 } else {
974 key->hs.primitive_mode = TESS_PRIMITIVE_QUADS;
975 key->hs.ccw = true;
976 key->hs.point_mode = false;
977 key->hs.spacing = TESS_SPACING_EQUAL;
978 }
979 key->hs.patch_vertices_in = MAX2(sel_ctx->ctx->patch_vertices, 1);
980 } else if (stage == PIPE_SHADER_TESS_EVAL) {
981 if (prev && prev->initial->info.stage == MESA_SHADER_TESS_CTRL)
982 key->ds.tcs_vertices_out = prev->initial->info.tess.tcs_vertices_out;
983 else
984 key->ds.tcs_vertices_out = 32;
985 }
986
987 if (sel->samples_int_textures) {
988 key->samples_int_textures = sel->samples_int_textures;
989 key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
990 /* Copy only states with integer textures */
991 for(int i = 0; i < key->n_texture_states; ++i) {
992 auto& wrap_state = sel_ctx->ctx->tex_wrap_states[stage][i];
993 if (wrap_state.is_int_sampler) {
994 memcpy(&key->tex_wrap_states[i], &wrap_state, sizeof(wrap_state));
995 key->swizzle_state[i] = sel_ctx->ctx->tex_swizzle_state[stage][i];
996 } else {
997 memset(&key->tex_wrap_states[i], 0, sizeof(key->tex_wrap_states[i]));
998 key->swizzle_state[i] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W };
999 }
1000 }
1001 }
1002
1003 for (unsigned i = 0, e = sel_ctx->ctx->num_samplers[stage]; i < e; ++i) {
1004 if (!sel_ctx->ctx->samplers[stage][i] ||
1005 sel_ctx->ctx->samplers[stage][i]->filter == PIPE_TEX_FILTER_NEAREST)
1006 continue;
1007
1008 if (sel_ctx->ctx->samplers[stage][i]->wrap_r == PIPE_TEX_WRAP_CLAMP)
1009 key->tex_saturate_r |= 1 << i;
1010 if (sel_ctx->ctx->samplers[stage][i]->wrap_s == PIPE_TEX_WRAP_CLAMP)
1011 key->tex_saturate_s |= 1 << i;
1012 if (sel_ctx->ctx->samplers[stage][i]->wrap_t == PIPE_TEX_WRAP_CLAMP)
1013 key->tex_saturate_t |= 1 << i;
1014 }
1015
1016 if (sel->compare_with_lod_bias_grad) {
1017 key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
1018 memcpy(key->sampler_compare_funcs, sel_ctx->ctx->tex_compare_func[stage],
1019 key->n_texture_states * sizeof(enum compare_func));
1020 memcpy(key->swizzle_state, sel_ctx->ctx->tex_swizzle_state[stage],
1021 key->n_texture_states * sizeof(dxil_texture_swizzle_state));
1022 if (!sel->samples_int_textures)
1023 memset(key->tex_wrap_states, 0, sizeof(key->tex_wrap_states[0]) * key->n_texture_states);
1024 }
1025
1026 if (stage == PIPE_SHADER_VERTEX && sel_ctx->ctx->gfx_pipeline_state.ves) {
1027 key->vs.needs_format_emulation = sel_ctx->ctx->gfx_pipeline_state.ves->needs_format_emulation;
1028 if (key->vs.needs_format_emulation) {
1029 unsigned num_elements = sel_ctx->ctx->gfx_pipeline_state.ves->num_elements;
1030
1031 memset(key->vs.format_conversion + num_elements,
1032 0,
1033 sizeof(key->vs.format_conversion) - (num_elements * sizeof(enum pipe_format)));
1034
1035 memcpy(key->vs.format_conversion, sel_ctx->ctx->gfx_pipeline_state.ves->format_conversion,
1036 num_elements * sizeof(enum pipe_format));
1037 }
1038 }
1039
1040 if (stage == PIPE_SHADER_FRAGMENT &&
1041 sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY] &&
1042 sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant &&
1043 sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->gs_key.has_front_face) {
1044 key->fs.remap_front_facing = 1;
1045 }
1046
1047 if (stage == PIPE_SHADER_COMPUTE && sel_ctx->variable_workgroup_size) {
1048 memcpy(key->cs.workgroup_size, sel_ctx->variable_workgroup_size, sizeof(key->cs.workgroup_size));
1049 }
1050
1051 key->n_images = sel_ctx->ctx->num_image_views[stage];
1052 for (unsigned i = 0; i < key->n_images; ++i) {
1053 key->image_format_conversion[i].emulated_format = sel_ctx->ctx->image_view_emulation_formats[stage][i];
1054 if (key->image_format_conversion[i].emulated_format != PIPE_FORMAT_NONE)
1055 key->image_format_conversion[i].view_format = sel_ctx->ctx->image_views[stage][i].format;
1056 }
1057
1058 key->hash = d3d12_shader_key_hash(key);
1059 }
1060
/* Select (or build) the shader variant of 'sel' matching the current pipeline
 * state, leaving the result in sel->current. Existing variants are matched by
 * key; otherwise the initial NIR is cloned, the key-driven lowering passes are
 * applied in a fixed order, and the new variant is compiled and prepended to
 * the selector's variant list. */
static void
select_shader_variant(struct d3d12_selection_context *sel_ctx, d3d12_shader_selector *sel,
                      d3d12_shader_selector *prev, d3d12_shader_selector *next)
{
   struct d3d12_context *ctx = sel_ctx->ctx;
   d3d12_shader_key key;
   nir_shader *new_nir_variant;
   unsigned pstipple_binding = UINT32_MAX;

   d3d12_fill_shader_key(sel_ctx, &key, sel, prev, next);

   /* Check for an existing variant */
   for (d3d12_shader *variant = sel->first; variant;
        variant = variant->next_variant) {

      if (d3d12_compare_shader_keys(sel_ctx, &key, &variant->key)) {
         sel->current = variant;
         return;
      }
   }

   /* Clone the NIR shader; the clone is ralloc-parented to the selector. */
   new_nir_variant = nir_shader_clone(sel, sel->initial);

   /* Apply any needed lowering passes */
   if (key.stage == PIPE_SHADER_GEOMETRY) {
      if (key.gs.writes_psize) {
         NIR_PASS_V(new_nir_variant, d3d12_lower_point_sprite,
                    !key.gs.sprite_origin_upper_left,
                    key.gs.point_size_per_vertex,
                    key.gs.sprite_coord_enable,
                    key.next_varying_inputs);
      }

      if (key.gs.primitive_id)
         NIR_PASS_V(new_nir_variant, d3d12_lower_primitive_id);

      if (key.gs.triangle_strip)
         NIR_PASS_V(new_nir_variant, d3d12_lower_triangle_strip);
   }
   else if (key.stage == PIPE_SHADER_FRAGMENT)
   {
      if (key.fs.polygon_stipple) {
         /* pstipple_binding receives the sampler binding the pass allocates;
          * it is stored on the variant below for state upload. */
         NIR_PASS_V(new_nir_variant, nir_lower_pstipple_fs,
                    &pstipple_binding, 0, false, nir_type_bool1);
      }

      if (key.fs.remap_front_facing)
         dxil_nir_forward_front_face(new_nir_variant);

      if (key.fs.missing_dual_src_outputs) {
         NIR_PASS_V(new_nir_variant, d3d12_add_missing_dual_src_target,
                    key.fs.missing_dual_src_outputs);
      } else if (key.fs.frag_result_color_lowering) {
         NIR_PASS_V(new_nir_variant, nir_lower_fragcolor,
                    key.fs.frag_result_color_lowering);
      }

      if (key.fs.manual_depth_range)
         NIR_PASS_V(new_nir_variant, d3d12_lower_depth_range);
   }


   if (sel->compare_with_lod_bias_grad) {
      /* The key's swizzle array is reinterpreted directly as the pass's
       * swizzle type; the static assert guarantees layout compatibility. */
      STATIC_ASSERT(sizeof(dxil_texture_swizzle_state) ==
                    sizeof(nir_lower_tex_shadow_swizzle));

      NIR_PASS_V(new_nir_variant,
                 nir_lower_tex_shadow,
                 key.n_texture_states,
                 key.sampler_compare_funcs,
                 (nir_lower_tex_shadow_swizzle *) key.swizzle_state,
                 false);
   }

   if (key.stage == PIPE_SHADER_FRAGMENT) {
      if (key.fs.cast_to_uint)
         NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, false);
      if (key.fs.cast_to_int)
         NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, true);
   }

   if (key.n_images) {
      d3d12_image_format_conversion_info_arr image_format_arr = { key.n_images, key.image_format_conversion };
      NIR_PASS_V(new_nir_variant, d3d12_lower_image_casts, &image_format_arr);
   }

   if (key.stage == PIPE_SHADER_COMPUTE && sel->workgroup_size_variable) {
      /* Bake the per-dispatch workgroup size into this variant. */
      new_nir_variant->info.workgroup_size[0] = static_cast<uint16_t>(key.cs.workgroup_size[0]);
      new_nir_variant->info.workgroup_size[1] = static_cast<uint16_t>(key.cs.workgroup_size[1]);
      new_nir_variant->info.workgroup_size[2] = static_cast<uint16_t>(key.cs.workgroup_size[2]);
   }

   if (new_nir_variant->info.stage == MESA_SHADER_TESS_CTRL) {
      /* Propagate the domain declaration chosen in the key (from the TES,
       * or defaults) into the TCS variant. */
      new_nir_variant->info.tess._primitive_mode = (tess_primitive_mode)key.hs.primitive_mode;
      new_nir_variant->info.tess.ccw = key.hs.ccw;
      new_nir_variant->info.tess.point_mode = key.hs.point_mode;
      new_nir_variant->info.tess.spacing = key.hs.spacing;

      NIR_PASS_V(new_nir_variant, dxil_nir_set_tcs_patches_in, key.hs.patch_vertices_in);
   } else if (new_nir_variant->info.stage == MESA_SHADER_TESS_EVAL) {
      new_nir_variant->info.tess.tcs_vertices_out = static_cast<uint8_t>(key.ds.tcs_vertices_out);
   }

   {
      struct nir_lower_tex_options tex_options = { };
      tex_options.lower_txp = ~0u; /* No equivalent for textureProj */
      tex_options.lower_rect = true;
      tex_options.lower_rect_offset = true;
      tex_options.saturate_s = key.tex_saturate_s;
      tex_options.saturate_r = key.tex_saturate_r;
      tex_options.saturate_t = key.tex_saturate_t;
      tex_options.lower_invalid_implicit_lod = true;
      tex_options.lower_tg4_offsets = true;

      NIR_PASS_V(new_nir_variant, nir_lower_tex, &tex_options);
   }

   /* Remove not-written inputs, and re-sort */
   if (prev) {
      NIR_PASS_V(new_nir_variant, dxil_nir_kill_undefined_varyings, key.prev_varying_outputs,
                 prev->initial->info.patch_outputs_written, key.prev_varying_frac_outputs);
      dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_in, key.prev_varying_outputs,
                                     key.prev_varying_frac_outputs);
   }

   /* Remove not-read outputs and re-sort */
   if (next) {
      NIR_PASS_V(new_nir_variant, dxil_nir_kill_unused_outputs, key.next_varying_inputs,
                 next->initial->info.patch_inputs_read, key.next_varying_frac_inputs);
      dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_out, key.next_varying_inputs,
                                     key.next_varying_frac_inputs);
   }

   /* Refresh shader_info after all the lowering before compiling to DXIL. */
   nir_shader_gather_info(new_nir_variant, nir_shader_get_entrypoint(new_nir_variant));
   d3d12_shader *new_variant = compile_nir(ctx, sel, &key, new_nir_variant);
   assert(new_variant);

   /* keep track of polygon stipple texture binding */
   new_variant->pstipple_binding = pstipple_binding;

   /* prepend the new shader in the selector chain and pick it */
   new_variant->next_variant = sel->first;
   sel->current = sel->first = new_variant;
}
1206
1207 static d3d12_shader_selector *
get_prev_shader(struct d3d12_context * ctx,pipe_shader_type current)1208 get_prev_shader(struct d3d12_context *ctx, pipe_shader_type current)
1209 {
1210 switch (current) {
1211 case PIPE_SHADER_VERTEX:
1212 return NULL;
1213 case PIPE_SHADER_FRAGMENT:
1214 if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
1215 return ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
1216 FALLTHROUGH;
1217 case PIPE_SHADER_GEOMETRY:
1218 if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL])
1219 return ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
1220 FALLTHROUGH;
1221 case PIPE_SHADER_TESS_EVAL:
1222 if (ctx->gfx_stages[PIPE_SHADER_TESS_CTRL])
1223 return ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];
1224 FALLTHROUGH;
1225 case PIPE_SHADER_TESS_CTRL:
1226 return ctx->gfx_stages[PIPE_SHADER_VERTEX];
1227 default:
1228 unreachable("shader type not supported");
1229 }
1230 }
1231
1232 static d3d12_shader_selector *
get_next_shader(struct d3d12_context * ctx,pipe_shader_type current)1233 get_next_shader(struct d3d12_context *ctx, pipe_shader_type current)
1234 {
1235 switch (current) {
1236 case PIPE_SHADER_VERTEX:
1237 if (ctx->gfx_stages[PIPE_SHADER_TESS_CTRL])
1238 return ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];
1239 FALLTHROUGH;
1240 case PIPE_SHADER_TESS_CTRL:
1241 if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL])
1242 return ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
1243 FALLTHROUGH;
1244 case PIPE_SHADER_TESS_EVAL:
1245 if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
1246 return ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
1247 FALLTHROUGH;
1248 case PIPE_SHADER_GEOMETRY:
1249 return ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
1250 case PIPE_SHADER_FRAGMENT:
1251 return NULL;
1252 default:
1253 unreachable("shader type not supported");
1254 }
1255 }
1256
/* Texture-usage properties gathered by scan_texture_use(); drives which
 * sampler-workaround lowerings a shader needs. */
enum tex_scan_flags {
   TEX_SAMPLE_INTEGER_TEXTURE = 1 << 0, /* samples an int/uint-typed texture */
   TEX_CMP_WITH_LOD_BIAS_GRAD = 1 << 1, /* shadow compare combined with explicit bias/lod/grad */
   TEX_SCAN_ALL_FLAGS = (1 << 2) - 1    /* all flags set: allows early-out while scanning */
};
1262
1263 static unsigned
scan_texture_use(nir_shader * nir)1264 scan_texture_use(nir_shader *nir)
1265 {
1266 unsigned result = 0;
1267 nir_foreach_function_impl(impl, nir) {
1268 nir_foreach_block(block, impl) {
1269 nir_foreach_instr(instr, block) {
1270 if (instr->type == nir_instr_type_tex) {
1271 auto tex = nir_instr_as_tex(instr);
1272 switch (tex->op) {
1273 case nir_texop_txb:
1274 case nir_texop_txl:
1275 case nir_texop_txd:
1276 if (tex->is_shadow)
1277 result |= TEX_CMP_WITH_LOD_BIAS_GRAD;
1278 FALLTHROUGH;
1279 case nir_texop_tex:
1280 if (tex->dest_type & (nir_type_int | nir_type_uint))
1281 result |= TEX_SAMPLE_INTEGER_TEXTURE;
1282 default:
1283 ;
1284 }
1285 }
1286 if (TEX_SCAN_ALL_FLAGS == result)
1287 return result;
1288 }
1289 }
1290 }
1291 return result;
1292 }
1293
1294 static uint64_t
update_so_info(struct pipe_stream_output_info * so_info,uint64_t outputs_written)1295 update_so_info(struct pipe_stream_output_info *so_info,
1296 uint64_t outputs_written)
1297 {
1298 uint64_t so_outputs = 0;
1299 uint8_t reverse_map[64] = {0};
1300 unsigned slot = 0;
1301
1302 while (outputs_written)
1303 reverse_map[slot++] = static_cast<uint8_t>(u_bit_scan64(&outputs_written));
1304
1305 for (unsigned i = 0; i < so_info->num_outputs; i++) {
1306 struct pipe_stream_output *output = &so_info->output[i];
1307
1308 /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
1309 output->register_index = reverse_map[output->register_index];
1310
1311 so_outputs |= 1ull << output->register_index;
1312 }
1313
1314 return so_outputs;
1315 }
1316
/* Shared tail of shader creation (graphics and compute): scan texture usage,
 * run the stage-independent lowering passes DXIL needs, record which varyings
 * use fractional locations, and stash the NIR as the selector's blueprint
 * from which variants are cloned later. */
static struct d3d12_shader_selector *
d3d12_create_shader_impl(struct d3d12_context *ctx,
                         struct d3d12_shader_selector *sel,
                         struct nir_shader *nir)
{
   unsigned tex_scan_result = scan_texture_use(nir);
   sel->samples_int_textures = (tex_scan_result & TEX_SAMPLE_INTEGER_TEXTURE) != 0;
   sel->compare_with_lod_bias_grad = (tex_scan_result & TEX_CMP_WITH_LOD_BIAS_GRAD) != 0;
   sel->workgroup_size_variable = nir->info.workgroup_size_variable;

   /* Integer cube maps are not supported in DirectX because sampling is not supported
    * on integer textures and TextureLoad is not supported for cube maps, so we have to
    * lower integer cube maps to be handled like 2D textures arrays*/
   NIR_PASS_V(nir, dxil_nir_lower_int_cubemaps, true);

   NIR_PASS_V(nir, dxil_nir_lower_subgroup_id);
   NIR_PASS_V(nir, dxil_nir_lower_num_subgroups);

   /* DXIL wave ops: 32-bit 4-component ballots, scalarized, with relative
    * shuffle and inverse-ballot lowered. Quad ops only exist for FS/CS. */
   nir_lower_subgroups_options subgroup_options = {};
   subgroup_options.ballot_bit_size = 32;
   subgroup_options.ballot_components = 4;
   subgroup_options.lower_subgroup_masks = true;
   subgroup_options.lower_to_scalar = true;
   subgroup_options.lower_relative_shuffle = true;
   subgroup_options.lower_inverse_ballot = true;
   if (nir->info.stage != MESA_SHADER_FRAGMENT && nir->info.stage != MESA_SHADER_COMPUTE)
      subgroup_options.lower_quad = true;
   NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
   /* Widen 1-bit (bool) subgroup scans/reductions and quad swaps to 32-bit,
    * since DXIL has no 1-bit variants of these ops. */
   NIR_PASS_V(nir, nir_lower_bit_size, [](const nir_instr *instr, void *) -> unsigned {
      if (instr->type != nir_instr_type_intrinsic)
         return 0;
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      switch (intr->intrinsic) {
      case nir_intrinsic_quad_swap_horizontal:
      case nir_intrinsic_quad_swap_vertical:
      case nir_intrinsic_quad_swap_diagonal:
      case nir_intrinsic_reduce:
      case nir_intrinsic_inclusive_scan:
      case nir_intrinsic_exclusive_scan:
         return intr->def.bit_size == 1 ? 32 : 0;
      default:
         return 0;
      }
   }, NULL);

   // Ensure subgroup scans on bools are gone
   NIR_PASS_V(nir, nir_opt_dce);
   NIR_PASS_V(nir, dxil_nir_lower_unsupported_subgroup_scan);

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   if (nir->info.stage == MESA_SHADER_COMPUTE)
      NIR_PASS_V(nir, d3d12_lower_compute_state_vars);
   NIR_PASS_V(nir, d3d12_lower_load_draw_params);
   NIR_PASS_V(nir, d3d12_lower_load_patch_vertices_in);
   NIR_PASS_V(nir, dxil_nir_lower_double_math);

   /* Record which generic varyings live at a non-zero component offset;
    * the bitsets index (VARYING_SLOT_VARn * 4 + location_frac). This feeds
    * the frac-varying parts of the variant key. */
   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in) {
      if (var->data.location >= VARYING_SLOT_VAR0 && var->data.location_frac) {
         sel->has_frac_inputs = 1;
         BITSET_SET(sel->varying_frac_inputs, (var->data.location - VARYING_SLOT_VAR0) * 4 + var->data.location_frac);
      }
   }
   nir_foreach_variable_with_modes(var, nir, nir_var_shader_out) {
      if (var->data.location >= VARYING_SLOT_VAR0 && var->data.location_frac) {
         sel->has_frac_outputs = 1;
         BITSET_SET(sel->varying_frac_outputs, (var->data.location - VARYING_SLOT_VAR0) * 4 + var->data.location_frac);
      }
   }

   /* Keep this initial shader as the blue print for possible variants */
   sel->initial = nir;
   sel->initial_output_vars = nullptr;
   sel->initial_input_vars = nullptr;
   sel->gs_key.varyings = nullptr;
   sel->tcs_key.varyings = nullptr;

   return sel;
}
1396
/* Create a graphics shader selector from gallium CSO state. Accepts NIR
 * directly or translates TGSI, fixes up stream-output register indices,
 * synthesizes missing tess-level patch constants, and assigns driver
 * locations before handing off to d3d12_create_shader_impl(). */
struct d3d12_shader_selector *
d3d12_create_shader(struct d3d12_context *ctx,
                    pipe_shader_type stage,
                    const struct pipe_shader_state *shader)
{
   struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector);
   sel->stage = stage;

   struct nir_shader *nir = NULL;

   if (shader->type == PIPE_SHADER_IR_NIR) {
      nir = (nir_shader *)shader->ir.nir;
   } else {
      assert(shader->type == PIPE_SHADER_IR_TGSI);
      nir = tgsi_to_nir(shader->tokens, ctx->base.screen, false);
   }

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   /* Copy the SO declaration, then remap its condensed register indices to
    * VARYING_SLOT_* values based on what this shader actually writes. */
   memcpy(&sel->so_info, &shader->stream_output, sizeof(sel->so_info));
   update_so_info(&sel->so_info, nir->info.outputs_written);

   assert(nir != NULL);

   NIR_PASS_V(nir, dxil_nir_split_clip_cull_distance);
   NIR_PASS_V(nir, d3d12_split_needed_varyings);

   if (nir->info.stage == MESA_SHADER_TESS_EVAL || nir->info.stage == MESA_SHADER_TESS_CTRL) {
      /* D3D requires exactly-matching patch constant signatures. Since tess ctrl must write these vars,
       * tess eval must have them. */
      for (uint32_t i = 0; i < 2; ++i) {
         /* i == 0: outer tess levels (4 floats); i == 1: inner (2 floats). */
         unsigned loc = i == 0 ? VARYING_SLOT_TESS_LEVEL_OUTER : VARYING_SLOT_TESS_LEVEL_INNER;
         nir_variable_mode mode = nir->info.stage == MESA_SHADER_TESS_EVAL ? nir_var_shader_in : nir_var_shader_out;
         nir_variable *var = nir_find_variable_with_location(nir, mode, loc);
         uint32_t arr_size = i == 0 ? 4 : 2;
         if (!var) {
            var = nir_variable_create(nir, mode, glsl_array_type(glsl_float_type(), arr_size, 0), i == 0 ? "outer" : "inner");
            var->data.location = loc;
            var->data.patch = true;
            var->data.compact = true;

            /* A synthesized TCS output must actually be written: append
             * zero-stores at the end of the entrypoint. */
            if (mode == nir_var_shader_out) {
               nir_builder b = nir_builder_create(nir_shader_get_entrypoint(nir));
               b.cursor = nir_after_impl(b.impl);
               for (uint32_t j = 0; j < arr_size; ++j)
                  nir_store_deref(&b, nir_build_deref_array_imm(&b, nir_build_deref_var(&b, var), j), nir_imm_zero(&b, 1, 32), 1);
            }
         }
      }
   }

   /* VS inputs keep the element order set by vertex-elements state; all
    * other stage inputs get canonical driver locations. */
   if (nir->info.stage != MESA_SHADER_VERTEX) {
      dxil_reassign_driver_locations(nir, nir_var_shader_in, 0, NULL);
   } else {
      dxil_sort_by_driver_location(nir, nir_var_shader_in);

      uint32_t driver_loc = 0;
      nir_foreach_variable_with_modes(var, nir, nir_var_shader_in) {
         var->data.driver_location = driver_loc;
         driver_loc += glsl_count_attribute_slots(var->type, false);
      }
   }

   /* FS outputs are render targets, sorted specially; other stages get
    * canonical output locations. */
   if (nir->info.stage != MESA_SHADER_FRAGMENT) {
      dxil_reassign_driver_locations(nir, nir_var_shader_out, 0, NULL);
   } else {
      NIR_PASS_V(nir, nir_lower_fragcoord_wtrans);
      NIR_PASS_V(nir, dxil_nir_lower_sample_pos);
      dxil_sort_ps_outputs(nir);
   }

   return d3d12_create_shader_impl(ctx, sel, nir);
}
1469
1470 struct d3d12_shader_selector *
d3d12_create_compute_shader(struct d3d12_context * ctx,const struct pipe_compute_state * shader)1471 d3d12_create_compute_shader(struct d3d12_context *ctx,
1472 const struct pipe_compute_state *shader)
1473 {
1474 struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector);
1475 sel->stage = PIPE_SHADER_COMPUTE;
1476
1477 struct nir_shader *nir = NULL;
1478
1479 if (shader->ir_type == PIPE_SHADER_IR_NIR) {
1480 nir = (nir_shader *)shader->prog;
1481 } else {
1482 assert(shader->ir_type == PIPE_SHADER_IR_TGSI);
1483 nir = tgsi_to_nir(shader->prog, ctx->base.screen, false);
1484 }
1485
1486 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1487
1488 return d3d12_create_shader_impl(ctx, sel, nir);
1489 }
1490
/* Pick (or compile) the right variant of every bound graphics stage for the
 * upcoming draw. First gathers the draw-dependent lowering decisions into a
 * selection context, then validates/demotes driver-internal GS and TCS
 * variants, and finally walks the stages in pipeline order so each stage is
 * keyed against its actual neighbors. */
void
d3d12_select_shader_variants(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
{
   struct d3d12_selection_context sel_ctx;

   sel_ctx.ctx = ctx;
   sel_ctx.needs_point_sprite_lowering = needs_point_sprite_lowering(ctx, dinfo);
   sel_ctx.fill_mode_lowered = fill_mode_lowered(ctx, dinfo);
   sel_ctx.cull_mode_lowered = cull_mode_lowered(ctx, sel_ctx.fill_mode_lowered);
   sel_ctx.provoking_vertex = get_provoking_vertex(&sel_ctx, &sel_ctx.alternate_tri, dinfo);
   sel_ctx.needs_vertex_reordering = needs_vertex_reordering(&sel_ctx, dinfo);
   sel_ctx.missing_dual_src_outputs = ctx->missing_dual_src_outputs;
   sel_ctx.frag_result_color_lowering = frag_result_color_lowering(ctx);
   sel_ctx.manual_depth_range = ctx->manual_depth_range;

   /* A driver-internal ("variant") GS is bound only while a lowering needs
    * it; install/refresh it when needed, otherwise unbind the stale one.
    * An application-provided GS (non-variant) is left alone. */
   d3d12_shader_selector* gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
   if (gs == nullptr || gs->is_variant) {
      if (sel_ctx.fill_mode_lowered != PIPE_POLYGON_MODE_FILL || sel_ctx.needs_point_sprite_lowering || sel_ctx.needs_vertex_reordering)
         validate_geometry_shader_variant(&sel_ctx);
      else if (gs != nullptr) {
         ctx->gfx_stages[PIPE_SHADER_GEOMETRY] = NULL;
      }
   }

   validate_tess_ctrl_shader_variant(&sel_ctx);

   /* Select in pipeline order; each stage keys off its bound neighbors. */
   auto* stages = ctx->gfx_stages;
   d3d12_shader_selector* prev;
   d3d12_shader_selector* next;
   if (stages[PIPE_SHADER_VERTEX]) {
      next = get_next_shader(ctx, PIPE_SHADER_VERTEX);
      select_shader_variant(&sel_ctx, stages[PIPE_SHADER_VERTEX], nullptr, next);
   }
   if (stages[PIPE_SHADER_TESS_CTRL]) {
      prev = get_prev_shader(ctx, PIPE_SHADER_TESS_CTRL);
      next = get_next_shader(ctx, PIPE_SHADER_TESS_CTRL);
      select_shader_variant(&sel_ctx, stages[PIPE_SHADER_TESS_CTRL], prev, next);
   }
   if (stages[PIPE_SHADER_TESS_EVAL]) {
      prev = get_prev_shader(ctx, PIPE_SHADER_TESS_EVAL);
      next = get_next_shader(ctx, PIPE_SHADER_TESS_EVAL);
      select_shader_variant(&sel_ctx, stages[PIPE_SHADER_TESS_EVAL], prev, next);
   }
   if (stages[PIPE_SHADER_GEOMETRY]) {
      prev = get_prev_shader(ctx, PIPE_SHADER_GEOMETRY);
      next = get_next_shader(ctx, PIPE_SHADER_GEOMETRY);
      select_shader_variant(&sel_ctx, stages[PIPE_SHADER_GEOMETRY], prev, next);
   }
   if (stages[PIPE_SHADER_FRAGMENT]) {
      prev = get_prev_shader(ctx, PIPE_SHADER_FRAGMENT);
      select_shader_variant(&sel_ctx, stages[PIPE_SHADER_FRAGMENT], prev, nullptr);
   }
}
1544
1545 static const unsigned *
workgroup_size_variable(struct d3d12_context * ctx,const struct pipe_grid_info * info)1546 workgroup_size_variable(struct d3d12_context *ctx,
1547 const struct pipe_grid_info *info)
1548 {
1549 if (ctx->compute_state->workgroup_size_variable)
1550 return info->block;
1551 return nullptr;
1552 }
1553
1554 void
d3d12_select_compute_shader_variants(struct d3d12_context * ctx,const struct pipe_grid_info * info)1555 d3d12_select_compute_shader_variants(struct d3d12_context *ctx, const struct pipe_grid_info *info)
1556 {
1557 struct d3d12_selection_context sel_ctx = {};
1558
1559 sel_ctx.ctx = ctx;
1560 sel_ctx.variable_workgroup_size = workgroup_size_variable(ctx, info);
1561
1562 select_shader_variant(&sel_ctx, ctx->compute_state, nullptr, nullptr);
1563 }
1564
1565 void
d3d12_shader_free(struct d3d12_shader_selector * sel)1566 d3d12_shader_free(struct d3d12_shader_selector *sel)
1567 {
1568 auto shader = sel->first;
1569 while (shader) {
1570 free(shader->bytecode);
1571 shader = shader->next_variant;
1572 }
1573
1574 ralloc_free((void*)sel->initial);
1575 ralloc_free(sel);
1576 }
1577