1 /*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "d3d12_compiler.h"
25 #include "d3d12_context.h"
26 #include "d3d12_debug.h"
27 #include "d3d12_screen.h"
28 #include "d3d12_nir_passes.h"
29 #include "nir_to_dxil.h"
30 #include "dxil_nir.h"
31 #include "dxil_nir_lower_int_cubemaps.h"
32
33 #include "pipe/p_state.h"
34
35 #include "nir.h"
36 #include "nir/nir_draw_helpers.h"
37 #include "nir/tgsi_to_nir.h"
38 #include "compiler/nir/nir_builder.h"
39
40 #include "util/hash_table.h"
41 #include "util/u_memory.h"
42 #include "util/u_prim.h"
43 #include "util/u_simple_shaders.h"
44 #include "util/u_dl.h"
45
46 #include <dxguids/dxguids.h>
47
48 #ifdef _WIN32
49 #include "dxil_validator.h"
50 #endif
51
52 const void *
d3d12_get_compiler_options(struct pipe_screen * screen,enum pipe_shader_ir ir,enum pipe_shader_type shader)53 d3d12_get_compiler_options(struct pipe_screen *screen,
54 enum pipe_shader_ir ir,
55 enum pipe_shader_type shader)
56 {
57 assert(ir == PIPE_SHADER_IR_NIR);
58 return &d3d12_screen(screen)->nir_options;
59 }
60
61 static uint32_t
resource_dimension(enum glsl_sampler_dim dim)62 resource_dimension(enum glsl_sampler_dim dim)
63 {
64 switch (dim) {
65 case GLSL_SAMPLER_DIM_1D:
66 return RESOURCE_DIMENSION_TEXTURE1D;
67 case GLSL_SAMPLER_DIM_2D:
68 return RESOURCE_DIMENSION_TEXTURE2D;
69 case GLSL_SAMPLER_DIM_3D:
70 return RESOURCE_DIMENSION_TEXTURE3D;
71 case GLSL_SAMPLER_DIM_CUBE:
72 return RESOURCE_DIMENSION_TEXTURECUBE;
73 default:
74 return RESOURCE_DIMENSION_UNKNOWN;
75 }
76 }
77
78 static bool
can_remove_dead_sampler(nir_variable * var,void * data)79 can_remove_dead_sampler(nir_variable *var, void *data)
80 {
81 const struct glsl_type *base_type = glsl_without_array(var->type);
82 return glsl_type_is_sampler(base_type) && !glsl_type_is_bare_sampler(base_type);
83 }
84
85 static struct d3d12_shader *
compile_nir(struct d3d12_context * ctx,struct d3d12_shader_selector * sel,struct d3d12_shader_key * key,struct nir_shader * nir)86 compile_nir(struct d3d12_context *ctx, struct d3d12_shader_selector *sel,
87 struct d3d12_shader_key *key, struct nir_shader *nir)
88 {
89 struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
90 struct d3d12_shader *shader = rzalloc(sel, d3d12_shader);
91 shader->key = *key;
92
93 if (shader->key.n_texture_states > 0) {
94 shader->key.tex_wrap_states = (dxil_wrap_sampler_state*)ralloc_size(sel, sizeof(dxil_wrap_sampler_state) * shader->key.n_texture_states);
95 memcpy(shader->key.tex_wrap_states, key->tex_wrap_states, sizeof(dxil_wrap_sampler_state) * shader->key.n_texture_states);
96 }
97 else
98 shader->key.tex_wrap_states = nullptr;
99
100 shader->output_vars_fs = nullptr;
101 shader->output_vars_gs = nullptr;
102 shader->output_vars_default = nullptr;
103
104 shader->input_vars_vs = nullptr;
105 shader->input_vars_default = nullptr;
106
107 shader->tess_eval_output_vars = nullptr;
108 shader->tess_ctrl_input_vars = nullptr;
109 shader->nir = nir;
110 sel->current = shader;
111
112 NIR_PASS_V(nir, nir_lower_samplers);
113 NIR_PASS_V(nir, dxil_nir_split_typed_samplers);
114
115 NIR_PASS_V(nir, nir_opt_dce);
116 struct nir_remove_dead_variables_options dead_var_opts = {};
117 dead_var_opts.can_remove_var = can_remove_dead_sampler;
118 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform, &dead_var_opts);
119
120 if (key->samples_int_textures)
121 NIR_PASS_V(nir, dxil_lower_sample_to_txf_for_integer_tex,
122 key->n_texture_states, key->tex_wrap_states, key->swizzle_state,
123 screen->base.get_paramf(&screen->base, PIPE_CAPF_MAX_TEXTURE_LOD_BIAS));
124
125 if (key->stage == PIPE_SHADER_VERTEX && key->vs.needs_format_emulation)
126 dxil_nir_lower_vs_vertex_conversion(nir, key->vs.format_conversion);
127
128 uint32_t num_ubos_before_lower_to_ubo = nir->info.num_ubos;
129 uint32_t num_uniforms_before_lower_to_ubo = nir->num_uniforms;
130 NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, false, false);
131 shader->has_default_ubo0 = num_uniforms_before_lower_to_ubo > 0 &&
132 nir->info.num_ubos > num_ubos_before_lower_to_ubo;
133
134 NIR_PASS_V(nir, dxil_nir_lower_subgroup_id);
135 NIR_PASS_V(nir, dxil_nir_lower_num_subgroups);
136
137 nir_lower_subgroups_options subgroup_options = {};
138 subgroup_options.ballot_bit_size = 32;
139 subgroup_options.ballot_components = 4;
140 subgroup_options.lower_subgroup_masks = true;
141 subgroup_options.lower_to_scalar = true;
142 subgroup_options.lower_relative_shuffle = true;
143 subgroup_options.lower_inverse_ballot = true;
144 if (nir->info.stage != MESA_SHADER_FRAGMENT && nir->info.stage != MESA_SHADER_COMPUTE)
145 subgroup_options.lower_quad = true;
146 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
147 NIR_PASS_V(nir, nir_lower_bit_size, [](const nir_instr *instr, void *) -> unsigned {
148 if (instr->type != nir_instr_type_intrinsic)
149 return 0;
150 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
151 switch (intr->intrinsic) {
152 case nir_intrinsic_quad_swap_horizontal:
153 case nir_intrinsic_quad_swap_vertical:
154 case nir_intrinsic_quad_swap_diagonal:
155 case nir_intrinsic_reduce:
156 case nir_intrinsic_inclusive_scan:
157 case nir_intrinsic_exclusive_scan:
158 return intr->def.bit_size == 1 ? 32 : 0;
159 default:
160 return 0;
161 }
162 }, NULL);
163
164 // Ensure subgroup scans on bools are gone
165 NIR_PASS_V(nir, nir_opt_dce);
166 NIR_PASS_V(nir, dxil_nir_lower_unsupported_subgroup_scan);
167
168 if (key->last_vertex_processing_stage) {
169 if (key->invert_depth)
170 NIR_PASS_V(nir, d3d12_nir_invert_depth, key->invert_depth, key->halfz);
171 if (!key->halfz)
172 NIR_PASS_V(nir, nir_lower_clip_halfz);
173 NIR_PASS_V(nir, d3d12_lower_yflip);
174 }
175 NIR_PASS_V(nir, d3d12_lower_load_draw_params);
176 NIR_PASS_V(nir, d3d12_lower_load_patch_vertices_in);
177 NIR_PASS_V(nir, d3d12_lower_state_vars, shader);
178 const struct dxil_nir_lower_loads_stores_options loads_stores_options = {};
179 NIR_PASS_V(nir, dxil_nir_lower_loads_stores_to_dxil, &loads_stores_options);
180 NIR_PASS_V(nir, dxil_nir_lower_double_math);
181
182 if (key->stage == PIPE_SHADER_FRAGMENT && key->fs.multisample_disabled)
183 NIR_PASS_V(nir, d3d12_disable_multisampling);
184
185 struct nir_to_dxil_options opts = {};
186 opts.interpolate_at_vertex = screen->have_load_at_vertex;
187 opts.lower_int16 = !screen->opts4.Native16BitShaderOpsSupported;
188 opts.no_ubo0 = !shader->has_default_ubo0;
189 opts.last_ubo_is_not_arrayed = shader->num_state_vars > 0;
190 if (key->stage == PIPE_SHADER_FRAGMENT)
191 opts.provoking_vertex = key->fs.provoking_vertex;
192 opts.input_clip_size = key->input_clip_size;
193 opts.environment = DXIL_ENVIRONMENT_GL;
194 opts.shader_model_max = screen->max_shader_model;
195 #ifdef _WIN32
196 opts.validator_version_max = dxil_get_validator_version(ctx->dxil_validator);
197 #endif
198
199 struct blob tmp;
200 if (!nir_to_dxil(nir, &opts, NULL, &tmp)) {
201 debug_printf("D3D12: nir_to_dxil failed\n");
202 return NULL;
203 }
204
205 // Non-ubo variables
206 shader->begin_srv_binding = (UINT_MAX);
207 nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
208 auto type_no_array = glsl_without_array(var->type);
209 if (glsl_type_is_texture(type_no_array)) {
210 unsigned count = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
211 for (unsigned i = 0; i < count; ++i) {
212 shader->srv_bindings[var->data.binding + i].dimension = resource_dimension(glsl_get_sampler_dim(type_no_array));
213 }
214 shader->begin_srv_binding = MIN2(var->data.binding, shader->begin_srv_binding);
215 shader->end_srv_binding = MAX2(var->data.binding + count, shader->end_srv_binding);
216 }
217 }
218
219 nir_foreach_image_variable(var, nir) {
220 auto type_no_array = glsl_without_array(var->type);
221 unsigned count = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
222 for (unsigned i = 0; i < count; ++i) {
223 shader->uav_bindings[var->data.driver_location + i].dimension = resource_dimension(glsl_get_sampler_dim(type_no_array));
224 }
225 }
226
227 // Ubo variables
228 if(nir->info.num_ubos) {
229 // Ignore state_vars ubo as it is bound as root constants
230 unsigned num_ubo_bindings = nir->info.num_ubos - (shader->state_vars_used ? 1 : 0);
231 for(unsigned i = shader->has_default_ubo0 ? 0 : 1; i < num_ubo_bindings; ++i) {
232 shader->cb_bindings[shader->num_cb_bindings++].binding = i;
233 }
234 }
235
236 #ifdef _WIN32
237 if (ctx->dxil_validator) {
238 if (!(d3d12_debug & D3D12_DEBUG_EXPERIMENTAL)) {
239 char *err;
240 if (!dxil_validate_module(ctx->dxil_validator, tmp.data,
241 tmp.size, &err) && err) {
242 debug_printf(
243 "== VALIDATION ERROR =============================================\n"
244 "%s\n"
245 "== END ==========================================================\n",
246 err);
247 ralloc_free(err);
248 }
249 }
250
251 if (d3d12_debug & D3D12_DEBUG_DISASS) {
252 char *str = dxil_disasm_module(ctx->dxil_validator, tmp.data,
253 tmp.size);
254 fprintf(stderr,
255 "== BEGIN SHADER ============================================\n"
256 "%s\n"
257 "== END SHADER ==============================================\n",
258 str);
259 ralloc_free(str);
260 }
261 }
262 #endif
263
264 blob_finish_get_buffer(&tmp, &shader->bytecode, &shader->bytecode_length);
265
266 if (d3d12_debug & D3D12_DEBUG_DXIL) {
267 char buf[256];
268 static int i;
269 snprintf(buf, sizeof(buf), "dump%02d.dxil", i++);
270 FILE *fp = fopen(buf, "wb");
271 fwrite(shader->bytecode, sizeof(char), shader->bytecode_length, fp);
272 fclose(fp);
273 fprintf(stderr, "wrote '%s'...\n", buf);
274 }
275 return shader;
276 }
277
278 struct d3d12_selection_context {
279 struct d3d12_context *ctx;
280 bool needs_point_sprite_lowering;
281 bool needs_vertex_reordering;
282 unsigned provoking_vertex;
283 bool alternate_tri;
284 unsigned fill_mode_lowered;
285 unsigned cull_mode_lowered;
286 bool manual_depth_range;
287 unsigned missing_dual_src_outputs;
288 unsigned frag_result_color_lowering;
289 const unsigned *variable_workgroup_size;
290 };
291
292 unsigned
missing_dual_src_outputs(struct d3d12_context * ctx)293 missing_dual_src_outputs(struct d3d12_context *ctx)
294 {
295 if (!ctx->gfx_pipeline_state.blend || !ctx->gfx_pipeline_state.blend->is_dual_src)
296 return 0;
297
298 struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
299 if (!fs)
300 return 0;
301
302 const nir_shader *s = fs->initial;
303
304 unsigned indices_seen = 0;
305 nir_foreach_function_impl(impl, s) {
306 nir_foreach_block(block, impl) {
307 nir_foreach_instr(instr, block) {
308 if (instr->type != nir_instr_type_intrinsic)
309 continue;
310
311 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
312 if (intr->intrinsic != nir_intrinsic_store_deref)
313 continue;
314
315 nir_variable *var = nir_intrinsic_get_var(intr, 0);
316 if (var->data.mode != nir_var_shader_out)
317 continue;
318
319 unsigned index = var->data.index;
320 if (var->data.location > FRAG_RESULT_DATA0)
321 index = var->data.location - FRAG_RESULT_DATA0;
322 else if (var->data.location != FRAG_RESULT_COLOR &&
323 var->data.location != FRAG_RESULT_DATA0)
324 continue;
325
326 indices_seen |= 1u << index;
327 if ((indices_seen & 3) == 3)
328 return 0;
329 }
330 }
331 }
332
333 return 3 & ~indices_seen;
334 }
335
336 static unsigned
frag_result_color_lowering(struct d3d12_context * ctx)337 frag_result_color_lowering(struct d3d12_context *ctx)
338 {
339 struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
340 assert(fs);
341
342 if (fs->initial->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR))
343 return ctx->fb.nr_cbufs > 1 ? ctx->fb.nr_cbufs : 0;
344
345 return 0;
346 }
347
348 bool
manual_depth_range(struct d3d12_context * ctx)349 manual_depth_range(struct d3d12_context *ctx)
350 {
351 if (!d3d12_need_zero_one_depth_range(ctx))
352 return false;
353
354 /**
355 * If we can't use the D3D12 zero-one depth-range, we might have to apply
356 * depth-range ourselves.
357 *
358 * Because we only need to override the depth-range to zero-one range in
359 * the case where we write frag-depth, we only need to apply manual
360 * depth-range to gl_FragCoord.z.
361 *
362 * No extra care is needed to be taken in the case where gl_FragDepth is
363 * written conditionally, because the GLSL 4.60 spec states:
364 *
365 * If a shader statically assigns a value to gl_FragDepth, and there
366 * is an execution path through the shader that does not set
367 * gl_FragDepth, then the value of the fragment’s depth may be
368 * undefined for executions of the shader that take that path. That
369 * is, if the set of linked fragment shaders statically contain a
370 * write to gl_FragDepth, then it is responsible for always writing
371 * it.
372 */
373
374 struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
375 return fs && fs->initial->info.inputs_read & VARYING_BIT_POS;
376 }
377
378 static bool
needs_edge_flag_fix(enum mesa_prim mode)379 needs_edge_flag_fix(enum mesa_prim mode)
380 {
381 return (mode == MESA_PRIM_QUADS ||
382 mode == MESA_PRIM_QUAD_STRIP ||
383 mode == MESA_PRIM_POLYGON);
384 }
385
386 static unsigned
fill_mode_lowered(struct d3d12_context * ctx,const struct pipe_draw_info * dinfo)387 fill_mode_lowered(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
388 {
389 struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
390
391 if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
392 !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant) ||
393 ctx->gfx_pipeline_state.rast == NULL ||
394 (dinfo->mode != MESA_PRIM_TRIANGLES &&
395 dinfo->mode != MESA_PRIM_TRIANGLE_STRIP))
396 return PIPE_POLYGON_MODE_FILL;
397
398 /* D3D12 supports line mode (wireframe) but doesn't support edge flags */
399 if (((ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_LINE &&
400 ctx->gfx_pipeline_state.rast->base.cull_face != PIPE_FACE_FRONT) ||
401 (ctx->gfx_pipeline_state.rast->base.fill_back == PIPE_POLYGON_MODE_LINE &&
402 ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_FRONT)) &&
403 (vs->initial->info.outputs_written & VARYING_BIT_EDGE ||
404 needs_edge_flag_fix(ctx->initial_api_prim)))
405 return PIPE_POLYGON_MODE_LINE;
406
407 if (ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_POINT)
408 return PIPE_POLYGON_MODE_POINT;
409
410 return PIPE_POLYGON_MODE_FILL;
411 }
412
413 static bool
has_stream_out_for_streams(struct d3d12_context * ctx)414 has_stream_out_for_streams(struct d3d12_context *ctx)
415 {
416 unsigned mask = ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->initial->info.gs.active_stream_mask & ~1;
417 for (unsigned i = 0; i < ctx->gfx_pipeline_state.so_info.num_outputs; ++i) {
418 unsigned stream = ctx->gfx_pipeline_state.so_info.output[i].stream;
419 if (((1 << stream) & mask) &&
420 ctx->so_buffer_views[stream].SizeInBytes)
421 return true;
422 }
423 return false;
424 }
425
426 static bool
needs_point_sprite_lowering(struct d3d12_context * ctx,const struct pipe_draw_info * dinfo)427 needs_point_sprite_lowering(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
428 {
429 struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
430 struct d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
431
432 if (gs != NULL && !gs->is_variant) {
433 /* There is an user GS; Check if it outputs points with PSIZE */
434 return (gs->initial->info.gs.output_primitive == MESA_PRIM_POINTS &&
435 (gs->initial->info.outputs_written & VARYING_BIT_PSIZ ||
436 ctx->gfx_pipeline_state.rast->base.point_size > 1.0) &&
437 (gs->initial->info.gs.active_stream_mask == 1 ||
438 !has_stream_out_for_streams(ctx)));
439 } else {
440 /* No user GS; check if we are drawing wide points */
441 return ((dinfo->mode == MESA_PRIM_POINTS ||
442 fill_mode_lowered(ctx, dinfo) == PIPE_POLYGON_MODE_POINT) &&
443 (ctx->gfx_pipeline_state.rast->base.point_size > 1.0 ||
444 ctx->gfx_pipeline_state.rast->base.offset_point ||
445 (ctx->gfx_pipeline_state.rast->base.point_size_per_vertex &&
446 vs->initial->info.outputs_written & VARYING_BIT_PSIZ)) &&
447 (vs->initial->info.outputs_written & VARYING_BIT_POS));
448 }
449 }
450
451 static unsigned
cull_mode_lowered(struct d3d12_context * ctx,unsigned fill_mode)452 cull_mode_lowered(struct d3d12_context *ctx, unsigned fill_mode)
453 {
454 if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
455 !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant) ||
456 ctx->gfx_pipeline_state.rast == NULL ||
457 ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_NONE)
458 return PIPE_FACE_NONE;
459
460 return ctx->gfx_pipeline_state.rast->base.cull_face;
461 }
462
463 static unsigned
get_provoking_vertex(struct d3d12_selection_context * sel_ctx,bool * alternate,const struct pipe_draw_info * dinfo)464 get_provoking_vertex(struct d3d12_selection_context *sel_ctx, bool *alternate, const struct pipe_draw_info *dinfo)
465 {
466 if (dinfo->mode == GL_PATCHES) {
467 *alternate = false;
468 return 0;
469 }
470
471 struct d3d12_shader_selector *vs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_VERTEX];
472 struct d3d12_shader_selector *gs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
473 struct d3d12_shader_selector *last_vertex_stage = gs && !gs->is_variant ? gs : vs;
474
475 enum mesa_prim mode;
476 switch (last_vertex_stage->stage) {
477 case PIPE_SHADER_GEOMETRY:
478 mode = (enum mesa_prim)last_vertex_stage->current->nir->info.gs.output_primitive;
479 break;
480 case PIPE_SHADER_VERTEX:
481 mode = (enum mesa_prim)dinfo->mode;
482 break;
483 default:
484 unreachable("Tesselation shaders are not supported");
485 }
486
487 bool flatshade_first = sel_ctx->ctx->gfx_pipeline_state.rast &&
488 sel_ctx->ctx->gfx_pipeline_state.rast->base.flatshade_first;
489 *alternate = (mode == GL_TRIANGLE_STRIP || mode == GL_TRIANGLE_STRIP_ADJACENCY) &&
490 (!gs || gs->is_variant ||
491 gs->initial->info.gs.vertices_out > u_prim_vertex_count(mode)->min);
492 return flatshade_first ? 0 : u_prim_vertex_count(mode)->min - 1;
493 }
494
495 bool
has_flat_varyings(struct d3d12_context * ctx)496 has_flat_varyings(struct d3d12_context *ctx)
497 {
498 struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
499
500 if (!fs || !fs->current)
501 return false;
502
503 nir_foreach_variable_with_modes(input, fs->current->nir,
504 nir_var_shader_in) {
505 if (input->data.interpolation == INTERP_MODE_FLAT &&
506 /* Disregard sysvals */
507 (input->data.location >= VARYING_SLOT_VAR0 ||
508 input->data.location <= VARYING_SLOT_TEX7))
509 return true;
510 }
511
512 return false;
513 }
514
515 static bool
needs_vertex_reordering(struct d3d12_selection_context * sel_ctx,const struct pipe_draw_info * dinfo)516 needs_vertex_reordering(struct d3d12_selection_context *sel_ctx, const struct pipe_draw_info *dinfo)
517 {
518 struct d3d12_context *ctx = sel_ctx->ctx;
519 bool flat = ctx->has_flat_varyings;
520 bool xfb = ctx->gfx_pipeline_state.num_so_targets > 0;
521
522 if (fill_mode_lowered(ctx, dinfo) != PIPE_POLYGON_MODE_FILL)
523 return false;
524
525 /* TODO add support for line primitives */
526
527 /* When flat shading a triangle and provoking vertex is not the first one, we use load_at_vertex.
528 If not available for this adapter, or if it's a triangle strip, we need to reorder the vertices */
529 if (flat && sel_ctx->provoking_vertex >= 2 && (!d3d12_screen(ctx->base.screen)->have_load_at_vertex ||
530 sel_ctx->alternate_tri))
531 return true;
532
533 /* When transform feedback is enabled and the output is alternating (triangle strip or triangle
534 strip with adjacency), we need to reorder vertices to get the order expected by OpenGL. This
535 only works when there is no flat shading involved. In that scenario, we don't care about
536 the provoking vertex. */
537 if (xfb && !flat && sel_ctx->alternate_tri) {
538 sel_ctx->provoking_vertex = 0;
539 return true;
540 }
541
542 return false;
543 }
544
545 static nir_variable *
create_varying_from_info(nir_shader * nir,const struct d3d12_varying_info * info,unsigned slot,unsigned slot_frac,nir_variable_mode mode,bool patch)546 create_varying_from_info(nir_shader *nir, const struct d3d12_varying_info *info,
547 unsigned slot, unsigned slot_frac, nir_variable_mode mode, bool patch)
548 {
549 nir_variable *var;
550 char tmp[100];
551
552 snprintf(tmp, ARRAY_SIZE(tmp),
553 mode == nir_var_shader_in ? "in_%d" : "out_%d",
554 info->slots[slot].vars[slot_frac].driver_location);
555 var = nir_variable_create(nir, mode, info->slots[slot].types[slot_frac], tmp);
556 var->data.location = slot;
557 var->data.location_frac = slot_frac;
558 var->data.driver_location = info->slots[slot].vars[slot_frac].driver_location;
559 var->data.interpolation = info->slots[slot].vars[slot_frac].interpolation;
560 var->data.patch = info->slots[slot].patch;
561 var->data.compact = info->slots[slot].vars[slot_frac].compact;
562 if (patch)
563 var->data.location += VARYING_SLOT_PATCH0;
564
565 if (mode == nir_var_shader_out)
566 NIR_PASS_V(nir, d3d12_write_0_to_new_varying, var);
567
568 return var;
569 }
570
571 void
create_varyings_from_info(nir_shader * nir,const struct d3d12_varying_info * info,unsigned slot,nir_variable_mode mode,bool patch)572 create_varyings_from_info(nir_shader *nir, const struct d3d12_varying_info *info,
573 unsigned slot, nir_variable_mode mode, bool patch)
574 {
575 unsigned mask = info->slots[slot].location_frac_mask;
576 while (mask)
577 create_varying_from_info(nir, info, slot, u_bit_scan(&mask), mode, patch);
578 }
579
580 static d3d12_varying_info*
fill_varyings(struct d3d12_context * ctx,const nir_shader * s,nir_variable_mode modes,uint64_t mask,bool patch)581 fill_varyings(struct d3d12_context *ctx, const nir_shader *s,
582 nir_variable_mode modes, uint64_t mask, bool patch)
583 {
584 struct d3d12_varying_info info;
585
586 info.max = 0;
587 info.mask = 0;
588 info.hash = 0;
589
590 nir_foreach_variable_with_modes(var, s, modes) {
591 unsigned slot = var->data.location;
592 bool is_generic_patch = slot >= VARYING_SLOT_PATCH0;
593 if (patch ^ is_generic_patch)
594 continue;
595 if (is_generic_patch)
596 slot -= VARYING_SLOT_PATCH0;
597 uint64_t slot_bit = BITFIELD64_BIT(slot);
598
599 if (!(mask & slot_bit))
600 continue;
601
602 if ((info.mask & slot_bit) == 0) {
603 memset(info.slots + slot, 0, sizeof(info.slots[0]));
604 info.max = MAX2(info.max, slot);
605 }
606
607 const struct glsl_type *type = var->type;
608 if ((s->info.stage == MESA_SHADER_GEOMETRY ||
609 s->info.stage == MESA_SHADER_TESS_CTRL) &&
610 (modes & nir_var_shader_in) &&
611 glsl_type_is_array(type))
612 type = glsl_get_array_element(type);
613 info.slots[slot].types[var->data.location_frac] = type;
614
615 info.slots[slot].patch = var->data.patch;
616 auto& var_slot = info.slots[slot].vars[var->data.location_frac];
617 var_slot.driver_location = var->data.driver_location;
618 var_slot.interpolation = var->data.interpolation;
619 var_slot.compact = var->data.compact;
620 info.mask |= slot_bit;
621 info.slots[slot].location_frac_mask |= (1 << var->data.location_frac);
622 }
623
624 for (uint32_t i = 0; i <= info.max; ++i) {
625 if (((1llu << i) & info.mask) == 0)
626 memset(info.slots + i, 0, sizeof(info.slots[0]));
627 else
628 info.hash = _mesa_hash_data_with_seed(info.slots + i, sizeof(info.slots[0]), info.hash);
629 }
630 info.hash = _mesa_hash_data_with_seed(&info.mask, sizeof(info.mask), info.hash);
631
632 struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
633
634 mtx_lock(&screen->varying_info_mutex);
635 set_entry *pentry = _mesa_set_search_pre_hashed(screen->varying_info_set, info.hash, &info);
636 if (pentry != nullptr) {
637 mtx_unlock(&screen->varying_info_mutex);
638 return (d3d12_varying_info*)pentry->key;
639 }
640 else {
641 d3d12_varying_info *key = MALLOC_STRUCT(d3d12_varying_info);
642 *key = info;
643
644 _mesa_set_add_pre_hashed(screen->varying_info_set, info.hash, key);
645
646 mtx_unlock(&screen->varying_info_mutex);
647 return key;
648 }
649 }
650
651 static void
fill_flat_varyings(struct d3d12_gs_variant_key * key,d3d12_shader_selector * fs)652 fill_flat_varyings(struct d3d12_gs_variant_key *key, d3d12_shader_selector *fs)
653 {
654 if (!fs || !fs->current)
655 return;
656
657 nir_foreach_variable_with_modes(input, fs->current->nir,
658 nir_var_shader_in) {
659 if (input->data.interpolation == INTERP_MODE_FLAT)
660 key->flat_varyings |= BITFIELD64_BIT(input->data.location);
661 }
662 }
663
664 bool
d3d12_compare_varying_info(const d3d12_varying_info * expect,const d3d12_varying_info * have)665 d3d12_compare_varying_info(const d3d12_varying_info *expect, const d3d12_varying_info *have)
666 {
667 if (expect == have)
668 return true;
669
670 if (expect == nullptr || have == nullptr)
671 return false;
672
673 if (expect->mask != have->mask
674 || expect->max != have->max)
675 return false;
676
677 if (!expect->mask)
678 return true;
679
680 /* 6 is a rough (wild) guess for a bulk memcmp cross-over point. When there
681 * are a small number of slots present, individual is much faster. */
682 if (util_bitcount64(expect->mask) < 6) {
683 uint64_t mask = expect->mask;
684 while (mask) {
685 int slot = u_bit_scan64(&mask);
686 if (memcmp(&expect->slots[slot], &have->slots[slot], sizeof(have->slots[slot])))
687 return false;
688 }
689
690 return true;
691 }
692
693 return !memcmp(expect->slots, have->slots, sizeof(expect->slots[0]) * expect->max);
694 }
695
696
varying_info_hash(const void * info)697 uint32_t varying_info_hash(const void *info) {
698 return ((d3d12_varying_info*)info)->hash;
699 }
varying_info_compare(const void * a,const void * b)700 bool varying_info_compare(const void *a, const void *b) {
701 return d3d12_compare_varying_info((d3d12_varying_info*)a, (d3d12_varying_info*)b);
702 }
varying_info_entry_destroy(set_entry * entry)703 void varying_info_entry_destroy(set_entry *entry) {
704 if (entry->key)
705 free((void*)entry->key);
706 }
707
708 void
d3d12_varying_cache_init(struct d3d12_screen * screen)709 d3d12_varying_cache_init(struct d3d12_screen *screen) {
710 screen->varying_info_set = _mesa_set_create(nullptr, varying_info_hash, varying_info_compare);
711 }
712
713 void
d3d12_varying_cache_destroy(struct d3d12_screen * screen)714 d3d12_varying_cache_destroy(struct d3d12_screen *screen) {
715 _mesa_set_destroy(screen->varying_info_set, varying_info_entry_destroy);
716 }
717
718
719 static void
validate_geometry_shader_variant(struct d3d12_selection_context * sel_ctx)720 validate_geometry_shader_variant(struct d3d12_selection_context *sel_ctx)
721 {
722 struct d3d12_context *ctx = sel_ctx->ctx;
723 d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
724
725 /* Nothing to do if there is a user geometry shader bound */
726 if (gs != NULL && !gs->is_variant)
727 return;
728
729 d3d12_shader_selector* vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
730 d3d12_shader_selector* fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
731
732 struct d3d12_gs_variant_key key;
733 key.all = 0;
734 key.flat_varyings = 0;
735
736 /* Fill the geometry shader variant key */
737 if (sel_ctx->fill_mode_lowered != PIPE_POLYGON_MODE_FILL) {
738 key.fill_mode = sel_ctx->fill_mode_lowered;
739 key.cull_mode = sel_ctx->cull_mode_lowered;
740 key.has_front_face = BITSET_TEST(fs->initial->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
741 if (key.cull_mode != PIPE_FACE_NONE || key.has_front_face)
742 key.front_ccw = ctx->gfx_pipeline_state.rast->base.front_ccw ^ (ctx->flip_y < 0);
743 key.edge_flag_fix = needs_edge_flag_fix(ctx->initial_api_prim);
744 fill_flat_varyings(&key, fs);
745 if (key.flat_varyings != 0)
746 key.flatshade_first = ctx->gfx_pipeline_state.rast->base.flatshade_first;
747 } else if (sel_ctx->needs_point_sprite_lowering) {
748 key.passthrough = true;
749 } else if (sel_ctx->needs_vertex_reordering) {
750 /* TODO support cases where flat shading (pv != 0) and xfb are enabled */
751 key.provoking_vertex = sel_ctx->provoking_vertex;
752 key.alternate_tri = sel_ctx->alternate_tri;
753 }
754
755 if (vs->initial_output_vars == nullptr) {
756 vs->initial_output_vars = fill_varyings(sel_ctx->ctx, vs->initial, nir_var_shader_out,
757 vs->initial->info.outputs_written, false);
758 }
759 key.varyings = vs->initial_output_vars;
760 gs = d3d12_get_gs_variant(ctx, &key);
761 ctx->gfx_stages[PIPE_SHADER_GEOMETRY] = gs;
762 }
763
764 static void
validate_tess_ctrl_shader_variant(struct d3d12_selection_context * sel_ctx)765 validate_tess_ctrl_shader_variant(struct d3d12_selection_context *sel_ctx)
766 {
767 struct d3d12_context *ctx = sel_ctx->ctx;
768 d3d12_shader_selector *tcs = ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];
769
770 /* Nothing to do if there is a user tess ctrl shader bound */
771 if (tcs != NULL && !tcs->is_variant)
772 return;
773
774 d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
775 d3d12_shader_selector *tes = ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
776 struct d3d12_tcs_variant_key key = {0};
777
778 bool variant_needed = tes != nullptr;
779
780 /* Fill the variant key */
781 if (variant_needed) {
782 if (vs->initial_output_vars == nullptr) {
783 vs->initial_output_vars = fill_varyings(sel_ctx->ctx, vs->initial, nir_var_shader_out,
784 vs->initial->info.outputs_written, false);
785 }
786 key.varyings = vs->initial_output_vars;
787 key.vertices_out = ctx->patch_vertices;
788 }
789
790 /* Find/create the proper variant and bind it */
791 tcs = variant_needed ? d3d12_get_tcs_variant(ctx, &key) : NULL;
792 ctx->gfx_stages[PIPE_SHADER_TESS_CTRL] = tcs;
793 }
794
795 static bool
d3d12_compare_shader_keys(struct d3d12_selection_context * sel_ctx,const d3d12_shader_key * expect,const d3d12_shader_key * have)796 d3d12_compare_shader_keys(struct d3d12_selection_context* sel_ctx, const d3d12_shader_key *expect, const d3d12_shader_key *have)
797 {
798 assert(expect->stage == have->stage);
799 assert(expect);
800 assert(have);
801
802 if (expect->hash != have->hash)
803 return false;
804
805 switch (expect->stage) {
806 case PIPE_SHADER_VERTEX:
807 if (expect->vs.needs_format_emulation != have->vs.needs_format_emulation)
808 return false;
809
810 if (expect->vs.needs_format_emulation) {
811 if (memcmp(expect->vs.format_conversion, have->vs.format_conversion,
812 sel_ctx->ctx->gfx_pipeline_state.ves->num_elements * sizeof(enum pipe_format)))
813 return false;
814 }
815 break;
816 case PIPE_SHADER_GEOMETRY:
817 if (expect->gs.all != have->gs.all)
818 return false;
819 break;
820 case PIPE_SHADER_TESS_CTRL:
821 if (expect->hs.all != have->hs.all ||
822 expect->hs.required_patch_outputs != have->hs.required_patch_outputs)
823 return false;
824 break;
825 case PIPE_SHADER_TESS_EVAL:
826 if (expect->ds.tcs_vertices_out != have->ds.tcs_vertices_out ||
827 expect->ds.prev_patch_outputs != have->ds.prev_patch_outputs ||
828 expect->ds.required_patch_inputs != have->ds.required_patch_inputs)
829 return false;
830 break;
831 case PIPE_SHADER_FRAGMENT:
832 if (expect->fs.all != have->fs.all)
833 return false;
834 break;
835 case PIPE_SHADER_COMPUTE:
836 if (memcmp(expect->cs.workgroup_size, have->cs.workgroup_size,
837 sizeof(have->cs.workgroup_size)))
838 return false;
839 break;
840 default:
841 unreachable("invalid stage");
842 }
843
844 if (expect->n_texture_states != have->n_texture_states)
845 return false;
846
847 if (expect->n_images != have->n_images)
848 return false;
849
850 if (expect->n_texture_states > 0 &&
851 memcmp(expect->tex_wrap_states, have->tex_wrap_states,
852 expect->n_texture_states * sizeof(dxil_wrap_sampler_state)))
853 return false;
854
855 if (memcmp(expect->swizzle_state, have->swizzle_state,
856 expect->n_texture_states * sizeof(dxil_texture_swizzle_state)))
857 return false;
858
859 if (memcmp(expect->sampler_compare_funcs, have->sampler_compare_funcs,
860 expect->n_texture_states * sizeof(enum compare_func)))
861 return false;
862
863 if (memcmp(expect->image_format_conversion, have->image_format_conversion,
864 expect->n_images * sizeof(struct d3d12_image_format_conversion_info)))
865 return false;
866
867 return
868 expect->required_varying_inputs == have->required_varying_inputs &&
869 expect->required_varying_outputs == have->required_varying_outputs &&
870 expect->next_varying_inputs == have->next_varying_inputs &&
871 expect->prev_varying_outputs == have->prev_varying_outputs &&
872 expect->common_all == have->common_all &&
873 expect->tex_saturate_s == have->tex_saturate_s &&
874 expect->tex_saturate_r == have->tex_saturate_r &&
875 expect->tex_saturate_t == have->tex_saturate_t;
876 }
877
878 static uint32_t
d3d12_shader_key_hash(const d3d12_shader_key * key)879 d3d12_shader_key_hash(const d3d12_shader_key *key)
880 {
881 uint32_t hash;
882
883 hash = (uint32_t)key->stage;
884 hash += ((uint64_t)key->required_varying_inputs) +
885 (((uint64_t)key->required_varying_inputs) >> 32);
886 hash += ((uint64_t)key->required_varying_outputs) +
887 (((uint64_t)key->required_varying_outputs) >> 32);
888
889 hash += key->next_varying_inputs;
890 hash += key->prev_varying_outputs;
891 switch (key->stage) {
892 case PIPE_SHADER_VERTEX:
893 /* (Probably) not worth the bit extraction for needs_format_emulation and
894 * the rest of the the format_conversion data is large. Don't bother
895 * hashing for now until this is shown to be worthwhile. */
896 break;
897 case PIPE_SHADER_GEOMETRY:
898 hash += key->gs.all;
899 break;
900 case PIPE_SHADER_FRAGMENT:
901 hash += key->fs.all;
902 break;
903 case PIPE_SHADER_COMPUTE:
904 hash = _mesa_hash_data_with_seed(&key->cs, sizeof(key->cs), hash);
905 break;
906 case PIPE_SHADER_TESS_CTRL:
907 hash += key->hs.all;
908 hash += ((uint64_t)key->hs.required_patch_outputs) +
909 (((uint64_t)key->hs.required_patch_outputs) >> 32);
910 break;
911 case PIPE_SHADER_TESS_EVAL:
912 hash += key->ds.tcs_vertices_out;
913 hash += key->ds.prev_patch_outputs;
914 hash += ((uint64_t)key->ds.required_patch_inputs) +
915 (((uint64_t)key->ds.required_patch_inputs) >> 32);
916 break;
917 default:
918 /* No type specific information to hash for other stages. */
919 break;
920 }
921
922 hash += key->n_texture_states;
923 hash += key->n_images;
924 return hash;
925 }
926
927 static void
d3d12_fill_shader_key(struct d3d12_selection_context * sel_ctx,d3d12_shader_key * key,d3d12_shader_selector * sel,d3d12_shader_selector * prev,d3d12_shader_selector * next)928 d3d12_fill_shader_key(struct d3d12_selection_context *sel_ctx,
929 d3d12_shader_key *key, d3d12_shader_selector *sel,
930 d3d12_shader_selector *prev, d3d12_shader_selector *next)
931 {
932 pipe_shader_type stage = sel->stage;
933
934 uint64_t system_generated_in_values =
935 VARYING_BIT_PNTC |
936 VARYING_BIT_PRIMITIVE_ID;
937
938 uint64_t system_out_values =
939 VARYING_BIT_CLIP_DIST0 |
940 VARYING_BIT_CLIP_DIST1;
941
942 memset(key, 0, offsetof(d3d12_shader_key, vs));
943 key->stage = stage;
944
945 switch (stage)
946 {
947 case PIPE_SHADER_VERTEX:
948 key->vs.needs_format_emulation = 0;
949 break;
950 case PIPE_SHADER_FRAGMENT:
951 key->fs.all = 0;
952 break;
953 case PIPE_SHADER_GEOMETRY:
954 key->gs.all = 0;
955 break;
956 case PIPE_SHADER_TESS_CTRL:
957 key->hs.all = 0;
958 key->hs.required_patch_outputs = nullptr;
959 break;
960 case PIPE_SHADER_TESS_EVAL:
961 key->ds.tcs_vertices_out = 0;
962 key->ds.prev_patch_outputs = 0;
963 key->ds.required_patch_inputs = nullptr;
964 break;
965 case PIPE_SHADER_COMPUTE:
966 memset(key->cs.workgroup_size, 0, sizeof(key->cs.workgroup_size));
967 break;
968 default: unreachable("Invalid stage type");
969 }
970
971 key->n_texture_states = 0;
972 key->tex_wrap_states = sel_ctx->ctx->tex_wrap_states_shader_key;
973 key->n_images = 0;
974
975 if (prev) {
976 /* We require as inputs what the previous stage has written,
977 * except certain system values */
978
979 struct d3d12_varying_info **output_vars = nullptr;
980
981 switch (stage) {
982 case PIPE_SHADER_FRAGMENT:
983 system_out_values |= VARYING_BIT_POS | VARYING_BIT_PSIZ | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER;
984 output_vars = &prev->current->output_vars_fs;
985 break;
986 case PIPE_SHADER_GEOMETRY:
987 system_out_values |= VARYING_BIT_POS;
988 output_vars = &prev->current->output_vars_gs;
989 break;
990 default:
991 output_vars = &prev->current->output_vars_default;
992 break;
993 }
994
995 uint64_t mask = prev->current->nir->info.outputs_written & ~system_out_values;
996
997 if (*output_vars == nullptr) {
998 *output_vars = fill_varyings(sel_ctx->ctx, prev->current->nir,
999 nir_var_shader_out, mask, false);
1000 }
1001
1002 key->required_varying_inputs = *output_vars;
1003
1004 key->prev_varying_outputs = prev->current->nir->info.outputs_written;
1005
1006 if (stage == PIPE_SHADER_TESS_EVAL) {
1007 uint32_t patch_mask = prev->current->nir->info.patch_outputs_written;
1008
1009 if (prev->current->tess_eval_output_vars == nullptr) {
1010 prev->current->tess_eval_output_vars = fill_varyings(sel_ctx->ctx, prev->current->nir,
1011 nir_var_shader_out, patch_mask, true);
1012 }
1013
1014 key->ds.required_patch_inputs = prev->current->tess_eval_output_vars;
1015 key->ds.prev_patch_outputs = patch_mask;
1016 }
1017
1018 /* Set the provoking vertex based on the previous shader output. Only set the
1019 * key value if the driver actually supports changing the provoking vertex though */
1020 if (stage == PIPE_SHADER_FRAGMENT && sel_ctx->ctx->gfx_pipeline_state.rast &&
1021 !sel_ctx->needs_vertex_reordering &&
1022 d3d12_screen(sel_ctx->ctx->base.screen)->have_load_at_vertex)
1023 key->fs.provoking_vertex = sel_ctx->provoking_vertex;
1024
1025 /* Get the input clip distance size. The info's clip_distance_array_size corresponds
1026 * to the output, and in cases of TES or GS you could have differently-sized inputs
1027 * and outputs. For FS, there is no output, so it's repurposed to mean input.
1028 */
1029 if (stage != PIPE_SHADER_FRAGMENT)
1030 key->input_clip_size = prev->current->nir->info.clip_distance_array_size;
1031 }
1032
1033 /* We require as outputs what the next stage reads,
1034 * except certain system values */
1035 if (next) {
1036 if (!next->is_variant) {
1037
1038 struct d3d12_varying_info **input_vars = &next->current->input_vars_default;
1039
1040 if (stage == PIPE_SHADER_VERTEX) {
1041 system_generated_in_values |= VARYING_BIT_POS;
1042 input_vars = &next->current->input_vars_vs;
1043 }
1044 uint64_t mask = next->current->nir->info.inputs_read & ~system_generated_in_values;
1045
1046
1047 if (*input_vars == nullptr) {
1048 *input_vars = fill_varyings(sel_ctx->ctx, next->current->nir,
1049 nir_var_shader_in, mask, false);
1050 }
1051
1052 key->required_varying_outputs = *input_vars;
1053
1054
1055 if (stage == PIPE_SHADER_TESS_CTRL) {
1056 uint32_t patch_mask = next->current->nir->info.patch_outputs_read;
1057
1058 if (prev->current->tess_ctrl_input_vars == nullptr){
1059 prev->current->tess_ctrl_input_vars = fill_varyings(sel_ctx->ctx, prev->current->nir,
1060 nir_var_shader_in, patch_mask, true);
1061 }
1062
1063 key->hs.required_patch_outputs = prev->current->tess_ctrl_input_vars;
1064 key->hs.next_patch_inputs = patch_mask;
1065 }
1066 }
1067 key->next_varying_inputs = next->current->nir->info.inputs_read;
1068
1069 }
1070
1071 if (stage == PIPE_SHADER_GEOMETRY ||
1072 ((stage == PIPE_SHADER_VERTEX || stage == PIPE_SHADER_TESS_EVAL) &&
1073 (!next || next->stage == PIPE_SHADER_FRAGMENT))) {
1074 key->last_vertex_processing_stage = 1;
1075 key->invert_depth = sel_ctx->ctx->reverse_depth_range;
1076 key->halfz = sel_ctx->ctx->gfx_pipeline_state.rast ?
1077 sel_ctx->ctx->gfx_pipeline_state.rast->base.clip_halfz : false;
1078 if (sel_ctx->ctx->pstipple.enabled &&
1079 sel_ctx->ctx->gfx_pipeline_state.rast->base.poly_stipple_enable)
1080 key->next_varying_inputs |= VARYING_BIT_POS;
1081 }
1082
1083 if (stage == PIPE_SHADER_GEOMETRY && sel_ctx->ctx->gfx_pipeline_state.rast) {
1084 struct pipe_rasterizer_state *rast = &sel_ctx->ctx->gfx_pipeline_state.rast->base;
1085 if (sel_ctx->needs_point_sprite_lowering) {
1086 key->gs.writes_psize = 1;
1087 key->gs.point_size_per_vertex = rast->point_size_per_vertex;
1088 key->gs.sprite_coord_enable = rast->sprite_coord_enable;
1089 key->gs.sprite_origin_upper_left = (rast->sprite_coord_mode != PIPE_SPRITE_COORD_LOWER_LEFT);
1090 if (sel_ctx->ctx->flip_y < 0)
1091 key->gs.sprite_origin_upper_left = !key->gs.sprite_origin_upper_left;
1092 key->gs.aa_point = rast->point_smooth;
1093 key->gs.stream_output_factor = 6;
1094 } else if (sel_ctx->fill_mode_lowered == PIPE_POLYGON_MODE_LINE) {
1095 key->gs.stream_output_factor = 2;
1096 } else if (sel_ctx->needs_vertex_reordering && !sel->is_variant) {
1097 key->gs.triangle_strip = 1;
1098 }
1099
1100 if (sel->is_variant && next && next->initial->info.inputs_read & VARYING_BIT_PRIMITIVE_ID)
1101 key->gs.primitive_id = 1;
1102 } else if (stage == PIPE_SHADER_FRAGMENT) {
1103 key->fs.missing_dual_src_outputs = sel_ctx->missing_dual_src_outputs;
1104 key->fs.frag_result_color_lowering = sel_ctx->frag_result_color_lowering;
1105 key->fs.manual_depth_range = sel_ctx->manual_depth_range;
1106 key->fs.polygon_stipple = sel_ctx->ctx->pstipple.enabled &&
1107 sel_ctx->ctx->gfx_pipeline_state.rast->base.poly_stipple_enable;
1108 key->fs.multisample_disabled = sel_ctx->ctx->gfx_pipeline_state.rast &&
1109 !sel_ctx->ctx->gfx_pipeline_state.rast->desc.MultisampleEnable;
1110 if (sel_ctx->ctx->gfx_pipeline_state.blend &&
1111 sel_ctx->ctx->gfx_pipeline_state.blend->desc.RenderTarget[0].LogicOpEnable &&
1112 !sel_ctx->ctx->gfx_pipeline_state.has_float_rtv) {
1113 key->fs.cast_to_uint = util_format_is_unorm(sel_ctx->ctx->fb.cbufs[0]->format);
1114 key->fs.cast_to_int = !key->fs.cast_to_uint;
1115 }
1116 } else if (stage == PIPE_SHADER_TESS_CTRL) {
1117 if (next && next->current->nir->info.stage == MESA_SHADER_TESS_EVAL) {
1118 key->hs.primitive_mode = next->current->nir->info.tess._primitive_mode;
1119 key->hs.ccw = next->current->nir->info.tess.ccw;
1120 key->hs.point_mode = next->current->nir->info.tess.point_mode;
1121 key->hs.spacing = next->current->nir->info.tess.spacing;
1122 } else {
1123 key->hs.primitive_mode = TESS_PRIMITIVE_QUADS;
1124 key->hs.ccw = true;
1125 key->hs.point_mode = false;
1126 key->hs.spacing = TESS_SPACING_EQUAL;
1127 }
1128 key->hs.patch_vertices_in = MAX2(sel_ctx->ctx->patch_vertices, 1);
1129 } else if (stage == PIPE_SHADER_TESS_EVAL) {
1130 if (prev && prev->current->nir->info.stage == MESA_SHADER_TESS_CTRL)
1131 key->ds.tcs_vertices_out = prev->current->nir->info.tess.tcs_vertices_out;
1132 else
1133 key->ds.tcs_vertices_out = 32;
1134 }
1135
1136 if (sel->samples_int_textures) {
1137 key->samples_int_textures = sel->samples_int_textures;
1138 key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
1139 /* Copy only states with integer textures */
1140 for(int i = 0; i < key->n_texture_states; ++i) {
1141 auto& wrap_state = sel_ctx->ctx->tex_wrap_states[stage][i];
1142 if (wrap_state.is_int_sampler) {
1143 memcpy(&key->tex_wrap_states[i], &wrap_state, sizeof(wrap_state));
1144 key->swizzle_state[i] = sel_ctx->ctx->tex_swizzle_state[stage][i];
1145 } else {
1146 memset(&key->tex_wrap_states[i], 0, sizeof(key->tex_wrap_states[i]));
1147 key->swizzle_state[i] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W };
1148 }
1149 }
1150 }
1151
1152 for (unsigned i = 0, e = sel_ctx->ctx->num_samplers[stage]; i < e; ++i) {
1153 if (!sel_ctx->ctx->samplers[stage][i] ||
1154 sel_ctx->ctx->samplers[stage][i]->filter == PIPE_TEX_FILTER_NEAREST)
1155 continue;
1156
1157 if (sel_ctx->ctx->samplers[stage][i]->wrap_r == PIPE_TEX_WRAP_CLAMP)
1158 key->tex_saturate_r |= 1 << i;
1159 if (sel_ctx->ctx->samplers[stage][i]->wrap_s == PIPE_TEX_WRAP_CLAMP)
1160 key->tex_saturate_s |= 1 << i;
1161 if (sel_ctx->ctx->samplers[stage][i]->wrap_t == PIPE_TEX_WRAP_CLAMP)
1162 key->tex_saturate_t |= 1 << i;
1163 }
1164
1165 if (sel->compare_with_lod_bias_grad) {
1166 key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
1167 memcpy(key->sampler_compare_funcs, sel_ctx->ctx->tex_compare_func[stage],
1168 key->n_texture_states * sizeof(enum compare_func));
1169 memcpy(key->swizzle_state, sel_ctx->ctx->tex_swizzle_state[stage],
1170 key->n_texture_states * sizeof(dxil_texture_swizzle_state));
1171 if (!sel->samples_int_textures)
1172 memset(key->tex_wrap_states, 0, sizeof(key->tex_wrap_states[0]) * key->n_texture_states);
1173 }
1174
1175 if (stage == PIPE_SHADER_VERTEX && sel_ctx->ctx->gfx_pipeline_state.ves) {
1176 key->vs.needs_format_emulation = sel_ctx->ctx->gfx_pipeline_state.ves->needs_format_emulation;
1177 if (key->vs.needs_format_emulation) {
1178 unsigned num_elements = sel_ctx->ctx->gfx_pipeline_state.ves->num_elements;
1179
1180 memset(key->vs.format_conversion + num_elements,
1181 0,
1182 sizeof(key->vs.format_conversion) - (num_elements * sizeof(enum pipe_format)));
1183
1184 memcpy(key->vs.format_conversion, sel_ctx->ctx->gfx_pipeline_state.ves->format_conversion,
1185 num_elements * sizeof(enum pipe_format));
1186 }
1187 }
1188
1189 if (stage == PIPE_SHADER_FRAGMENT &&
1190 sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY] &&
1191 sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant &&
1192 sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->gs_key.has_front_face) {
1193 key->fs.remap_front_facing = 1;
1194 }
1195
1196 if (stage == PIPE_SHADER_COMPUTE && sel_ctx->variable_workgroup_size) {
1197 memcpy(key->cs.workgroup_size, sel_ctx->variable_workgroup_size, sizeof(key->cs.workgroup_size));
1198 }
1199
1200 key->n_images = sel_ctx->ctx->num_image_views[stage];
1201 for (int i = 0; i < key->n_images; ++i) {
1202 key->image_format_conversion[i].emulated_format = sel_ctx->ctx->image_view_emulation_formats[stage][i];
1203 if (key->image_format_conversion[i].emulated_format != PIPE_FORMAT_NONE)
1204 key->image_format_conversion[i].view_format = sel_ctx->ctx->image_views[stage][i].format;
1205 }
1206
1207 key->hash = d3d12_shader_key_hash(key);
1208 }
1209
1210 static void
select_shader_variant(struct d3d12_selection_context * sel_ctx,d3d12_shader_selector * sel,d3d12_shader_selector * prev,d3d12_shader_selector * next)1211 select_shader_variant(struct d3d12_selection_context *sel_ctx, d3d12_shader_selector *sel,
1212 d3d12_shader_selector *prev, d3d12_shader_selector *next)
1213 {
1214 struct d3d12_context *ctx = sel_ctx->ctx;
1215 d3d12_shader_key key;
1216 nir_shader *new_nir_variant;
1217 unsigned pstipple_binding = UINT32_MAX;
1218
1219 d3d12_fill_shader_key(sel_ctx, &key, sel, prev, next);
1220
1221 /* Check for an existing variant */
1222 for (d3d12_shader *variant = sel->first; variant;
1223 variant = variant->next_variant) {
1224
1225 if (d3d12_compare_shader_keys(sel_ctx, &key, &variant->key)) {
1226 sel->current = variant;
1227 return;
1228 }
1229 }
1230
1231 /* Clone the NIR shader */
1232 new_nir_variant = nir_shader_clone(sel, sel->initial);
1233
1234 /* Apply any needed lowering passes */
1235 if (key.stage == PIPE_SHADER_GEOMETRY) {
1236 if (key.gs.writes_psize) {
1237 NIR_PASS_V(new_nir_variant, d3d12_lower_point_sprite,
1238 !key.gs.sprite_origin_upper_left,
1239 key.gs.point_size_per_vertex,
1240 key.gs.sprite_coord_enable,
1241 key.next_varying_inputs);
1242
1243 nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
1244 nir_shader_gather_info(new_nir_variant, impl);
1245 }
1246
1247 if (key.gs.primitive_id) {
1248 NIR_PASS_V(new_nir_variant, d3d12_lower_primitive_id);
1249
1250 nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
1251 nir_shader_gather_info(new_nir_variant, impl);
1252 }
1253
1254 if (key.gs.triangle_strip)
1255 NIR_PASS_V(new_nir_variant, d3d12_lower_triangle_strip);
1256 }
1257 else if (key.stage == PIPE_SHADER_FRAGMENT)
1258 {
1259 if (key.fs.polygon_stipple) {
1260 NIR_PASS_V(new_nir_variant, nir_lower_pstipple_fs,
1261 &pstipple_binding, 0, false, nir_type_bool1);
1262
1263 nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
1264 nir_shader_gather_info(new_nir_variant, impl);
1265 }
1266
1267 if (key.fs.remap_front_facing) {
1268 dxil_nir_forward_front_face(new_nir_variant);
1269
1270 nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
1271 nir_shader_gather_info(new_nir_variant, impl);
1272 }
1273
1274 if (key.fs.missing_dual_src_outputs) {
1275 NIR_PASS_V(new_nir_variant, d3d12_add_missing_dual_src_target,
1276 key.fs.missing_dual_src_outputs);
1277 } else if (key.fs.frag_result_color_lowering) {
1278 NIR_PASS_V(new_nir_variant, nir_lower_fragcolor,
1279 key.fs.frag_result_color_lowering);
1280 }
1281
1282 if (key.fs.manual_depth_range)
1283 NIR_PASS_V(new_nir_variant, d3d12_lower_depth_range);
1284 }
1285
1286
1287 if (sel->compare_with_lod_bias_grad) {
1288 STATIC_ASSERT(sizeof(dxil_texture_swizzle_state) ==
1289 sizeof(nir_lower_tex_shadow_swizzle));
1290
1291 NIR_PASS_V(new_nir_variant, nir_lower_tex_shadow, key.n_texture_states,
1292 key.sampler_compare_funcs, (nir_lower_tex_shadow_swizzle *)key.swizzle_state);
1293 }
1294
1295 if (key.stage == PIPE_SHADER_FRAGMENT) {
1296 if (key.fs.cast_to_uint)
1297 NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, false);
1298 if (key.fs.cast_to_int)
1299 NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, true);
1300 }
1301
1302 if (key.n_images) {
1303 d3d12_image_format_conversion_info_arr image_format_arr = { key.n_images, key.image_format_conversion };
1304 NIR_PASS_V(new_nir_variant, d3d12_lower_image_casts, &image_format_arr);
1305 }
1306
1307 if (key.stage == PIPE_SHADER_COMPUTE && sel->workgroup_size_variable) {
1308 new_nir_variant->info.workgroup_size[0] = key.cs.workgroup_size[0];
1309 new_nir_variant->info.workgroup_size[1] = key.cs.workgroup_size[1];
1310 new_nir_variant->info.workgroup_size[2] = key.cs.workgroup_size[2];
1311 }
1312
1313 if (new_nir_variant->info.stage == MESA_SHADER_TESS_CTRL) {
1314 new_nir_variant->info.tess._primitive_mode = (tess_primitive_mode)key.hs.primitive_mode;
1315 new_nir_variant->info.tess.ccw = key.hs.ccw;
1316 new_nir_variant->info.tess.point_mode = key.hs.point_mode;
1317 new_nir_variant->info.tess.spacing = key.hs.spacing;
1318
1319 NIR_PASS_V(new_nir_variant, dxil_nir_set_tcs_patches_in, key.hs.patch_vertices_in);
1320 } else if (new_nir_variant->info.stage == MESA_SHADER_TESS_EVAL) {
1321 new_nir_variant->info.tess.tcs_vertices_out = key.ds.tcs_vertices_out;
1322 }
1323
1324 {
1325 struct nir_lower_tex_options tex_options = { };
1326 tex_options.lower_txp = ~0u; /* No equivalent for textureProj */
1327 tex_options.lower_rect = true;
1328 tex_options.lower_rect_offset = true;
1329 tex_options.saturate_s = key.tex_saturate_s;
1330 tex_options.saturate_r = key.tex_saturate_r;
1331 tex_options.saturate_t = key.tex_saturate_t;
1332 tex_options.lower_invalid_implicit_lod = true;
1333 tex_options.lower_tg4_offsets = true;
1334
1335 NIR_PASS_V(new_nir_variant, nir_lower_tex, &tex_options);
1336 }
1337
1338 /* Add the needed in and outputs, and re-sort */
1339 if (prev) {
1340 if (key.required_varying_inputs != nullptr) {
1341 uint64_t mask = key.required_varying_inputs->mask & ~new_nir_variant->info.inputs_read;
1342 new_nir_variant->info.inputs_read |= mask;
1343 while (mask) {
1344 int slot = u_bit_scan64(&mask);
1345 create_varyings_from_info(new_nir_variant, key.required_varying_inputs, slot, nir_var_shader_in, false);
1346 }
1347 }
1348
1349 if (sel->stage == PIPE_SHADER_TESS_EVAL) {
1350 uint32_t patch_mask = (uint32_t)key.ds.required_patch_inputs->mask & ~new_nir_variant->info.patch_inputs_read;
1351 new_nir_variant->info.patch_inputs_read |= patch_mask;
1352 while (patch_mask) {
1353 int slot = u_bit_scan(&patch_mask);
1354 create_varyings_from_info(new_nir_variant, key.ds.required_patch_inputs, slot, nir_var_shader_in, true);
1355 }
1356 }
1357 dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_in,
1358 key.prev_varying_outputs);
1359 }
1360
1361
1362 if (next) {
1363 if (key.required_varying_outputs != nullptr) {
1364 uint64_t mask = key.required_varying_outputs->mask & ~new_nir_variant->info.outputs_written;
1365 new_nir_variant->info.outputs_written |= mask;
1366 while (mask) {
1367 int slot = u_bit_scan64(&mask);
1368 create_varyings_from_info(new_nir_variant, key.required_varying_outputs, slot, nir_var_shader_out, false);
1369 }
1370 }
1371
1372 if (sel->stage == PIPE_SHADER_TESS_CTRL &&
1373 key.hs.required_patch_outputs != nullptr) {
1374 uint32_t patch_mask = (uint32_t)key.hs.required_patch_outputs->mask & ~new_nir_variant->info.patch_outputs_written;
1375 new_nir_variant->info.patch_outputs_written |= patch_mask;
1376 while (patch_mask) {
1377 int slot = u_bit_scan(&patch_mask);
1378 create_varyings_from_info(new_nir_variant, key.hs.required_patch_outputs, slot, nir_var_shader_out, true);
1379 }
1380 }
1381 dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_out,
1382 key.next_varying_inputs);
1383 }
1384
1385 d3d12_shader *new_variant = compile_nir(ctx, sel, &key, new_nir_variant);
1386 assert(new_variant);
1387
1388 /* keep track of polygon stipple texture binding */
1389 new_variant->pstipple_binding = pstipple_binding;
1390
1391 /* prepend the new shader in the selector chain and pick it */
1392 new_variant->next_variant = sel->first;
1393 sel->current = sel->first = new_variant;
1394 }
1395
1396 static d3d12_shader_selector *
get_prev_shader(struct d3d12_context * ctx,pipe_shader_type current)1397 get_prev_shader(struct d3d12_context *ctx, pipe_shader_type current)
1398 {
1399 switch (current) {
1400 case PIPE_SHADER_VERTEX:
1401 return NULL;
1402 case PIPE_SHADER_FRAGMENT:
1403 if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
1404 return ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
1405 FALLTHROUGH;
1406 case PIPE_SHADER_GEOMETRY:
1407 if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL])
1408 return ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
1409 FALLTHROUGH;
1410 case PIPE_SHADER_TESS_EVAL:
1411 if (ctx->gfx_stages[PIPE_SHADER_TESS_CTRL])
1412 return ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];
1413 FALLTHROUGH;
1414 case PIPE_SHADER_TESS_CTRL:
1415 return ctx->gfx_stages[PIPE_SHADER_VERTEX];
1416 default:
1417 unreachable("shader type not supported");
1418 }
1419 }
1420
1421 static d3d12_shader_selector *
get_next_shader(struct d3d12_context * ctx,pipe_shader_type current)1422 get_next_shader(struct d3d12_context *ctx, pipe_shader_type current)
1423 {
1424 switch (current) {
1425 case PIPE_SHADER_VERTEX:
1426 if (ctx->gfx_stages[PIPE_SHADER_TESS_CTRL])
1427 return ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];
1428 FALLTHROUGH;
1429 case PIPE_SHADER_TESS_CTRL:
1430 if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL])
1431 return ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
1432 FALLTHROUGH;
1433 case PIPE_SHADER_TESS_EVAL:
1434 if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
1435 return ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
1436 FALLTHROUGH;
1437 case PIPE_SHADER_GEOMETRY:
1438 return ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
1439 case PIPE_SHADER_FRAGMENT:
1440 return NULL;
1441 default:
1442 unreachable("shader type not supported");
1443 }
1444 }
1445
enum tex_scan_flags {
   TEX_SAMPLE_INTEGER_TEXTURE = 1 << 0,
   TEX_CMP_WITH_LOD_BIAS_GRAD = 1 << 1,
   TEX_SCAN_ALL_FLAGS = (1 << 2) - 1
};

static unsigned
scan_texture_use(nir_shader *nir)
{
   unsigned result = 0;
   nir_foreach_function_impl(impl, nir) {
      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type == nir_instr_type_tex) {
               auto tex = nir_instr_as_tex(instr);
               switch (tex->op) {
               case nir_texop_txb:
               case nir_texop_txl:
               case nir_texop_txd:
                  if (tex->is_shadow)
                     result |= TEX_CMP_WITH_LOD_BIAS_GRAD;
                  FALLTHROUGH;
               case nir_texop_tex:
                  if (tex->dest_type & (nir_type_int | nir_type_uint))
                     result |= TEX_SAMPLE_INTEGER_TEXTURE;
                  break;
               default:
                  break;
               }
            }
            if (TEX_SCAN_ALL_FLAGS == result)
               return result;
         }
      }
   }
   return result;
}

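/* Rewrite the stream-output register indices, which refer to the condensed
 * slot numbering produced by shader-info gathering, back to their real
 * VARYING_SLOT_* values, and return the mask of varyings referenced by the
 * stream output.  E.g. if only VARYING_SLOT_POS and VARYING_SLOT_VAR0 are
 * written, condensed slots 0 and 1 map back to those two values. */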
static uint64_t
update_so_info(struct pipe_stream_output_info *so_info,
               uint64_t outputs_written)
{
   uint64_t so_outputs = 0;
   uint8_t reverse_map[64] = {0};
   unsigned slot = 0;

   while (outputs_written)
      reverse_map[slot++] = u_bit_scan64(&outputs_written);

   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      struct pipe_stream_output *output = &so_info->output[i];

      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
      output->register_index = reverse_map[output->register_index];

      so_outputs |= 1ull << output->register_index;
   }

   return so_outputs;
}

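/* Common tail of shader creation: scan the NIR for features that need
 * state-dependent lowering, store the shader as the blueprint for future
 * variants, and compile an initial variant through the regular selection
 * machinery. */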
static struct d3d12_shader_selector *
d3d12_create_shader_impl(struct d3d12_context *ctx,
                         struct d3d12_shader_selector *sel,
                         struct nir_shader *nir,
                         struct d3d12_shader_selector *prev,
                         struct d3d12_shader_selector *next)
{
   unsigned tex_scan_result = scan_texture_use(nir);
   sel->samples_int_textures = (tex_scan_result & TEX_SAMPLE_INTEGER_TEXTURE) != 0;
   sel->compare_with_lod_bias_grad = (tex_scan_result & TEX_CMP_WITH_LOD_BIAS_GRAD) != 0;
   sel->workgroup_size_variable = nir->info.workgroup_size_variable;

   /* Integer cube maps are not supported in DirectX, because sampling is not
    * supported on integer textures and TextureLoad is not supported for cube
    * maps, so we have to lower integer cube maps to be handled like 2D
    * texture arrays. */
   NIR_PASS_V(nir, dxil_nir_lower_int_cubemaps, true);

   /* Keep this initial shader as the blueprint for possible variants */
   sel->initial = nir;
   sel->initial_output_vars = nullptr;
   sel->gs_key.varyings = nullptr;
   sel->tcs_key.varyings = nullptr;

   /*
    * We must compile some shader here, because if a previous or next shader
    * exists later, when the shaders are bound, the key evaluation in the
    * shader selector will access the current variant of those prev and next
    * shaders, and we can only assign a current variant once it has been
    * successfully compiled.
    *
    * For shaders that require lowering because certain instructions are not
    * available and their emulation is state-dependent (like sampling an
    * integer texture that must be emulated and needs handling of boundary
    * conditions, or shadow-compare sampling with LOD), we must go through
    * the shader selector here to create a compilable variant.
    * For shaders that do not depend on state, this just compiles the
    * original shader.
    *
    * TODO: avoid compiling the shader here if it can be foreseen that it
    * will be thrown away (i.e. it depends on states that are likely to
    * change before the shader is used for the first time).
    */
   struct d3d12_selection_context sel_ctx = {0};
   sel_ctx.ctx = ctx;
   select_shader_variant(&sel_ctx, sel, prev, next);

   if (!sel->current) {
      ralloc_free(sel);
      return NULL;
   }

   return sel;
}

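/* Create a graphics shader selector: translate TGSI to NIR if necessary,
 * split clip/cull distances and needed varyings, and assign driver
 * locations so the shader's inputs and outputs line up with the adjacent
 * bound pipeline stages. */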
struct d3d12_shader_selector *
d3d12_create_shader(struct d3d12_context *ctx,
                    pipe_shader_type stage,
                    const struct pipe_shader_state *shader)
{
   struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector);
   sel->stage = stage;

   struct nir_shader *nir = NULL;

   if (shader->type == PIPE_SHADER_IR_NIR) {
      nir = (nir_shader *)shader->ir.nir;
   } else {
      assert(shader->type == PIPE_SHADER_IR_TGSI);
      nir = tgsi_to_nir(shader->tokens, ctx->base.screen, false);
   }
   assert(nir != NULL);

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   memcpy(&sel->so_info, &shader->stream_output, sizeof(sel->so_info));
   update_so_info(&sel->so_info, nir->info.outputs_written);

   d3d12_shader_selector *prev = get_prev_shader(ctx, sel->stage);
   d3d12_shader_selector *next = get_next_shader(ctx, sel->stage);

   NIR_PASS_V(nir, dxil_nir_split_clip_cull_distance);
   NIR_PASS_V(nir, d3d12_split_needed_varyings);

   if (nir->info.stage != MESA_SHADER_VERTEX) {
      nir->info.inputs_read =
         dxil_reassign_driver_locations(nir, nir_var_shader_in,
                                        prev ? prev->current->nir->info.outputs_written : 0);
   } else {
      nir->info.inputs_read = dxil_sort_by_driver_location(nir, nir_var_shader_in);

      uint32_t driver_loc = 0;
      nir_foreach_variable_with_modes(var, nir, nir_var_shader_in) {
         var->data.driver_location = driver_loc;
         driver_loc += glsl_count_attribute_slots(var->type, false);
      }
   }

   if (nir->info.stage != MESA_SHADER_FRAGMENT) {
      nir->info.outputs_written =
         dxil_reassign_driver_locations(nir, nir_var_shader_out,
                                        next ? next->current->nir->info.inputs_read : 0);
   } else {
      NIR_PASS_V(nir, nir_lower_fragcoord_wtrans);
      NIR_PASS_V(nir, dxil_nir_lower_sample_pos);
      dxil_sort_ps_outputs(nir);
   }

   return d3d12_create_shader_impl(ctx, sel, nir, prev, next);
}

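/* Create a compute shader selector; compute shaders have no adjacent
 * pipeline stages, so no inter-stage I/O matching is needed. */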
struct d3d12_shader_selector *
d3d12_create_compute_shader(struct d3d12_context *ctx,
                            const struct pipe_compute_state *shader)
{
   struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector);
   sel->stage = PIPE_SHADER_COMPUTE;

   struct nir_shader *nir = NULL;

   if (shader->ir_type == PIPE_SHADER_IR_NIR) {
      nir = (nir_shader *)shader->prog;
   } else {
      assert(shader->ir_type == PIPE_SHADER_IR_TGSI);
      nir = tgsi_to_nir(shader->prog, ctx->base.screen, false);
   }

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   NIR_PASS_V(nir, d3d12_lower_compute_state_vars);

   return d3d12_create_shader_impl(ctx, sel, nir, nullptr, nullptr);
}

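/* Evaluate the current draw state and (re)select the variant of every
 * bound graphics stage, creating or dropping emulation geometry and
 * tess-ctrl shader variants (point sprites, fill/cull mode lowering,
 * provoking vertex, vertex reordering, ...) as needed. */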
void
d3d12_select_shader_variants(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
{
   struct d3d12_selection_context sel_ctx;

   sel_ctx.ctx = ctx;
   sel_ctx.needs_point_sprite_lowering = needs_point_sprite_lowering(ctx, dinfo);
   sel_ctx.fill_mode_lowered = fill_mode_lowered(ctx, dinfo);
   sel_ctx.cull_mode_lowered = cull_mode_lowered(ctx, sel_ctx.fill_mode_lowered);
   sel_ctx.provoking_vertex = get_provoking_vertex(&sel_ctx, &sel_ctx.alternate_tri, dinfo);
   sel_ctx.needs_vertex_reordering = needs_vertex_reordering(&sel_ctx, dinfo);
   sel_ctx.missing_dual_src_outputs = ctx->missing_dual_src_outputs;
   sel_ctx.frag_result_color_lowering = frag_result_color_lowering(ctx);
   sel_ctx.manual_depth_range = ctx->manual_depth_range;

   d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
   if (gs == nullptr || gs->is_variant) {
      if (sel_ctx.fill_mode_lowered != PIPE_POLYGON_MODE_FILL || sel_ctx.needs_point_sprite_lowering || sel_ctx.needs_vertex_reordering)
         validate_geometry_shader_variant(&sel_ctx);
      else if (gs != nullptr) {
         ctx->gfx_stages[PIPE_SHADER_GEOMETRY] = NULL;
      }
   }

   validate_tess_ctrl_shader_variant(&sel_ctx);

   auto *stages = ctx->gfx_stages;
   d3d12_shader_selector *prev;
   d3d12_shader_selector *next;
   if (stages[PIPE_SHADER_VERTEX]) {
      next = get_next_shader(ctx, PIPE_SHADER_VERTEX);
      select_shader_variant(&sel_ctx, stages[PIPE_SHADER_VERTEX], nullptr, next);
   }
   if (stages[PIPE_SHADER_TESS_CTRL]) {
      prev = get_prev_shader(ctx, PIPE_SHADER_TESS_CTRL);
      next = get_next_shader(ctx, PIPE_SHADER_TESS_CTRL);
      select_shader_variant(&sel_ctx, stages[PIPE_SHADER_TESS_CTRL], prev, next);
   }
   if (stages[PIPE_SHADER_TESS_EVAL]) {
      prev = get_prev_shader(ctx, PIPE_SHADER_TESS_EVAL);
      next = get_next_shader(ctx, PIPE_SHADER_TESS_EVAL);
      select_shader_variant(&sel_ctx, stages[PIPE_SHADER_TESS_EVAL], prev, next);
   }
   if (stages[PIPE_SHADER_GEOMETRY]) {
      prev = get_prev_shader(ctx, PIPE_SHADER_GEOMETRY);
      next = get_next_shader(ctx, PIPE_SHADER_GEOMETRY);
      select_shader_variant(&sel_ctx, stages[PIPE_SHADER_GEOMETRY], prev, next);
   }
   if (stages[PIPE_SHADER_FRAGMENT]) {
      prev = get_prev_shader(ctx, PIPE_SHADER_FRAGMENT);
      select_shader_variant(&sel_ctx, stages[PIPE_SHADER_FRAGMENT], prev, nullptr);
   }
}

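/* If the bound compute shader was compiled with a variable workgroup size,
 * the actual block size of this grid launch becomes part of the variant
 * selection; otherwise it is irrelevant and we return nullptr. */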
static const unsigned *
workgroup_size_variable(struct d3d12_context *ctx,
                        const struct pipe_grid_info *info)
{
   if (ctx->compute_state->workgroup_size_variable)
      return info->block;
   return nullptr;
}

void
d3d12_select_compute_shader_variants(struct d3d12_context *ctx, const struct pipe_grid_info *info)
{
   struct d3d12_selection_context sel_ctx = {};

   sel_ctx.ctx = ctx;
   sel_ctx.variable_workgroup_size = workgroup_size_variable(ctx, info);

   select_shader_variant(&sel_ctx, ctx->compute_state, nullptr, nullptr);
}

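/* Free a selector and all of its variants: each variant's DXIL bytecode is
 * heap allocated and must be freed explicitly, the initial NIR shader is
 * released separately, and the variants themselves are ralloc children of
 * the selector, so they go away with it. */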
void
d3d12_shader_free(struct d3d12_shader_selector *sel)
{
   auto shader = sel->first;
   while (shader) {
      free(shader->bytecode);
      shader = shader->next_variant;
   }

   ralloc_free((void*)sel->initial);
   ralloc_free(sel);
}