1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "d3d12_compiler.h"
25 #include "d3d12_context.h"
26 #include "d3d12_debug.h"
27 #include "d3d12_screen.h"
28 #include "d3d12_nir_passes.h"
29 #include "nir_to_dxil.h"
30 #include "dxil_nir.h"
31 #include "dxil_nir_lower_int_cubemaps.h"
32 
33 #include "pipe/p_state.h"
34 
35 #include "nir.h"
36 #include "nir/nir_draw_helpers.h"
37 #include "nir/tgsi_to_nir.h"
38 #include "compiler/nir/nir_builder.h"
39 #include "tgsi/tgsi_from_mesa.h"
40 #include "tgsi/tgsi_ureg.h"
41 
42 #include "util/hash_table.h"
43 #include "util/u_memory.h"
44 #include "util/u_prim.h"
45 #include "util/u_simple_shaders.h"
46 #include "util/u_dl.h"
47 
48 #include <dxguids/dxguids.h>
49 
50 extern "C" {
51 #include "tgsi/tgsi_parse.h"
52 #include "tgsi/tgsi_point_sprite.h"
53 }
54 
55 #ifdef _WIN32
56 #include "dxil_validator.h"
57 #endif
58 
59 const void *
60 d3d12_get_compiler_options(struct pipe_screen *screen,
61                            enum pipe_shader_ir ir,
62                            enum pipe_shader_type shader)
63 {
64    assert(ir == PIPE_SHADER_IR_NIR);
65    return &d3d12_screen(screen)->nir_options;
66 }
67 
68 static uint32_t
69 resource_dimension(enum glsl_sampler_dim dim)
70 {
71    switch (dim) {
72    case GLSL_SAMPLER_DIM_1D:
73       return RESOURCE_DIMENSION_TEXTURE1D;
74    case GLSL_SAMPLER_DIM_2D:
75       return RESOURCE_DIMENSION_TEXTURE2D;
76    case GLSL_SAMPLER_DIM_3D:
77       return RESOURCE_DIMENSION_TEXTURE3D;
78    case GLSL_SAMPLER_DIM_CUBE:
79       return RESOURCE_DIMENSION_TEXTURECUBE;
80    default:
81       return RESOURCE_DIMENSION_UNKNOWN;
82    }
83 }
84 
85 static bool
86 can_remove_dead_sampler(nir_variable *var, void *data)
87 {
88    const struct glsl_type *base_type = glsl_without_array(var->type);
89    return glsl_type_is_sampler(base_type) && !glsl_type_is_bare_sampler(base_type);
90 }
91 
92 static struct d3d12_shader *
93 compile_nir(struct d3d12_context *ctx, struct d3d12_shader_selector *sel,
94             struct d3d12_shader_key *key, struct nir_shader *nir)
95 {
96    struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
97    struct d3d12_shader *shader = rzalloc(sel, d3d12_shader);
98    shader->key = *key;
99    shader->nir = nir;
100    sel->current = shader;
101 
102    NIR_PASS_V(nir, nir_lower_samplers);
103    NIR_PASS_V(nir, dxil_nir_split_typed_samplers);
104 
105    NIR_PASS_V(nir, nir_opt_dce);
106    struct nir_remove_dead_variables_options dead_var_opts = {};
107    dead_var_opts.can_remove_var = can_remove_dead_sampler;
108    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform, &dead_var_opts);
109 
110    if (key->samples_int_textures)
111       NIR_PASS_V(nir, dxil_lower_sample_to_txf_for_integer_tex,
112                  key->tex_wrap_states, key->swizzle_state,
113                  screen->base.get_paramf(&screen->base, PIPE_CAPF_MAX_TEXTURE_LOD_BIAS));
114 
115    if (key->vs.needs_format_emulation)
116       dxil_nir_lower_vs_vertex_conversion(nir, key->vs.format_conversion);
117 
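   /* Pack default (non-block) uniforms into a constant buffer; remember whether
    * an implicit UBO0 was created so the binding setup below can account for it. */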
118    uint32_t num_ubos_before_lower_to_ubo = nir->info.num_ubos;
119    uint32_t num_uniforms_before_lower_to_ubo = nir->num_uniforms;
120    NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, false, false);
121    shader->has_default_ubo0 = num_uniforms_before_lower_to_ubo > 0 &&
122                               nir->info.num_ubos > num_ubos_before_lower_to_ubo;
123 
124    if (key->last_vertex_processing_stage) {
125       if (key->invert_depth)
126          NIR_PASS_V(nir, d3d12_nir_invert_depth, key->invert_depth, key->halfz);
127       if (!key->halfz)
128          NIR_PASS_V(nir, nir_lower_clip_halfz);
129       NIR_PASS_V(nir, d3d12_lower_yflip);
130    }
131    NIR_PASS_V(nir, nir_lower_packed_ubo_loads);
132    NIR_PASS_V(nir, d3d12_lower_load_draw_params);
133    NIR_PASS_V(nir, d3d12_lower_load_patch_vertices_in);
134    NIR_PASS_V(nir, d3d12_lower_state_vars, shader);
135    NIR_PASS_V(nir, dxil_nir_lower_bool_input);
136    NIR_PASS_V(nir, dxil_nir_lower_loads_stores_to_dxil);
137    NIR_PASS_V(nir, dxil_nir_lower_atomics_to_dxil);
138    NIR_PASS_V(nir, dxil_nir_lower_double_math);
139 
140    if (key->fs.multisample_disabled)
141       NIR_PASS_V(nir, d3d12_disable_multisampling);
142 
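   /* Translate the lowered NIR into DXIL, targeting the GL environment and at
    * most shader model 6.2. */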
143    struct nir_to_dxil_options opts = {};
144    opts.interpolate_at_vertex = screen->have_load_at_vertex;
145    opts.lower_int16 = !screen->opts4.Native16BitShaderOpsSupported;
146    opts.no_ubo0 = !shader->has_default_ubo0;
147    opts.last_ubo_is_not_arrayed = shader->num_state_vars > 0;
148    opts.provoking_vertex = key->fs.provoking_vertex;
149    opts.input_clip_size = key->input_clip_size;
150    opts.environment = DXIL_ENVIRONMENT_GL;
151    opts.shader_model_max = SHADER_MODEL_6_2;
152 #ifdef _WIN32
153    opts.validator_version_max = dxil_get_validator_version(ctx->dxil_validator);
154 #endif
155 
156    struct blob tmp;
157    if (!nir_to_dxil(nir, &opts, &tmp)) {
158       debug_printf("D3D12: nir_to_dxil failed\n");
159       return NULL;
160    }
161 
162    // Non-ubo variables
163    shader->begin_srv_binding = (UINT_MAX);
164    nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
165       auto type_no_array = glsl_without_array(var->type);
166       if (glsl_type_is_texture(type_no_array)) {
167          unsigned count = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
168          for (unsigned i = 0; i < count; ++i) {
169             shader->srv_bindings[var->data.binding + i].dimension = resource_dimension(glsl_get_sampler_dim(type_no_array));
170          }
171          shader->begin_srv_binding = MIN2(var->data.binding, shader->begin_srv_binding);
172          shader->end_srv_binding = MAX2(var->data.binding + count, shader->end_srv_binding);
173       }
174    }
175 
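   /* Record the format and resource dimension for each image (UAV) binding. */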
176    nir_foreach_image_variable(var, nir) {
177       auto type_no_array = glsl_without_array(var->type);
178       unsigned count = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
179       for (unsigned i = 0; i < count; ++i) {
180          shader->uav_bindings[var->data.driver_location + i].format = var->data.image.format;
181          shader->uav_bindings[var->data.driver_location + i].dimension = resource_dimension(glsl_get_sampler_dim(type_no_array));
182       }
183    }
184 
185    // Ubo variables
186    if(nir->info.num_ubos) {
187       // Ignore state_vars ubo as it is bound as root constants
188       unsigned num_ubo_bindings = nir->info.num_ubos - (shader->state_vars_used ? 1 : 0);
189       for(unsigned i = shader->has_default_ubo0 ? 0 : 1; i < num_ubo_bindings; ++i) {
190          shader->cb_bindings[shader->num_cb_bindings++].binding = i;
191       }
192    }
193 
194 #ifdef _WIN32
195    if (ctx->dxil_validator) {
196       if (!(d3d12_debug & D3D12_DEBUG_EXPERIMENTAL)) {
197          char *err;
198          if (!dxil_validate_module(ctx->dxil_validator, tmp.data,
199                                    tmp.size, &err) && err) {
200             debug_printf(
201                "== VALIDATION ERROR =============================================\n"
202                "%s\n"
203                "== END ==========================================================\n",
204                err);
205             ralloc_free(err);
206          }
207       }
208 
209       if (d3d12_debug & D3D12_DEBUG_DISASS) {
210          char *str = dxil_disasm_module(ctx->dxil_validator, tmp.data,
211                                         tmp.size);
212          fprintf(stderr,
213                  "== BEGIN SHADER ============================================\n"
214                  "%s\n"
215                  "== END SHADER ==============================================\n",
216                str);
217          ralloc_free(str);
218       }
219    }
220 #endif
221 
222    blob_finish_get_buffer(&tmp, &shader->bytecode, &shader->bytecode_length);
223 
224    if (d3d12_debug & D3D12_DEBUG_DXIL) {
225       char buf[256];
226       static int i;
227       snprintf(buf, sizeof(buf), "dump%02d.dxil", i++);
228       FILE *fp = fopen(buf, "wb");
229       fwrite(shader->bytecode, sizeof(char), shader->bytecode_length, fp);
230       fclose(fp);
231       fprintf(stderr, "wrote '%s'...\n", buf);
232    }
233    return shader;
234 }
235 
236 struct d3d12_selection_context {
237    struct d3d12_context *ctx;
238    bool needs_point_sprite_lowering;
239    bool needs_vertex_reordering;
240    unsigned provoking_vertex;
241    bool alternate_tri;
242    unsigned fill_mode_lowered;
243    unsigned cull_mode_lowered;
244    bool manual_depth_range;
245    unsigned missing_dual_src_outputs;
246    unsigned frag_result_color_lowering;
247    const unsigned *variable_workgroup_size;
248 };
249 
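/* Return the mask of dual-source blend outputs (indices 0 and 1) that the bound
 * fragment shader does not write, so a shader variant can add the missing
 * outputs (see d3d12_add_missing_dual_src_target). */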
250 static unsigned
251 missing_dual_src_outputs(struct d3d12_context *ctx)
252 {
253    if (!ctx->gfx_pipeline_state.blend->is_dual_src)
254       return 0;
255 
256    struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
257    nir_shader *s = fs->initial;
258 
259    unsigned indices_seen = 0;
260    nir_foreach_function(function, s) {
261       if (function->impl) {
262          nir_foreach_block(block, function->impl) {
263             nir_foreach_instr(instr, block) {
264                if (instr->type != nir_instr_type_intrinsic)
265                   continue;
266 
267                nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
268                if (intr->intrinsic != nir_intrinsic_store_deref)
269                   continue;
270 
271                nir_variable *var = nir_intrinsic_get_var(intr, 0);
272                if (var->data.mode != nir_var_shader_out)
273                   continue;
274 
275                unsigned index = var->data.index;
276                if (var->data.location > FRAG_RESULT_DATA0)
277                   index = var->data.location - FRAG_RESULT_DATA0;
278                else if (var->data.location != FRAG_RESULT_COLOR &&
279                         var->data.location != FRAG_RESULT_DATA0)
280                   continue;
281 
282                indices_seen |= 1u << index;
283                if ((indices_seen & 3) == 3)
284                   return 0;
285             }
286          }
287       }
288    }
289 
290    return 3 & ~indices_seen;
291 }
292 
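/* If the fragment shader writes gl_FragColor and more than one color buffer is
 * bound, the write has to be replicated to every bound render target. */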
293 static unsigned
294 frag_result_color_lowering(struct d3d12_context *ctx)
295 {
296    struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
297    assert(fs);
298 
299    if (fs->initial->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR))
300       return ctx->fb.nr_cbufs > 1 ? ctx->fb.nr_cbufs : 0;
301 
302    return 0;
303 }
304 
305 static bool
306 manual_depth_range(struct d3d12_context *ctx)
307 {
308    if (!d3d12_need_zero_one_depth_range(ctx))
309       return false;
310 
311    /**
312     * If we can't use the D3D12 zero-one depth-range, we might have to apply
313     * depth-range ourselves.
314     *
315     * Because we only need to override the depth-range to zero-one range in
316     * the case where we write frag-depth, we only need to apply manual
317     * depth-range to gl_FragCoord.z.
318     *
319     * No extra care needs to be taken in the case where gl_FragDepth is
320     * written conditionally, because the GLSL 4.60 spec states:
321     *
322     *    If a shader statically assigns a value to gl_FragDepth, and there
323     *    is an execution path through the shader that does not set
324     *    gl_FragDepth, then the value of the fragment’s depth may be
325     *    undefined for executions of the shader that take that path. That
326     *    is, if the set of linked fragment shaders statically contain a
327     *    write to gl_FragDepth, then it is responsible for always writing
328     *    it.
329     */
330 
331    struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
332    return fs && fs->initial->info.inputs_read & VARYING_BIT_POS;
333 }
334 
335 static bool
336 needs_edge_flag_fix(enum pipe_prim_type mode)
337 {
338    return (mode == PIPE_PRIM_QUADS ||
339            mode == PIPE_PRIM_QUAD_STRIP ||
340            mode == PIPE_PRIM_POLYGON);
341 }
342 
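/* Determine which polygon fill mode, if any, has to be emulated in a geometry
 * shader variant for this draw. */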
343 static unsigned
344 fill_mode_lowered(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
345 {
346    struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
347 
348    if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
349         !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant) ||
350        ctx->gfx_pipeline_state.rast == NULL ||
351        (dinfo->mode != PIPE_PRIM_TRIANGLES &&
352         dinfo->mode != PIPE_PRIM_TRIANGLE_STRIP))
353       return PIPE_POLYGON_MODE_FILL;
354 
355    /* D3D12 supports line mode (wireframe) but doesn't support edge flags */
356    if (((ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_LINE &&
357          ctx->gfx_pipeline_state.rast->base.cull_face != PIPE_FACE_FRONT) ||
358         (ctx->gfx_pipeline_state.rast->base.fill_back == PIPE_POLYGON_MODE_LINE &&
359          ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_FRONT)) &&
360        (vs->initial->info.outputs_written & VARYING_BIT_EDGE ||
361         needs_edge_flag_fix(ctx->initial_api_prim)))
362       return PIPE_POLYGON_MODE_LINE;
363 
364    if (ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_POINT)
365       return PIPE_POLYGON_MODE_POINT;
366 
367    return PIPE_POLYGON_MODE_FILL;
368 }
369 
370 static bool
371 has_stream_out_for_streams(struct d3d12_context *ctx)
372 {
373    unsigned mask = ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->initial->info.gs.active_stream_mask & ~1;
374    for (unsigned i = 0; i < ctx->gfx_pipeline_state.so_info.num_outputs; ++i) {
375       unsigned stream = ctx->gfx_pipeline_state.so_info.output[i].stream;
376       if (((1 << stream) & mask) &&
377          ctx->so_buffer_views[stream].SizeInBytes)
378          return true;
379    }
380    return false;
381 }
382 
383 static bool
384 needs_point_sprite_lowering(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
385 {
386    struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
387    struct d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
388 
389    if (gs != NULL && !gs->is_variant) {
390       /* There is a user GS; check if it outputs points with PSIZE */
391       return (gs->initial->info.gs.output_primitive == GL_POINTS &&
392               (gs->initial->info.outputs_written & VARYING_BIT_PSIZ ||
393                  ctx->gfx_pipeline_state.rast->base.point_size > 1.0) &&
394               (gs->initial->info.gs.active_stream_mask == 1 ||
395                  !has_stream_out_for_streams(ctx)));
396    } else {
397       /* No user GS; check if we are drawing wide points */
398       return ((dinfo->mode == PIPE_PRIM_POINTS ||
399                fill_mode_lowered(ctx, dinfo) == PIPE_POLYGON_MODE_POINT) &&
400               (ctx->gfx_pipeline_state.rast->base.point_size > 1.0 ||
401                ctx->gfx_pipeline_state.rast->base.offset_point ||
402                (ctx->gfx_pipeline_state.rast->base.point_size_per_vertex &&
403                 vs->initial->info.outputs_written & VARYING_BIT_PSIZ)) &&
404               (vs->initial->info.outputs_written & VARYING_BIT_POS));
405    }
406 }
407 
408 static unsigned
409 cull_mode_lowered(struct d3d12_context *ctx, unsigned fill_mode)
410 {
411    if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
412         !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant) ||
413        ctx->gfx_pipeline_state.rast == NULL ||
414        ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_NONE)
415       return PIPE_FACE_NONE;
416 
417    return ctx->gfx_pipeline_state.rast->base.cull_face;
418 }
419 
420 static unsigned
421 get_provoking_vertex(struct d3d12_selection_context *sel_ctx, bool *alternate, const struct pipe_draw_info *dinfo)
422 {
423    if (dinfo->mode == GL_PATCHES) {
424       *alternate = false;
425       return 0;
426    }
427 
428    struct d3d12_shader_selector *vs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_VERTEX];
429    struct d3d12_shader_selector *gs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
430    struct d3d12_shader_selector *last_vertex_stage = gs && !gs->is_variant ? gs : vs;
431 
432    /* Make sure GL prims match Gallium prims */
433    STATIC_ASSERT(GL_POINTS == PIPE_PRIM_POINTS);
434    STATIC_ASSERT(GL_LINES == PIPE_PRIM_LINES);
435    STATIC_ASSERT(GL_LINE_STRIP == PIPE_PRIM_LINE_STRIP);
436 
437    enum pipe_prim_type mode;
438    switch (last_vertex_stage->stage) {
439    case PIPE_SHADER_GEOMETRY:
440       mode = (enum pipe_prim_type)last_vertex_stage->current->nir->info.gs.output_primitive;
441       break;
442    case PIPE_SHADER_VERTEX:
443       mode = (enum pipe_prim_type)dinfo->mode;
444       break;
445    default:
446       unreachable("Tessellation shaders are not supported");
447    }
448 
449    bool flatshade_first = sel_ctx->ctx->gfx_pipeline_state.rast &&
450                           sel_ctx->ctx->gfx_pipeline_state.rast->base.flatshade_first;
451    *alternate = (mode == GL_TRIANGLE_STRIP || mode == GL_TRIANGLE_STRIP_ADJACENCY) &&
452                 (!gs || gs->is_variant ||
453                  gs->initial->info.gs.vertices_out > u_prim_vertex_count(mode)->min);
454    return flatshade_first ? 0 : u_prim_vertex_count(mode)->min - 1;
455 }
456 
457 static bool
458 has_flat_varyings(struct d3d12_context *ctx)
459 {
460    struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
461 
462    if (!fs || !fs->current)
463       return false;
464 
465    nir_foreach_variable_with_modes(input, fs->current->nir,
466                                    nir_var_shader_in) {
467       if (input->data.interpolation == INTERP_MODE_FLAT &&
468           /* Disregard sysvals */
469           (input->data.location >= VARYING_SLOT_VAR0 ||
470              input->data.location <= VARYING_SLOT_TEX7))
471          return true;
472    }
473 
474    return false;
475 }
476 
477 static bool
478 needs_vertex_reordering(struct d3d12_selection_context *sel_ctx, const struct pipe_draw_info *dinfo)
479 {
480    struct d3d12_context *ctx = sel_ctx->ctx;
481    bool flat = has_flat_varyings(ctx);
482    bool xfb = ctx->gfx_pipeline_state.num_so_targets > 0;
483 
484    if (fill_mode_lowered(ctx, dinfo) != PIPE_POLYGON_MODE_FILL)
485       return false;
486 
487    /* TODO add support for line primitives */
488 
489    /* When flat shading a triangle and the provoking vertex is not the first one, we use load_at_vertex.
490       If that is not available on this adapter, or if it's a triangle strip, we need to reorder the vertices */
491    if (flat && sel_ctx->provoking_vertex >= 2 && (!d3d12_screen(ctx->base.screen)->have_load_at_vertex ||
492                                                   sel_ctx->alternate_tri))
493       return true;
494 
495    /* When transform feedback is enabled and the output is alternating (triangle strip or triangle
496       strip with adjacency), we need to reorder vertices to get the order expected by OpenGL. This
497       only works when there is no flat shading involved. In that scenario, we don't care about
498       the provoking vertex. */
499    if (xfb && !flat && sel_ctx->alternate_tri) {
500       sel_ctx->provoking_vertex = 0;
501       return true;
502    }
503 
504    return false;
505 }
506 
507 static nir_variable *
508 create_varying_from_info(nir_shader *nir, struct d3d12_varying_info *info,
509                          unsigned slot, unsigned slot_frac, nir_variable_mode mode, bool patch)
510 {
511    nir_variable *var;
512    char tmp[100];
513 
514    snprintf(tmp, ARRAY_SIZE(tmp),
515             mode == nir_var_shader_in ? "in_%d" : "out_%d",
516             info->slots[slot].vars[slot_frac].driver_location);
517    var = nir_variable_create(nir, mode, info->slots[slot].types[slot_frac], tmp);
518    var->data.location = slot;
519    var->data.location_frac = slot_frac;
520    var->data.driver_location = info->slots[slot].vars[slot_frac].driver_location;
521    var->data.interpolation = info->slots[slot].vars[slot_frac].interpolation;
522    var->data.patch = info->slots[slot].patch;
523    var->data.compact = info->slots[slot].vars[slot_frac].compact;
524    if (patch)
525       var->data.location += VARYING_SLOT_PATCH0;
526 
527    if (mode == nir_var_shader_out)
528       NIR_PASS_V(nir, d3d12_write_0_to_new_varying, var);
529 
530    return var;
531 }
532 
533 void
534 create_varyings_from_info(nir_shader *nir, struct d3d12_varying_info *info,
535                           unsigned slot, nir_variable_mode mode, bool patch)
536 {
537    unsigned mask = info->slots[slot].location_frac_mask;
538    while (mask)
539       create_varying_from_info(nir, info, slot, u_bit_scan(&mask), mode, patch);
540 }
541 
542 static void
543 fill_varyings(struct d3d12_varying_info *info, nir_shader *s,
544               nir_variable_mode modes, uint64_t mask, bool patch)
545 {
546    nir_foreach_variable_with_modes(var, s, modes) {
547       unsigned slot = var->data.location;
548       bool is_generic_patch = slot >= VARYING_SLOT_PATCH0;
549       if (patch ^ is_generic_patch)
550          continue;
551       if (is_generic_patch)
552          slot -= VARYING_SLOT_PATCH0;
553       uint64_t slot_bit = BITFIELD64_BIT(slot);
554 
555       if (!(mask & slot_bit))
556          continue;
557 
558       const struct glsl_type *type = var->type;
559       if ((s->info.stage == MESA_SHADER_GEOMETRY ||
560            s->info.stage == MESA_SHADER_TESS_CTRL) &&
561           (modes & nir_var_shader_in) &&
562           glsl_type_is_array(type))
563          type = glsl_get_array_element(type);
564       info->slots[slot].types[var->data.location_frac] = type;
565 
566       info->slots[slot].patch = var->data.patch;
567       auto& var_slot = info->slots[slot].vars[var->data.location_frac];
568       var_slot.driver_location = var->data.driver_location;
569       var_slot.interpolation = var->data.interpolation;
570       var_slot.compact = var->data.compact;
571       info->mask |= slot_bit;
572       info->slots[slot].location_frac_mask |= (1 << var->data.location_frac);
573    }
574 }
575 
576 static void
577 fill_flat_varyings(struct d3d12_gs_variant_key *key, d3d12_shader_selector *fs)
578 {
579    if (!fs || !fs->current)
580       return;
581 
582    nir_foreach_variable_with_modes(input, fs->current->nir,
583                                    nir_var_shader_in) {
584       if (input->data.interpolation == INTERP_MODE_FLAT)
585          key->flat_varyings |= BITFIELD64_BIT(input->data.location);
586    }
587 }
588 
589 static void
590 validate_geometry_shader_variant(struct d3d12_selection_context *sel_ctx)
591 {
592    struct d3d12_context *ctx = sel_ctx->ctx;
593    d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
594    d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
595    struct d3d12_gs_variant_key key = {0};
596    bool variant_needed = false;
597 
598    d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
599 
600    /* Nothing to do if there is a user geometry shader bound */
601    if (gs != NULL && !gs->is_variant)
602       return;
603 
604    /* Fill the geometry shader variant key */
605    if (sel_ctx->fill_mode_lowered != PIPE_POLYGON_MODE_FILL) {
606       key.fill_mode = sel_ctx->fill_mode_lowered;
607       key.cull_mode = sel_ctx->cull_mode_lowered;
608       key.has_front_face = BITSET_TEST(fs->initial->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
609       if (key.cull_mode != PIPE_FACE_NONE || key.has_front_face)
610          key.front_ccw = ctx->gfx_pipeline_state.rast->base.front_ccw ^ (ctx->flip_y < 0);
611       key.edge_flag_fix = needs_edge_flag_fix(ctx->initial_api_prim);
612       fill_flat_varyings(&key, fs);
613       if (key.flat_varyings != 0)
614          key.flatshade_first = ctx->gfx_pipeline_state.rast->base.flatshade_first;
615       variant_needed = true;
616    } else if (sel_ctx->needs_point_sprite_lowering) {
617       key.passthrough = true;
618       variant_needed = true;
619    } else if (sel_ctx->needs_vertex_reordering) {
620       /* TODO support cases where flat shading (pv != 0) and xfb are enabled */
621       key.provoking_vertex = sel_ctx->provoking_vertex;
622       key.alternate_tri = sel_ctx->alternate_tri;
623       variant_needed = true;
624    }
625 
626    if (variant_needed) {
627       fill_varyings(&key.varyings, vs->initial, nir_var_shader_out,
628                     vs->initial->info.outputs_written, false);
629    }
630 
631    /* Check if the currently bound geometry shader variant is correct */
632    if (gs && memcmp(&gs->gs_key, &key, sizeof(key)) == 0)
633       return;
634 
635    /* Find/create the proper variant and bind it */
636    gs = variant_needed ? d3d12_get_gs_variant(ctx, &key) : NULL;
637    ctx->gfx_stages[PIPE_SHADER_GEOMETRY] = gs;
638 }
639 
640 static void
641 validate_tess_ctrl_shader_variant(struct d3d12_selection_context *sel_ctx)
642 {
643    struct d3d12_context *ctx = sel_ctx->ctx;
644    d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
645    d3d12_shader_selector *tcs = ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];
646    d3d12_shader_selector *tes = ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
647    struct d3d12_tcs_variant_key key = {0};
648 
649    /* Nothing to do if there is a user tess ctrl shader bound */
650    if (tcs != NULL && !tcs->is_variant)
651       return;
652 
653    bool variant_needed = tes != nullptr;
654 
655    /* Fill the variant key */
656    if (variant_needed) {
657       fill_varyings(&key.varyings, vs->initial, nir_var_shader_out,
658                     vs->initial->info.outputs_written, false);
659       key.vertices_out = ctx->patch_vertices;
660    }
661 
662    /* Check if the currently bound tessellation control shader variant is correct */
663    if (tcs && memcmp(&tcs->tcs_key, &key, sizeof(key)) == 0)
664       return;
665 
666    /* Find/create the proper variant and bind it */
667    tcs = variant_needed ? d3d12_get_tcs_variant(ctx, &key) : NULL;
668    ctx->gfx_stages[PIPE_SHADER_TESS_CTRL] = tcs;
669 }
670 
671 static bool
672 d3d12_compare_varying_info(const d3d12_varying_info *expect, const d3d12_varying_info *have)
673 {
674    if (expect->mask != have->mask)
675       return false;
676 
677    if (!expect->mask)
678       return true;
679 
680    /* 6 is a rough (wild) guess for a bulk memcmp cross-over point.  When there
681     * are a small number of slots present, individual memcmp is much faster. */
682    if (util_bitcount64(expect->mask) < 6) {
683       uint64_t mask = expect->mask;
684       while (mask) {
685          int slot = u_bit_scan64(&mask);
686          if (memcmp(&expect->slots[slot], &have->slots[slot], sizeof(have->slots[slot])))
687             return false;
688       }
689 
690       return true;
691    }
692 
693    return !memcmp(expect, have, sizeof(struct d3d12_varying_info));
694 }
695 
696 static bool
697 d3d12_compare_shader_keys(const d3d12_shader_key *expect, const d3d12_shader_key *have)
698 {
699    assert(expect->stage == have->stage);
700    assert(expect);
701    assert(have);
702 
703    if (expect->hash != have->hash)
704       return false;
705 
706    /* Because we only add varyings we check that a shader has at least the expected in-
707     * and outputs. */
708 
709    if (!d3d12_compare_varying_info(&expect->required_varying_inputs,
710                                    &have->required_varying_inputs) ||
711        expect->next_varying_inputs != have->next_varying_inputs)
712       return false;
713 
714    if (!d3d12_compare_varying_info(&expect->required_varying_outputs,
715                                    &have->required_varying_outputs) ||
716        expect->prev_varying_outputs != have->prev_varying_outputs)
717       return false;
718 
719    if (expect->stage == PIPE_SHADER_GEOMETRY) {
720       if (expect->gs.writes_psize) {
721          if (!have->gs.writes_psize ||
722              expect->gs.point_pos_stream_out != have->gs.point_pos_stream_out ||
723              expect->gs.sprite_coord_enable != have->gs.sprite_coord_enable ||
724              expect->gs.sprite_origin_upper_left != have->gs.sprite_origin_upper_left ||
725              expect->gs.point_size_per_vertex != have->gs.point_size_per_vertex)
726             return false;
727       } else if (have->gs.writes_psize) {
728          return false;
729       }
730       if (expect->gs.primitive_id != have->gs.primitive_id ||
731           expect->gs.triangle_strip != have->gs.triangle_strip)
732          return false;
733    } else if (expect->stage == PIPE_SHADER_FRAGMENT) {
734       if (expect->fs.frag_result_color_lowering != have->fs.frag_result_color_lowering ||
735           expect->fs.manual_depth_range != have->fs.manual_depth_range ||
736           expect->fs.polygon_stipple != have->fs.polygon_stipple ||
737           expect->fs.cast_to_uint != have->fs.cast_to_uint ||
738           expect->fs.cast_to_int != have->fs.cast_to_int ||
739           expect->fs.remap_front_facing != have->fs.remap_front_facing ||
740           expect->fs.missing_dual_src_outputs != have->fs.missing_dual_src_outputs ||
741           expect->fs.multisample_disabled != have->fs.multisample_disabled)
742          return false;
743    } else if (expect->stage == PIPE_SHADER_COMPUTE) {
744       if (memcmp(expect->cs.workgroup_size, have->cs.workgroup_size,
745                  sizeof(have->cs.workgroup_size)))
746          return false;
747    } else if (expect->stage == PIPE_SHADER_TESS_CTRL) {
748       if (expect->hs.primitive_mode != have->hs.primitive_mode ||
749           expect->hs.ccw != have->hs.ccw ||
750           expect->hs.point_mode != have->hs.point_mode ||
751           expect->hs.spacing != have->hs.spacing ||
752           expect->hs.patch_vertices_in != have->hs.patch_vertices_in ||
753           memcmp(&expect->hs.required_patch_outputs, &have->hs.required_patch_outputs,
754                  sizeof(struct d3d12_varying_info)) ||
755           expect->hs.next_patch_inputs != have->hs.next_patch_inputs)
756          return false;
757    } else if (expect->stage == PIPE_SHADER_TESS_EVAL) {
758       if (expect->ds.tcs_vertices_out != have->ds.tcs_vertices_out ||
759           memcmp(&expect->ds.required_patch_inputs, &have->ds.required_patch_inputs,
760                  sizeof(struct d3d12_varying_info)) ||
761           expect->ds.prev_patch_outputs != have->ds.prev_patch_outputs)
762          return false;
763    }
764 
765    if (expect->input_clip_size != have->input_clip_size)
766       return false;
767 
768    if (expect->tex_saturate_s != have->tex_saturate_s ||
769        expect->tex_saturate_r != have->tex_saturate_r ||
770        expect->tex_saturate_t != have->tex_saturate_t)
771       return false;
772 
773    if (expect->samples_int_textures != have->samples_int_textures)
774       return false;
775 
776    if (expect->n_texture_states != have->n_texture_states)
777       return false;
778 
779    if (expect->n_images != have->n_images)
780       return false;
781 
782    if (memcmp(expect->tex_wrap_states, have->tex_wrap_states,
783               expect->n_texture_states * sizeof(dxil_wrap_sampler_state)))
784       return false;
785 
786    if (memcmp(expect->swizzle_state, have->swizzle_state,
787               expect->n_texture_states * sizeof(dxil_texture_swizzle_state)))
788       return false;
789 
790    if (memcmp(expect->sampler_compare_funcs, have->sampler_compare_funcs,
791               expect->n_texture_states * sizeof(enum compare_func)))
792       return false;
793 
794    if (memcmp(expect->image_format_conversion, have->image_format_conversion,
795       expect->n_images * sizeof(struct d3d12_image_format_conversion_info)))
796       return false;
797 
798    if (expect->invert_depth != have->invert_depth ||
799        expect->halfz != have->halfz)
800       return false;
801 
802    if (expect->stage == PIPE_SHADER_VERTEX) {
803       if (expect->vs.needs_format_emulation != have->vs.needs_format_emulation)
804          return false;
805 
806       if (expect->vs.needs_format_emulation) {
807          if (memcmp(expect->vs.format_conversion, have->vs.format_conversion,
808                     PIPE_MAX_ATTRIBS * sizeof (enum pipe_format)))
809             return false;
810       }
811    }
812 
813    if (expect->fs.provoking_vertex != have->fs.provoking_vertex)
814       return false;
815 
816    return true;
817 }
818 
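/* Cheap hash over the key fields that most commonly differ between variants;
 * d3d12_compare_shader_keys() still performs the full comparison. */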
819 static uint32_t
820 d3d12_shader_key_hash(const d3d12_shader_key *key)
821 {
822    uint32_t hash;
823 
824    hash = (uint32_t)key->stage;
825    hash += key->required_varying_inputs.mask;
826    hash += key->required_varying_outputs.mask;
827    hash += key->next_varying_inputs;
828    hash += key->prev_varying_outputs;
829    switch (key->stage) {
830    case PIPE_SHADER_VERTEX:
831       /* (Probably) not worth the bit extraction for needs_format_emulation,
832        * and the rest of the format_conversion data is large.  Don't bother
833        * hashing for now until this is shown to be worthwhile. */
834        break;
835    case PIPE_SHADER_GEOMETRY:
836       hash = _mesa_hash_data_with_seed(&key->gs, sizeof(key->gs), hash);
837       break;
838    case PIPE_SHADER_FRAGMENT:
839       hash = _mesa_hash_data_with_seed(&key->fs, sizeof(key->fs), hash);
840       break;
841    case PIPE_SHADER_COMPUTE:
842       hash = _mesa_hash_data_with_seed(&key->cs, sizeof(key->cs), hash);
843       break;
844    case PIPE_SHADER_TESS_CTRL:
845       hash += key->hs.next_patch_inputs;
846       break;
847    case PIPE_SHADER_TESS_EVAL:
848       hash += key->ds.tcs_vertices_out;
849       hash += key->ds.prev_patch_outputs;
850       break;
851    default:
852       /* No type specific information to hash for other stages. */
853       break;
854    }
855 
856    hash += key->n_texture_states;
857    hash += key->n_images;
858    return hash;
859 }
860 
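/* Build the shader variant key from the current pipeline state and the
 * neighbouring shader stages. */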
861 static void
862 d3d12_fill_shader_key(struct d3d12_selection_context *sel_ctx,
863                       d3d12_shader_key *key, d3d12_shader_selector *sel,
864                       d3d12_shader_selector *prev, d3d12_shader_selector *next)
865 {
866    pipe_shader_type stage = sel->stage;
867 
868    uint64_t system_generated_in_values =
869          VARYING_BIT_PNTC |
870          VARYING_BIT_PRIMITIVE_ID;
871 
872    uint64_t system_out_values =
873          VARYING_BIT_CLIP_DIST0 |
874          VARYING_BIT_CLIP_DIST1;
875 
876    memset(key, 0, sizeof(d3d12_shader_key));
877    key->stage = stage;
878 
879    if (prev) {
880       /* We require as inputs what the previous stage has written,
881        * except certain system values */
882       if (stage == PIPE_SHADER_FRAGMENT || stage == PIPE_SHADER_GEOMETRY)
883          system_out_values |= VARYING_BIT_POS;
884       if (stage == PIPE_SHADER_FRAGMENT)
885          system_out_values |= VARYING_BIT_PSIZ | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER;
886       uint64_t mask = prev->current->nir->info.outputs_written & ~system_out_values;
887       fill_varyings(&key->required_varying_inputs, prev->current->nir,
888                     nir_var_shader_out, mask, false);
889       key->prev_varying_outputs = prev->current->nir->info.outputs_written;
890 
891       if (stage == PIPE_SHADER_TESS_EVAL) {
892          uint32_t patch_mask = prev->current->nir->info.patch_outputs_written;
893          fill_varyings(&key->ds.required_patch_inputs, prev->current->nir,
894                        nir_var_shader_out, patch_mask, true);
895          key->ds.prev_patch_outputs = patch_mask;
896       }
897 
898       /* Set the provoking vertex based on the previous shader output. Only set the
899        * key value if the driver actually supports changing the provoking vertex though */
900       if (stage == PIPE_SHADER_FRAGMENT && sel_ctx->ctx->gfx_pipeline_state.rast &&
901           !sel_ctx->needs_vertex_reordering &&
902           d3d12_screen(sel_ctx->ctx->base.screen)->have_load_at_vertex)
903          key->fs.provoking_vertex = sel_ctx->provoking_vertex;
904 
905       /* Get the input clip distance size. The info's clip_distance_array_size corresponds
906        * to the output, and in cases of TES or GS you could have differently-sized inputs
907        * and outputs. For FS, there is no output, so it's repurposed to mean input.
908        */
909       if (stage != PIPE_SHADER_FRAGMENT)
910          key->input_clip_size = prev->current->nir->info.clip_distance_array_size;
911    }
912 
913    /* We require as outputs what the next stage reads,
914     * except certain system values */
915    if (next) {
916       if (!next->is_variant) {
917          if (stage == PIPE_SHADER_VERTEX)
918             system_generated_in_values |= VARYING_BIT_POS;
919          uint64_t mask = next->current->nir->info.inputs_read & ~system_generated_in_values;
920          fill_varyings(&key->required_varying_outputs, next->current->nir,
921                        nir_var_shader_in, mask, false);
922 
923          if (stage == PIPE_SHADER_TESS_CTRL) {
924             uint32_t patch_mask = next->current->nir->info.patch_outputs_read;
925             fill_varyings(&key->hs.required_patch_outputs, prev->current->nir,
926                           nir_var_shader_in, patch_mask, true);
927             key->hs.next_patch_inputs = patch_mask;
928          }
929       }
930       key->next_varying_inputs = next->current->nir->info.inputs_read;
931 
932    }
933 
934    if (stage == PIPE_SHADER_GEOMETRY ||
935        ((stage == PIPE_SHADER_VERTEX || stage == PIPE_SHADER_TESS_EVAL) &&
936           (!next || next->stage == PIPE_SHADER_FRAGMENT))) {
937       key->last_vertex_processing_stage = 1;
938       key->invert_depth = sel_ctx->ctx->reverse_depth_range;
939       key->halfz = sel_ctx->ctx->gfx_pipeline_state.rast ?
940          sel_ctx->ctx->gfx_pipeline_state.rast->base.clip_halfz : false;
941       if (sel_ctx->ctx->pstipple.enabled &&
942          sel_ctx->ctx->gfx_pipeline_state.rast->base.poly_stipple_enable)
943          key->next_varying_inputs |= VARYING_BIT_POS;
944    }
945 
946    if (stage == PIPE_SHADER_GEOMETRY && sel_ctx->ctx->gfx_pipeline_state.rast) {
947       struct pipe_rasterizer_state *rast = &sel_ctx->ctx->gfx_pipeline_state.rast->base;
948       if (sel_ctx->needs_point_sprite_lowering) {
949          key->gs.writes_psize = 1;
950          key->gs.point_size_per_vertex = rast->point_size_per_vertex;
951          key->gs.sprite_coord_enable = rast->sprite_coord_enable;
952          key->gs.sprite_origin_upper_left = (rast->sprite_coord_mode != PIPE_SPRITE_COORD_LOWER_LEFT);
953          if (sel_ctx->ctx->flip_y < 0)
954             key->gs.sprite_origin_upper_left = !key->gs.sprite_origin_upper_left;
955          key->gs.aa_point = rast->point_smooth;
956          key->gs.stream_output_factor = 6;
957       } else if (sel_ctx->fill_mode_lowered == PIPE_POLYGON_MODE_LINE) {
958          key->gs.stream_output_factor = 2;
959       } else if (sel_ctx->needs_vertex_reordering && !sel->is_variant) {
960          key->gs.triangle_strip = 1;
961       }
962 
963       if (sel->is_variant && next && next->initial->info.inputs_read & VARYING_BIT_PRIMITIVE_ID)
964          key->gs.primitive_id = 1;
965    } else if (stage == PIPE_SHADER_FRAGMENT) {
966       key->fs.missing_dual_src_outputs = sel_ctx->missing_dual_src_outputs;
967       key->fs.frag_result_color_lowering = sel_ctx->frag_result_color_lowering;
968       key->fs.manual_depth_range = sel_ctx->manual_depth_range;
969       key->fs.polygon_stipple = sel_ctx->ctx->pstipple.enabled &&
970          sel_ctx->ctx->gfx_pipeline_state.rast->base.poly_stipple_enable;
971       key->fs.multisample_disabled = sel_ctx->ctx->gfx_pipeline_state.rast &&
972          !sel_ctx->ctx->gfx_pipeline_state.rast->desc.MultisampleEnable;
973       if (sel_ctx->ctx->gfx_pipeline_state.blend &&
974           sel_ctx->ctx->gfx_pipeline_state.blend->desc.RenderTarget[0].LogicOpEnable &&
975           !sel_ctx->ctx->gfx_pipeline_state.has_float_rtv) {
976          key->fs.cast_to_uint = util_format_is_unorm(sel_ctx->ctx->fb.cbufs[0]->format);
977          key->fs.cast_to_int = !key->fs.cast_to_uint;
978       }
979    } else if (stage == PIPE_SHADER_TESS_CTRL) {
980       if (next && next->current->nir->info.stage == MESA_SHADER_TESS_EVAL) {
981          key->hs.primitive_mode = next->current->nir->info.tess._primitive_mode;
982          key->hs.ccw = next->current->nir->info.tess.ccw;
983          key->hs.point_mode = next->current->nir->info.tess.point_mode;
984          key->hs.spacing = next->current->nir->info.tess.spacing;
985       } else {
986          key->hs.primitive_mode = TESS_PRIMITIVE_QUADS;
987          key->hs.ccw = true;
988          key->hs.point_mode = false;
989          key->hs.spacing = TESS_SPACING_EQUAL;
990       }
991       key->hs.patch_vertices_in = MAX2(sel_ctx->ctx->patch_vertices, 1);
992    } else if (stage == PIPE_SHADER_TESS_EVAL) {
993       if (prev && prev->current->nir->info.stage == MESA_SHADER_TESS_CTRL)
994          key->ds.tcs_vertices_out = prev->current->nir->info.tess.tcs_vertices_out;
995       else
996          key->ds.tcs_vertices_out = 32;
997    }
998 
999    if (sel->samples_int_textures) {
1000       key->samples_int_textures = sel->samples_int_textures;
1001       key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
1002       /* Copy only states with integer textures */
1003       for(int i = 0; i < key->n_texture_states; ++i) {
1004          auto& wrap_state = sel_ctx->ctx->tex_wrap_states[stage][i];
1005          if (wrap_state.is_int_sampler) {
1006             memcpy(&key->tex_wrap_states[i], &wrap_state, sizeof(wrap_state));
1007             key->swizzle_state[i] = sel_ctx->ctx->tex_swizzle_state[stage][i];
1008          }
1009       }
1010    }
1011 
1012    for (unsigned i = 0; i < sel_ctx->ctx->num_samplers[stage]; ++i) {
1013       if (!sel_ctx->ctx->samplers[stage][i] ||
1014           sel_ctx->ctx->samplers[stage][i]->filter == PIPE_TEX_FILTER_NEAREST)
1015          continue;
1016 
1017       if (sel_ctx->ctx->samplers[stage][i]->wrap_r == PIPE_TEX_WRAP_CLAMP)
1018          key->tex_saturate_r |= 1 << i;
1019       if (sel_ctx->ctx->samplers[stage][i]->wrap_s == PIPE_TEX_WRAP_CLAMP)
1020          key->tex_saturate_s |= 1 << i;
1021       if (sel_ctx->ctx->samplers[stage][i]->wrap_t == PIPE_TEX_WRAP_CLAMP)
1022          key->tex_saturate_t |= 1 << i;
1023    }
1024 
1025    if (sel->compare_with_lod_bias_grad) {
1026       key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
1027       memcpy(key->sampler_compare_funcs, sel_ctx->ctx->tex_compare_func[stage],
1028              key->n_texture_states * sizeof(enum compare_func));
1029       memcpy(key->swizzle_state, sel_ctx->ctx->tex_swizzle_state[stage],
1030              key->n_texture_states * sizeof(dxil_texture_swizzle_state));
1031    }
1032 
1033    if (stage == PIPE_SHADER_VERTEX && sel_ctx->ctx->gfx_pipeline_state.ves) {
1034       key->vs.needs_format_emulation = sel_ctx->ctx->gfx_pipeline_state.ves->needs_format_emulation;
1035       if (key->vs.needs_format_emulation) {
1036          memcpy(key->vs.format_conversion, sel_ctx->ctx->gfx_pipeline_state.ves->format_conversion,
1037                 sel_ctx->ctx->gfx_pipeline_state.ves->num_elements * sizeof(enum pipe_format));
1038       }
1039    }
1040 
1041    if (stage == PIPE_SHADER_FRAGMENT &&
1042        sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY] &&
1043        sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant &&
1044        sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->gs_key.has_front_face) {
1045       key->fs.remap_front_facing = 1;
1046    }
1047 
1048    if (stage == PIPE_SHADER_COMPUTE && sel_ctx->variable_workgroup_size) {
1049       memcpy(key->cs.workgroup_size, sel_ctx->variable_workgroup_size, sizeof(key->cs.workgroup_size));
1050    }
1051 
1052    key->n_images = sel_ctx->ctx->num_image_views[stage];
1053    for (int i = 0; i < key->n_images; ++i) {
1054       key->image_format_conversion[i].emulated_format = sel_ctx->ctx->image_view_emulation_formats[stage][i];
1055       if (key->image_format_conversion[i].emulated_format != PIPE_FORMAT_NONE)
1056          key->image_format_conversion[i].view_format = sel_ctx->ctx->image_views[stage][i].format;
1057    }
1058 
1059    key->hash = d3d12_shader_key_hash(key);
1060 }
1061 
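/* Look for an existing variant matching the computed key; if none is found,
 * clone the initial NIR, apply the key-dependent lowering passes and compile
 * a new variant. */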
1062 static void
1063 select_shader_variant(struct d3d12_selection_context *sel_ctx, d3d12_shader_selector *sel,
1064                      d3d12_shader_selector *prev, d3d12_shader_selector *next)
1065 {
1066    struct d3d12_context *ctx = sel_ctx->ctx;
1067    d3d12_shader_key key;
1068    nir_shader *new_nir_variant;
1069    unsigned pstipple_binding = UINT32_MAX;
1070 
1071    d3d12_fill_shader_key(sel_ctx, &key, sel, prev, next);
1072 
1073    /* Check for an existing variant */
1074    for (d3d12_shader *variant = sel->first; variant;
1075         variant = variant->next_variant) {
1076 
1077       if (d3d12_compare_shader_keys(&key, &variant->key)) {
1078          sel->current = variant;
1079          return;
1080       }
1081    }
1082 
1083    /* Clone the NIR shader */
1084    new_nir_variant = nir_shader_clone(sel, sel->initial);
1085 
1086    /* Apply any needed lowering passes */
1087    if (key.gs.writes_psize) {
1088       NIR_PASS_V(new_nir_variant, d3d12_lower_point_sprite,
1089                  !key.gs.sprite_origin_upper_left,
1090                  key.gs.point_size_per_vertex,
1091                  key.gs.sprite_coord_enable,
1092                  key.next_varying_inputs);
1093 
1094       nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
1095       nir_shader_gather_info(new_nir_variant, impl);
1096    }
1097 
1098    if (key.gs.primitive_id) {
1099       NIR_PASS_V(new_nir_variant, d3d12_lower_primitive_id);
1100 
1101       nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
1102       nir_shader_gather_info(new_nir_variant, impl);
1103    }
1104 
1105    if (key.gs.triangle_strip)
1106       NIR_PASS_V(new_nir_variant, d3d12_lower_triangle_strip);
1107 
1108    if (key.fs.polygon_stipple) {
1109       NIR_PASS_V(new_nir_variant, nir_lower_pstipple_fs,
1110                  &pstipple_binding, 0, false);
1111 
1112       nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
1113       nir_shader_gather_info(new_nir_variant, impl);
1114    }
1115 
1116    if (key.fs.remap_front_facing) {
1117       d3d12_forward_front_face(new_nir_variant);
1118 
1119       nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
1120       nir_shader_gather_info(new_nir_variant, impl);
1121    }
1122 
1123    if (key.fs.missing_dual_src_outputs) {
1124       NIR_PASS_V(new_nir_variant, d3d12_add_missing_dual_src_target,
1125                  key.fs.missing_dual_src_outputs);
1126    } else if (key.fs.frag_result_color_lowering) {
1127       NIR_PASS_V(new_nir_variant, nir_lower_fragcolor,
1128                  key.fs.frag_result_color_lowering);
1129    }
1130 
1131    if (key.fs.manual_depth_range)
1132       NIR_PASS_V(new_nir_variant, d3d12_lower_depth_range);
1133 
1134    if (sel->compare_with_lod_bias_grad) {
1135       STATIC_ASSERT(sizeof(dxil_texture_swizzle_state) ==
1136                     sizeof(nir_lower_tex_shadow_swizzle));
1137 
1138       NIR_PASS_V(new_nir_variant, nir_lower_tex_shadow, key.n_texture_states,
1139                  key.sampler_compare_funcs, (nir_lower_tex_shadow_swizzle *)key.swizzle_state);
1140    }
1141 
1142    if (key.fs.cast_to_uint)
1143       NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, false);
1144    if (key.fs.cast_to_int)
1145       NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, true);
1146 
1147    if (key.n_images)
1148       NIR_PASS_V(new_nir_variant, d3d12_lower_image_casts, key.image_format_conversion);
1149 
1150    if (sel->workgroup_size_variable) {
1151       new_nir_variant->info.workgroup_size[0] = key.cs.workgroup_size[0];
1152       new_nir_variant->info.workgroup_size[1] = key.cs.workgroup_size[1];
1153       new_nir_variant->info.workgroup_size[2] = key.cs.workgroup_size[2];
1154    }
1155 
1156    if (new_nir_variant->info.stage == MESA_SHADER_TESS_CTRL) {
1157       new_nir_variant->info.tess._primitive_mode = (tess_primitive_mode)key.hs.primitive_mode;
1158       new_nir_variant->info.tess.ccw = key.hs.ccw;
1159       new_nir_variant->info.tess.point_mode = key.hs.point_mode;
1160       new_nir_variant->info.tess.spacing = key.hs.spacing;
1161 
1162       NIR_PASS_V(new_nir_variant, dxil_nir_set_tcs_patches_in, key.hs.patch_vertices_in);
1163    } else if (new_nir_variant->info.stage == MESA_SHADER_TESS_EVAL) {
1164       new_nir_variant->info.tess.tcs_vertices_out = key.ds.tcs_vertices_out;
1165    }
1166 
1167    {
1168       struct nir_lower_tex_options tex_options = { };
1169       tex_options.lower_txp = ~0u; /* No equivalent for textureProj */
1170       tex_options.lower_rect = true;
1171       tex_options.lower_rect_offset = true;
1172       tex_options.saturate_s = key.tex_saturate_s;
1173       tex_options.saturate_r = key.tex_saturate_r;
1174       tex_options.saturate_t = key.tex_saturate_t;
1175       tex_options.lower_invalid_implicit_lod = true;
1176       tex_options.lower_tg4_offsets = true;
1177 
1178       NIR_PASS_V(new_nir_variant, nir_lower_tex, &tex_options);
1179    }
1180 
1181    /* Add the needed inputs and outputs, and re-sort */
1182    if (prev) {
1183       uint64_t mask = key.required_varying_inputs.mask & ~new_nir_variant->info.inputs_read;
1184       new_nir_variant->info.inputs_read |= mask;
1185       while (mask) {
1186          int slot = u_bit_scan64(&mask);
1187          create_varyings_from_info(new_nir_variant, &key.required_varying_inputs, slot, nir_var_shader_in, false);
1188       }
1189 
1190       if (sel->stage == PIPE_SHADER_TESS_EVAL) {
1191          uint32_t patch_mask = (uint32_t)key.ds.required_patch_inputs.mask & ~new_nir_variant->info.patch_inputs_read;
1192          new_nir_variant->info.patch_inputs_read |= patch_mask;
1193          while (patch_mask) {
1194             int slot = u_bit_scan(&patch_mask);
1195             create_varyings_from_info(new_nir_variant, &key.ds.required_patch_inputs, slot, nir_var_shader_in, true);
1196          }
1197       }
1198       dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_in,
1199                                       key.prev_varying_outputs);
1200    }
1201 
1202 
1203    if (next) {
1204       uint64_t mask = key.required_varying_outputs.mask & ~new_nir_variant->info.outputs_written;
1205       new_nir_variant->info.outputs_written |= mask;
1206       while (mask) {
1207          int slot = u_bit_scan64(&mask);
1208          create_varyings_from_info(new_nir_variant, &key.required_varying_outputs, slot, nir_var_shader_out, false);
1209       }
1210 
1211       if (sel->stage == PIPE_SHADER_TESS_CTRL) {
1212          uint32_t patch_mask = (uint32_t)key.hs.required_patch_outputs.mask & ~new_nir_variant->info.patch_outputs_written;
1213          new_nir_variant->info.patch_outputs_written |= patch_mask;
1214          while (patch_mask) {
1215             int slot = u_bit_scan(&patch_mask);
1216             create_varyings_from_info(new_nir_variant, &key.ds.required_patch_inputs, slot, nir_var_shader_out, true);
1217          }
1218       }
1219       dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_out,
1220                                      key.next_varying_inputs);
1221    }
1222 
1223    d3d12_shader *new_variant = compile_nir(ctx, sel, &key, new_nir_variant);
1224    assert(new_variant);
1225 
1226    /* keep track of polygon stipple texture binding */
1227    new_variant->pstipple_binding = pstipple_binding;
1228 
1229    /* prepend the new shader in the selector chain and pick it */
1230    new_variant->next_variant = sel->first;
1231    sel->current = sel->first = new_variant;
1232 }
1233 
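/* Return the closest enabled stage before 'current' in the graphics pipeline. */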
1234 static d3d12_shader_selector *
1235 get_prev_shader(struct d3d12_context *ctx, pipe_shader_type current)
1236 {
1237    switch (current) {
1238    case PIPE_SHADER_VERTEX:
1239       return NULL;
1240    case PIPE_SHADER_FRAGMENT:
1241       if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
1242          return ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
1243       FALLTHROUGH;
1244    case PIPE_SHADER_GEOMETRY:
1245       if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL])
1246          return ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
1247       FALLTHROUGH;
1248    case PIPE_SHADER_TESS_EVAL:
1249       if (ctx->gfx_stages[PIPE_SHADER_TESS_CTRL])
1250          return ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];
1251       FALLTHROUGH;
1252    case PIPE_SHADER_TESS_CTRL:
1253       return ctx->gfx_stages[PIPE_SHADER_VERTEX];
1254    default:
1255       unreachable("shader type not supported");
1256    }
1257 }
1258 
1259 static d3d12_shader_selector *
1260 get_next_shader(struct d3d12_context *ctx, pipe_shader_type current)
1261 {
1262    switch (current) {
1263    case PIPE_SHADER_VERTEX:
1264       if (ctx->gfx_stages[PIPE_SHADER_TESS_CTRL])
1265          return ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];
1266       FALLTHROUGH;
1267    case PIPE_SHADER_TESS_CTRL:
1268       if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL])
1269          return ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
1270       FALLTHROUGH;
1271    case PIPE_SHADER_TESS_EVAL:
1272       if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
1273          return ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
1274       FALLTHROUGH;
1275    case PIPE_SHADER_GEOMETRY:
1276       return ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
1277    case PIPE_SHADER_FRAGMENT:
1278       return NULL;
1279    default:
1280       unreachable("shader type not supported");
1281    }
1282 }
1283 
1284 enum tex_scan_flags {
1285    TEX_SAMPLE_INTEGER_TEXTURE = 1 << 0,
1286    TEX_CMP_WITH_LOD_BIAS_GRAD = 1 << 1,
1287    TEX_SCAN_ALL_FLAGS         = (1 << 2) - 1
1288 };
1289 
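/* Walk all texture instructions and record whether the shader samples integer
 * textures and/or performs shadow comparisons with explicit LOD, bias, or
 * gradients; both cases need state-dependent emulation (see below). */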
1290 static unsigned
1291 scan_texture_use(nir_shader *nir)
1292 {
1293    unsigned result = 0;
1294    nir_foreach_function(func, nir) {
1295       nir_foreach_block(block, func->impl) {
1296          nir_foreach_instr(instr, block) {
1297             if (instr->type == nir_instr_type_tex) {
1298                auto tex = nir_instr_as_tex(instr);
1299                switch (tex->op) {
1300                case nir_texop_txb:
1301                case nir_texop_txl:
1302                case nir_texop_txd:
1303                   if (tex->is_shadow)
1304                      result |= TEX_CMP_WITH_LOD_BIAS_GRAD;
1305                   FALLTHROUGH;
1306                case nir_texop_tex:
1307                   if (tex->dest_type & (nir_type_int | nir_type_uint))
1308                      result |= TEX_SAMPLE_INTEGER_TEXTURE;
1309                default:
1310                   ;
1311                }
1312             }
1313             if (TEX_SCAN_ALL_FLAGS == result)
1314                return result;
1315          }
1316       }
1317    }
1318    return result;
1319 }
1320 
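/* Rewrite the stream-output register indices from Gallium's condensed slot
 * numbers back to VARYING_SLOT_* values and return the mask of slots that are
 * written to a stream output. */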
1321 static uint64_t
1322 update_so_info(struct pipe_stream_output_info *so_info,
1323                uint64_t outputs_written)
1324 {
1325    uint64_t so_outputs = 0;
1326    uint8_t reverse_map[64] = {0};
1327    unsigned slot = 0;
1328 
1329    while (outputs_written)
1330       reverse_map[slot++] = u_bit_scan64(&outputs_written);
1331 
1332    for (unsigned i = 0; i < so_info->num_outputs; i++) {
1333       struct pipe_stream_output *output = &so_info->output[i];
1334 
1335       /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
1336       output->register_index = reverse_map[output->register_index];
1337 
1338       so_outputs |= 1ull << output->register_index;
1339    }
1340 
1341    return so_outputs;
1342 }
1343 
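/* Common part of graphics and compute shader creation: record texture-usage and
 * workgroup-size info, lower integer cube maps, keep 'nir' as the blueprint for
 * variants, and compile an initial variant through the shader selector. */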
1344 static struct d3d12_shader_selector *
1345 d3d12_create_shader_impl(struct d3d12_context *ctx,
1346                          struct d3d12_shader_selector *sel,
1347                          struct nir_shader *nir,
1348                          struct d3d12_shader_selector *prev,
1349                          struct d3d12_shader_selector *next)
1350 {
1351    unsigned tex_scan_result = scan_texture_use(nir);
1352    sel->samples_int_textures = (tex_scan_result & TEX_SAMPLE_INTEGER_TEXTURE) != 0;
1353    sel->compare_with_lod_bias_grad = (tex_scan_result & TEX_CMP_WITH_LOD_BIAS_GRAD) != 0;
1354    sel->workgroup_size_variable = nir->info.workgroup_size_variable;
1355 
1356    /* Integer cube maps are not supported in DirectX because sampling is not supported
1357     * on integer textures and TextureLoad is not supported for cube maps, so we have to
1358     * lower integer cube maps to be handled like 2D texture arrays */
1359    NIR_PASS_V(nir, dxil_nir_lower_int_cubemaps, true);
1360 
1361    /* Keep this initial shader as the blue print for possible variants */
1362    sel->initial = nir;
1363 
1364    /*
1365     * We must compile some shader here, because if a previous or next shader exists later
1366     * when the shaders are bound, then the key evaluation in the shader selector will access
1367     * the current variant of these prev and next shaders, and we can only assign
1368     * a current variant once it has been successfully compiled.
1369     *
1370     * For shaders that require lowering because certain instructions are not available
1371     * and their emulation is state dependent (like sampling an integer texture that must be
1372     * emulated and needs handling of boundary conditions, or shadow compare sampling with LOD),
1373     * we must go through the shader selector here to create a compilable variant.
1374     * For shaders that do not depend on the state, this just compiles the original
1375     * shader.
1376     *
1377     * TODO: avoid having to compile the shader here if it can be foreseen that it will
1378     * be thrown away (i.e. it depends on states that are likely to change before the shader is
1379     * used for the first time)
1380     */
1381    struct d3d12_selection_context sel_ctx = {0};
1382    sel_ctx.ctx = ctx;
1383    select_shader_variant(&sel_ctx, sel, prev, next);
1384 
1385    if (!sel->current) {
1386       ralloc_free(sel);
1387       return NULL;
1388    }
1389 
1390    return sel;
1391 }
1392 
1393 struct d3d12_shader_selector *
1394 d3d12_create_shader(struct d3d12_context *ctx,
1395                     pipe_shader_type stage,
1396                     const struct pipe_shader_state *shader)
1397 {
1398    struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector);
1399    sel->stage = stage;
1400 
1401    struct nir_shader *nir = NULL;
1402 
1403    if (shader->type == PIPE_SHADER_IR_NIR) {
1404       nir = (nir_shader *)shader->ir.nir;
1405    } else {
1406       assert(shader->type == PIPE_SHADER_IR_TGSI);
1407       nir = tgsi_to_nir(shader->tokens, ctx->base.screen, false);
1408    }
1409 
1410    assert(nir != NULL);
1411    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1412    memcpy(&sel->so_info, &shader->stream_output, sizeof(sel->so_info));
1413    update_so_info(&sel->so_info, nir->info.outputs_written);
1414 
1415    d3d12_shader_selector *prev = get_prev_shader(ctx, sel->stage);
1416    d3d12_shader_selector *next = get_next_shader(ctx, sel->stage);
1417 
1418    NIR_PASS_V(nir, dxil_nir_split_clip_cull_distance);
1419    NIR_PASS_V(nir, d3d12_split_multistream_varyings);
1420 
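   /* Reassign input/output driver locations so varyings line up with the
    * stages that are currently bound before and after this one. */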
1421    if (nir->info.stage != MESA_SHADER_VERTEX)
1422       nir->info.inputs_read =
1423             dxil_reassign_driver_locations(nir, nir_var_shader_in,
1424                                             prev ? prev->current->nir->info.outputs_written : 0);
1425    else
1426       nir->info.inputs_read = dxil_sort_by_driver_location(nir, nir_var_shader_in);
1427 
1428    if (nir->info.stage != MESA_SHADER_FRAGMENT) {
1429       nir->info.outputs_written =
1430             dxil_reassign_driver_locations(nir, nir_var_shader_out,
1431                                             next ? next->current->nir->info.inputs_read : 0);
1432    } else {
1433       NIR_PASS_V(nir, nir_lower_fragcoord_wtrans);
1434       NIR_PASS_V(nir, d3d12_lower_sample_pos);
1435       dxil_sort_ps_outputs(nir);
1436    }
1437 
1438    return d3d12_create_shader_impl(ctx, sel, nir, prev, next);
1439 }
1440 
1441 struct d3d12_shader_selector *
1442 d3d12_create_compute_shader(struct d3d12_context *ctx,
1443                             const struct pipe_compute_state *shader)
1444 {
1445    struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector);
1446    sel->stage = PIPE_SHADER_COMPUTE;
1447 
1448    struct nir_shader *nir = NULL;
1449 
1450    if (shader->ir_type == PIPE_SHADER_IR_NIR) {
1451       nir = (nir_shader *)shader->prog;
1452    } else {
1453       assert(shader->ir_type == PIPE_SHADER_IR_TGSI);
1454       nir = tgsi_to_nir(shader->prog, ctx->base.screen, false);
1455    }
1456 
1457    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1458 
1459    NIR_PASS_V(nir, d3d12_lower_compute_state_vars);
1460 
1461    return d3d12_create_shader_impl(ctx, sel, nir, nullptr, nullptr);
1462 }
1463 
1464 void
1465 d3d12_select_shader_variants(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
1466 {
1467    static unsigned order[] = {
1468       PIPE_SHADER_VERTEX,
1469       PIPE_SHADER_TESS_CTRL,
1470       PIPE_SHADER_TESS_EVAL,
1471       PIPE_SHADER_GEOMETRY,
1472       PIPE_SHADER_FRAGMENT
1473    };
1474    struct d3d12_selection_context sel_ctx;
1475 
1476    sel_ctx.ctx = ctx;
1477    sel_ctx.needs_point_sprite_lowering = needs_point_sprite_lowering(ctx, dinfo);
1478    sel_ctx.fill_mode_lowered = fill_mode_lowered(ctx, dinfo);
1479    sel_ctx.cull_mode_lowered = cull_mode_lowered(ctx, sel_ctx.fill_mode_lowered);
1480    sel_ctx.provoking_vertex = get_provoking_vertex(&sel_ctx, &sel_ctx.alternate_tri, dinfo);
1481    sel_ctx.needs_vertex_reordering = needs_vertex_reordering(&sel_ctx, dinfo);
1482    sel_ctx.missing_dual_src_outputs = missing_dual_src_outputs(ctx);
1483    sel_ctx.frag_result_color_lowering = frag_result_color_lowering(ctx);
1484    sel_ctx.manual_depth_range = manual_depth_range(ctx);
1485 
1486    validate_geometry_shader_variant(&sel_ctx);
1487    validate_tess_ctrl_shader_variant(&sel_ctx);
1488 
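   /* Re-select the variant of every bound stage in pipeline order, so each
    * stage's key sees the current variants of its neighbors. */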
1489    for (unsigned i = 0; i < ARRAY_SIZE(order); ++i) {
1490       auto sel = ctx->gfx_stages[order[i]];
1491       if (!sel)
1492          continue;
1493 
1494       d3d12_shader_selector *prev = get_prev_shader(ctx, sel->stage);
1495       d3d12_shader_selector *next = get_next_shader(ctx, sel->stage);
1496 
1497       select_shader_variant(&sel_ctx, sel, prev, next);
1498    }
1499 }
1500 
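/* Return the dispatch block size if the bound compute shader was compiled with
 * a variable workgroup size, otherwise NULL. */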
1501 static const unsigned *
1502 workgroup_size_variable(struct d3d12_context *ctx,
1503                         const struct pipe_grid_info *info)
1504 {
1505    if (ctx->compute_state->workgroup_size_variable)
1506       return info->block;
1507    return nullptr;
1508 }
1509 
1510 void
1511 d3d12_select_compute_shader_variants(struct d3d12_context *ctx, const struct pipe_grid_info *info)
1512 {
1513    struct d3d12_selection_context sel_ctx = {};
1514 
1515    sel_ctx.ctx = ctx;
1516    sel_ctx.variable_workgroup_size = workgroup_size_variable(ctx, info);
1517 
1518    select_shader_variant(&sel_ctx, ctx->compute_state, nullptr, nullptr);
1519 }
1520 
1521 void
1522 d3d12_shader_free(struct d3d12_shader_selector *sel)
1523 {
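   /* Free the bytecode of every compiled variant, then the blueprint NIR and
    * the selector itself. */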
1524    auto shader = sel->first;
1525    while (shader) {
1526       free(shader->bytecode);
1527       shader = shader->next_variant;
1528    }
1529    ralloc_free(sel->initial);
1530    ralloc_free(sel);
1531 }
1532