1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "d3d12_compiler.h"
25 #include "d3d12_context.h"
26 #include "d3d12_debug.h"
27 #include "d3d12_screen.h"
28 #include "d3d12_nir_passes.h"
29 #include "nir_to_dxil.h"
30 #include "dxil_nir.h"
31 #include "dxil_nir_lower_int_cubemaps.h"
32 
33 #include "pipe/p_state.h"
34 
35 #include "nir.h"
36 #include "nir/nir_draw_helpers.h"
37 #include "nir/tgsi_to_nir.h"
38 #include "compiler/nir/nir_builder.h"
39 
40 #include "util/hash_table.h"
41 #include "util/u_memory.h"
42 #include "util/u_prim.h"
43 #include "util/u_simple_shaders.h"
44 #include "util/u_dl.h"
45 
46 #include <dxguids/dxguids.h>
47 
48 #ifdef _WIN32
49 #include "dxil_validator.h"
50 #endif
51 
52 const void *
53 d3d12_get_compiler_options(struct pipe_screen *screen,
54                            enum pipe_shader_ir ir,
55                            enum pipe_shader_type shader)
56 {
57    assert(ir == PIPE_SHADER_IR_NIR);
58    return &d3d12_screen(screen)->nir_options;
59 }
60 
61 static uint32_t
62 resource_dimension(enum glsl_sampler_dim dim)
63 {
64    switch (dim) {
65    case GLSL_SAMPLER_DIM_1D:
66       return RESOURCE_DIMENSION_TEXTURE1D;
67    case GLSL_SAMPLER_DIM_2D:
68       return RESOURCE_DIMENSION_TEXTURE2D;
69    case GLSL_SAMPLER_DIM_3D:
70       return RESOURCE_DIMENSION_TEXTURE3D;
71    case GLSL_SAMPLER_DIM_CUBE:
72       return RESOURCE_DIMENSION_TEXTURECUBE;
73    default:
74       return RESOURCE_DIMENSION_UNKNOWN;
75    }
76 }
77 
78 static bool
79 can_remove_dead_sampler(nir_variable *var, void *data)
80 {
81    const struct glsl_type *base_type = glsl_without_array(var->type);
82    return glsl_type_is_sampler(base_type) && !glsl_type_is_bare_sampler(base_type);
83 }
84 
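/* Compile one shader variant: run the D3D12-specific NIR lowering passes
 * selected by the key, translate the NIR to DXIL with nir_to_dxil(), and
 * (on Windows) validate/disassemble the module.  The resulting bytecode and
 * the SRV/UAV/UBO binding ranges are recorded in the returned d3d12_shader. */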
85 static struct d3d12_shader *
86 compile_nir(struct d3d12_context *ctx, struct d3d12_shader_selector *sel,
87             struct d3d12_shader_key *key, struct nir_shader *nir)
88 {
89    struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
90    struct d3d12_shader *shader = rzalloc(sel, d3d12_shader);
91    shader->key = *key;
92 
93    if (shader->key.n_texture_states > 0) {
94       shader->key.tex_wrap_states = (dxil_wrap_sampler_state*)ralloc_size(sel, sizeof(dxil_wrap_sampler_state) * shader->key.n_texture_states);
95       memcpy(shader->key.tex_wrap_states, key->tex_wrap_states, sizeof(dxil_wrap_sampler_state) * shader->key.n_texture_states);
96    }
97    else
98       shader->key.tex_wrap_states = nullptr;
99 
100    shader->nir = nir;
101    sel->current = shader;
102 
103    NIR_PASS_V(nir, nir_lower_samplers);
104    NIR_PASS_V(nir, dxil_nir_split_typed_samplers);
105 
106    NIR_PASS_V(nir, nir_opt_dce);
107    struct nir_remove_dead_variables_options dead_var_opts = {};
108    dead_var_opts.can_remove_var = can_remove_dead_sampler;
109    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform, &dead_var_opts);
110 
111    if (key->samples_int_textures)
112       NIR_PASS_V(nir, dxil_lower_sample_to_txf_for_integer_tex,
113                  key->n_texture_states, key->tex_wrap_states, key->swizzle_state,
114                  screen->base.caps.max_texture_lod_bias);
115 
116    if (key->stage == PIPE_SHADER_VERTEX && key->vs.needs_format_emulation)
117       dxil_nir_lower_vs_vertex_conversion(nir, key->vs.format_conversion);
118 
119    if (key->last_vertex_processing_stage) {
120       if (key->invert_depth)
121          NIR_PASS_V(nir, d3d12_nir_invert_depth, key->invert_depth, key->halfz);
122       if (!key->halfz)
123          NIR_PASS_V(nir, nir_lower_clip_halfz);
124       NIR_PASS_V(nir, d3d12_lower_yflip);
125    }
126 
127    NIR_PASS_V(nir, d3d12_lower_state_vars, shader);
128 
129    const struct dxil_nir_lower_loads_stores_options loads_stores_options = {};
130    NIR_PASS_V(nir, dxil_nir_lower_loads_stores_to_dxil, &loads_stores_options);
131 
132    if (key->stage == PIPE_SHADER_FRAGMENT && key->fs.multisample_disabled)
133       NIR_PASS_V(nir, d3d12_disable_multisampling);
134 
135    struct nir_to_dxil_options opts = {};
136    opts.interpolate_at_vertex = screen->have_load_at_vertex;
137    opts.lower_int16 = !screen->opts4.Native16BitShaderOpsSupported;
138    opts.last_ubo_is_not_arrayed = shader->num_state_vars > 0;
139    if (key->stage == PIPE_SHADER_FRAGMENT)
140       opts.provoking_vertex = key->fs.provoking_vertex;
141    opts.input_clip_size = key->input_clip_size;
142    opts.environment = DXIL_ENVIRONMENT_GL;
143    opts.shader_model_max = screen->max_shader_model;
144 #ifdef _WIN32
145    opts.validator_version_max = dxil_get_validator_version(ctx->dxil_validator);
146 #endif
147 
148    struct blob tmp;
149    if (!nir_to_dxil(nir, &opts, NULL, &tmp)) {
150       debug_printf("D3D12: nir_to_dxil failed\n");
151       return NULL;
152    }
153 
154    // Non-ubo variables
155    shader->begin_srv_binding = (UINT_MAX);
156    nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
157       auto type_no_array = glsl_without_array(var->type);
158       if (glsl_type_is_texture(type_no_array)) {
159          unsigned count = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
160          for (unsigned i = 0; i < count; ++i) {
161             shader->srv_bindings[var->data.binding + i].dimension = resource_dimension(glsl_get_sampler_dim(type_no_array));
162          }
163          shader->begin_srv_binding = MIN2(var->data.binding, shader->begin_srv_binding);
164          shader->end_srv_binding = MAX2(var->data.binding + count, shader->end_srv_binding);
165       }
166    }
167 
168    nir_foreach_image_variable(var, nir) {
169       auto type_no_array = glsl_without_array(var->type);
170       unsigned count = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
171       for (unsigned i = 0; i < count; ++i) {
172          shader->uav_bindings[var->data.driver_location + i].dimension = resource_dimension(glsl_get_sampler_dim(type_no_array));
173       }
174    }
175 
176    // Ubo variables
177    if(nir->info.num_ubos) {
178       shader->begin_ubo_binding = shader->nir->num_uniforms > 0 || !shader->nir->info.first_ubo_is_default_ubo ? 0 : 1;
179       // Ignore state_vars ubo as it is bound as root constants
180       shader->end_ubo_binding = nir->info.num_ubos - (shader->state_vars_used ? 1 : 0);
181    }
182 
183 #ifdef _WIN32
184    if (ctx->dxil_validator) {
185       if (!(d3d12_debug & D3D12_DEBUG_EXPERIMENTAL)) {
186          char *err;
187          if (!dxil_validate_module(ctx->dxil_validator, tmp.data,
188                                    tmp.size, &err) && err) {
189             debug_printf(
190                "== VALIDATION ERROR =============================================\n"
191                "%s\n"
192                "== END ==========================================================\n",
193                err);
194             ralloc_free(err);
195          }
196       }
197 
198       if (d3d12_debug & D3D12_DEBUG_DISASS) {
199          char *str = dxil_disasm_module(ctx->dxil_validator, tmp.data,
200                                         tmp.size);
201          fprintf(stderr,
202                  "== BEGIN SHADER ============================================\n"
203                  "%s\n"
204                  "== END SHADER ==============================================\n",
205                str);
206          ralloc_free(str);
207       }
208    }
209 #endif
210 
211    blob_finish_get_buffer(&tmp, &shader->bytecode, &shader->bytecode_length);
212 
213    if (d3d12_debug & D3D12_DEBUG_DXIL) {
214       char buf[256];
215       static int i;
216       snprintf(buf, sizeof(buf), "dump%02d.dxil", i++);
217       FILE *fp = fopen(buf, "wb");
218       fwrite(shader->bytecode, sizeof(char), shader->bytecode_length, fp);
219       fclose(fp);
220       fprintf(stderr, "wrote '%s'...\n", buf);
221    }
222    return shader;
223 }
224 
225 struct d3d12_selection_context {
226    struct d3d12_context *ctx;
227    bool needs_point_sprite_lowering;
228    bool needs_vertex_reordering;
229    unsigned provoking_vertex;
230    bool alternate_tri;
231    unsigned fill_mode_lowered;
232    unsigned cull_mode_lowered;
233    bool manual_depth_range;
234    unsigned missing_dual_src_outputs;
235    unsigned frag_result_color_lowering;
236    const unsigned *variable_workgroup_size;
237 };
238 
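/* When dual-source blending is enabled, return a bitmask (bits 0 and 1) of
 * the dual-source output indices that the bound fragment shader does not
 * write; the fragment variant uses this to add the missing target(s).
 * Returns 0 when no patching is needed. */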
239 unsigned
240 missing_dual_src_outputs(struct d3d12_context *ctx)
241 {
242    if (!ctx->gfx_pipeline_state.blend || !ctx->gfx_pipeline_state.blend->is_dual_src)
243       return 0;
244 
245    struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
246    if (!fs)
247       return 0;
248 
249    const nir_shader *s = fs->initial;
250 
251    unsigned indices_seen = 0;
252    nir_foreach_function_impl(impl, s) {
253       nir_foreach_block(block, impl) {
254          nir_foreach_instr(instr, block) {
255             if (instr->type != nir_instr_type_intrinsic)
256                continue;
257 
258             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
259             if (intr->intrinsic != nir_intrinsic_store_deref)
260                continue;
261 
262             nir_variable *var = nir_intrinsic_get_var(intr, 0);
263             if (var->data.mode != nir_var_shader_out)
264                continue;
265 
266             unsigned index = var->data.index;
267             if (var->data.location > FRAG_RESULT_DATA0)
268                index = var->data.location - FRAG_RESULT_DATA0;
269             else if (var->data.location != FRAG_RESULT_COLOR &&
270                      var->data.location != FRAG_RESULT_DATA0)
271                continue;
272 
273             indices_seen |= 1u << index;
274             if ((indices_seen & 3) == 3)
275                return 0;
276          }
277       }
278    }
279 
280    return 3 & ~indices_seen;
281 }
282 
283 static unsigned
284 frag_result_color_lowering(struct d3d12_context *ctx)
285 {
286    struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
287    assert(fs);
288 
289    if (fs->initial->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR))
290       return ctx->fb.nr_cbufs > 1 ? ctx->fb.nr_cbufs : 0;
291 
292    return 0;
293 }
294 
295 bool
296 manual_depth_range(struct d3d12_context *ctx)
297 {
298    if (!d3d12_need_zero_one_depth_range(ctx))
299       return false;
300 
301    /**
302     * If we can't use the D3D12 zero-one depth-range, we might have to apply
303     * depth-range ourselves.
304     *
305     * Because we only need to override the depth-range to zero-one range in
306     * the case where we write frag-depth, we only need to apply manual
307     * depth-range to gl_FragCoord.z.
308     *
309     * No extra care needs to be taken in the case where gl_FragDepth is
310     * written conditionally, because the GLSL 4.60 spec states:
311     *
312     *    If a shader statically assigns a value to gl_FragDepth, and there
313     *    is an execution path through the shader that does not set
314     *    gl_FragDepth, then the value of the fragment’s depth may be
315     *    undefined for executions of the shader that take that path. That
316     *    is, if the set of linked fragment shaders statically contain a
317     *    write to gl_FragDepth, then it is responsible for always writing
318     *    it.
319     */
320 
321    struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
322    return fs && fs->initial->info.inputs_read & VARYING_BIT_POS;
323 }
324 
325 static bool
326 needs_edge_flag_fix(enum mesa_prim mode)
327 {
328    return (mode == MESA_PRIM_QUADS ||
329            mode == MESA_PRIM_QUAD_STRIP ||
330            mode == MESA_PRIM_POLYGON);
331 }
332 
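/* Decide whether the polygon fill mode has to be emulated in a geometry
 * shader variant for this draw.  Returns the mode to lower to, or
 * PIPE_POLYGON_MODE_FILL when no lowering is needed (D3D12 rasterizes
 * wireframe natively as long as edge flags are not involved). */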
333 static unsigned
334 fill_mode_lowered(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
335 {
336    struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
337 
338    if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
339         !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant) ||
340        ctx->gfx_pipeline_state.rast == NULL ||
341        (dinfo->mode != MESA_PRIM_TRIANGLES &&
342         dinfo->mode != MESA_PRIM_TRIANGLE_STRIP))
343       return PIPE_POLYGON_MODE_FILL;
344 
345    /* D3D12 supports line mode (wireframe) but doesn't support edge flags */
346    if (((ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_LINE &&
347          ctx->gfx_pipeline_state.rast->base.cull_face != PIPE_FACE_FRONT) ||
348         (ctx->gfx_pipeline_state.rast->base.fill_back == PIPE_POLYGON_MODE_LINE &&
349          ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_FRONT)) &&
350        (vs->initial->info.outputs_written & VARYING_BIT_EDGE ||
351         needs_edge_flag_fix(ctx->initial_api_prim)))
352       return PIPE_POLYGON_MODE_LINE;
353 
354    if (ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_POINT)
355       return PIPE_POLYGON_MODE_POINT;
356 
357    return PIPE_POLYGON_MODE_FILL;
358 }
359 
360 static bool
361 has_stream_out_for_streams(struct d3d12_context *ctx)
362 {
363    unsigned mask = ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->initial->info.gs.active_stream_mask & ~1;
364    for (unsigned i = 0; i < ctx->gfx_pipeline_state.so_info.num_outputs; ++i) {
365       unsigned stream = ctx->gfx_pipeline_state.so_info.output[i].stream;
366       if (((1 << stream) & mask) &&
367          ctx->so_buffer_views[stream].SizeInBytes)
368          return true;
369    }
370    return false;
371 }
372 
373 static bool
374 needs_point_sprite_lowering(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
375 {
376    struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
377    struct d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
378 
379    if (gs != NULL && !gs->is_variant) {
380       /* There is a user GS; check if it outputs points with PSIZE */
381       return (gs->initial->info.gs.output_primitive == MESA_PRIM_POINTS &&
382               (gs->initial->info.outputs_written & VARYING_BIT_PSIZ ||
383                  ctx->gfx_pipeline_state.rast->base.point_size > 1.0) &&
384               (gs->initial->info.gs.active_stream_mask == 1 ||
385                  !has_stream_out_for_streams(ctx)));
386    } else {
387       /* No user GS; check if we are drawing wide points */
388       return ((dinfo->mode == MESA_PRIM_POINTS ||
389                fill_mode_lowered(ctx, dinfo) == PIPE_POLYGON_MODE_POINT) &&
390               (ctx->gfx_pipeline_state.rast->base.point_size > 1.0 ||
391                ctx->gfx_pipeline_state.rast->base.offset_point ||
392                (ctx->gfx_pipeline_state.rast->base.point_size_per_vertex &&
393                 vs->initial->info.outputs_written & VARYING_BIT_PSIZ)) &&
394               (vs->initial->info.outputs_written & VARYING_BIT_POS));
395    }
396 }
397 
398 static unsigned
399 cull_mode_lowered(struct d3d12_context *ctx, unsigned fill_mode)
400 {
401    if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
402         !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant) ||
403        ctx->gfx_pipeline_state.rast == NULL ||
404        ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_NONE)
405       return PIPE_FACE_NONE;
406 
407    return ctx->gfx_pipeline_state.rast->base.cull_face;
408 }
409 
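/* Return the index of the provoking vertex within the primitive emitted by
 * the last vertex-processing stage (0 when flatshade_first), and set
 * *alternate when a triangle-strip topology makes that vertex alternate
 * between successive triangles. */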
410 static unsigned
411 get_provoking_vertex(struct d3d12_selection_context *sel_ctx, bool *alternate, const struct pipe_draw_info *dinfo)
412 {
413    if (dinfo->mode == GL_PATCHES) {
414       *alternate = false;
415       return 0;
416    }
417 
418    struct d3d12_shader_selector *vs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_VERTEX];
419    struct d3d12_shader_selector *gs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
420    struct d3d12_shader_selector *last_vertex_stage = gs && !gs->is_variant ? gs : vs;
421 
422    enum mesa_prim mode;
423    switch (last_vertex_stage->stage) {
424    case PIPE_SHADER_GEOMETRY:
425       mode = (enum mesa_prim)last_vertex_stage->initial->info.gs.output_primitive;
426       break;
427    case PIPE_SHADER_VERTEX:
428       mode = (enum mesa_prim)dinfo->mode;
429       break;
430    default:
431       unreachable("Tessellation shaders are not supported");
432    }
433 
434    bool flatshade_first = sel_ctx->ctx->gfx_pipeline_state.rast &&
435                           sel_ctx->ctx->gfx_pipeline_state.rast->base.flatshade_first;
436    *alternate = (mode == GL_TRIANGLE_STRIP || mode == GL_TRIANGLE_STRIP_ADJACENCY) &&
437                 (!gs || gs->is_variant ||
438                  gs->initial->info.gs.vertices_out > u_prim_vertex_count(mode)->min);
439    return flatshade_first ? 0 : u_prim_vertex_count(mode)->min - 1;
440 }
441 
442 bool
443 has_flat_varyings(struct d3d12_context *ctx)
444 {
445    struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
446 
447    if (!fs)
448       return false;
449 
450    nir_foreach_variable_with_modes(input, fs->initial,
451                                    nir_var_shader_in) {
452       if (input->data.interpolation == INTERP_MODE_FLAT &&
453           /* Disregard sysvals */
454           (input->data.location >= VARYING_SLOT_VAR0 ||
455              input->data.location <= VARYING_SLOT_TEX7))
456          return true;
457    }
458 
459    return false;
460 }
461 
462 static bool
463 needs_vertex_reordering(struct d3d12_selection_context *sel_ctx, const struct pipe_draw_info *dinfo)
464 {
465    struct d3d12_context *ctx = sel_ctx->ctx;
466    bool flat = ctx->has_flat_varyings;
467    bool xfb = ctx->gfx_pipeline_state.num_so_targets > 0;
468 
469    if (fill_mode_lowered(ctx, dinfo) != PIPE_POLYGON_MODE_FILL)
470       return false;
471 
472    /* TODO add support for line primitives */
473    if (u_reduced_prim((mesa_prim)dinfo->mode) == MESA_PRIM_LINES)
474       return false;
475 
476    /* When flat shading a triangle and the provoking vertex is not the first one, we use load_at_vertex.
477       If that is not available on this adapter, or if it's a triangle strip, we need to reorder the vertices. */
478    if (flat && sel_ctx->provoking_vertex >= 2 && (!d3d12_screen(ctx->base.screen)->have_load_at_vertex ||
479                                                   sel_ctx->alternate_tri))
480       return true;
481 
482    /* When transform feedback is enabled and the output is alternating (triangle strip or triangle
483       strip with adjacency), we need to reorder vertices to get the order expected by OpenGL. This
484       only works when there is no flat shading involved. In that scenario, we don't care about
485       the provoking vertex. */
486    if (xfb && !flat && sel_ctx->alternate_tri) {
487       sel_ctx->provoking_vertex = 0;
488       return true;
489    }
490 
491    return false;
492 }
493 
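/* Gather the varying layout (type, interpolation, location_frac, etc.) for
 * the slots in 'mask', hash it, and de-duplicate it through the screen's
 * varying_info_set so identical layouts share a single d3d12_varying_info
 * across shader selectors. */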
494 static d3d12_varying_info*
495 fill_varyings(struct d3d12_context *ctx, const nir_shader *s,
496               nir_variable_mode modes, uint64_t mask, bool patch)
497 {
498    struct d3d12_varying_info info;
499 
500    info.max = 0;
501    info.mask = 0;
502    info.hash = 0;
503 
504    nir_foreach_variable_with_modes(var, s, modes) {
505       unsigned slot = var->data.location;
506       bool is_generic_patch = slot >= VARYING_SLOT_PATCH0;
507       if (patch ^ is_generic_patch)
508          continue;
509       if (is_generic_patch)
510          slot -= VARYING_SLOT_PATCH0;
511       uint64_t slot_bit = BITFIELD64_BIT(slot);
512 
513       if (!(mask & slot_bit))
514          continue;
515 
516       if ((info.mask & slot_bit) == 0) {
517          memset(info.slots + slot, 0, sizeof(info.slots[0]));
518          info.max = MAX2(info.max, slot);
519       }
520 
521       const struct glsl_type *type = var->type;
522       if (nir_is_arrayed_io(var, s->info.stage))
523          type = glsl_get_array_element(type);
524       info.slots[slot].types[var->data.location_frac] = type;
525 
526       info.slots[slot].patch = var->data.patch;
527       auto& var_slot = info.slots[slot].vars[var->data.location_frac];
528       var_slot.driver_location = var->data.driver_location;
529       var_slot.interpolation = var->data.interpolation;
530       var_slot.compact = var->data.compact;
531       var_slot.always_active_io = var->data.always_active_io;
532       info.mask |= slot_bit;
533       info.slots[slot].location_frac_mask |= (1 << var->data.location_frac);
534    }
535 
536    for (uint32_t i = 0; i <= info.max; ++i) {
537       if (((1llu << i) & info.mask) == 0)
538          memset(info.slots + i, 0, sizeof(info.slots[0]));
539       else
540          info.hash = _mesa_hash_data_with_seed(info.slots + i, sizeof(info.slots[0]), info.hash);
541    }
542    info.hash = _mesa_hash_data_with_seed(&info.mask, sizeof(info.mask), info.hash);
543 
544    struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
545 
546    mtx_lock(&screen->varying_info_mutex);
547    set_entry *pentry = _mesa_set_search_pre_hashed(screen->varying_info_set, info.hash, &info);
548    if (pentry != nullptr) {
549       mtx_unlock(&screen->varying_info_mutex);
550       return (d3d12_varying_info*)pentry->key;
551    }
552    else {
553       d3d12_varying_info *key = MALLOC_STRUCT(d3d12_varying_info);
554       *key = info;
555 
556       _mesa_set_add_pre_hashed(screen->varying_info_set, info.hash, key);
557 
558       mtx_unlock(&screen->varying_info_mutex);
559       return key;
560    }
561 }
562 
563 static void
564 fill_flat_varyings(struct d3d12_gs_variant_key *key, d3d12_shader_selector *fs)
565 {
566    if (!fs)
567       return;
568 
569    nir_foreach_variable_with_modes(input, fs->initial,
570                                    nir_var_shader_in) {
571       if (input->data.interpolation == INTERP_MODE_FLAT)
572          key->flat_varyings |= BITFIELD64_BIT(input->data.location);
573    }
574 }
575 
576 bool
577 d3d12_compare_varying_info(const d3d12_varying_info *expect, const d3d12_varying_info *have)
578 {
579    if (expect == have)
580       return true;
581 
582    if (expect == nullptr || have == nullptr)
583       return false;
584 
585    if (expect->mask != have->mask
586       || expect->max != have->max)
587       return false;
588 
589    if (!expect->mask)
590       return true;
591 
592    /* 6 is a rough (wild) guess for a bulk memcmp cross-over point.  When there
593     * are a small number of slots present, comparing individual slots is much faster. */
594    if (util_bitcount64(expect->mask) < 6) {
595       uint64_t mask = expect->mask;
596       while (mask) {
597          int slot = u_bit_scan64(&mask);
598          if (memcmp(&expect->slots[slot], &have->slots[slot], sizeof(have->slots[slot])))
599             return false;
600       }
601 
602       return true;
603    }
604 
605    return !memcmp(expect->slots, have->slots, sizeof(expect->slots[0]) * expect->max);
606 }
607 
608 
609 uint32_t varying_info_hash(const void *info) {
610    return ((d3d12_varying_info*)info)->hash;
611 }
612 bool varying_info_compare(const void *a, const void *b) {
613    return d3d12_compare_varying_info((d3d12_varying_info*)a, (d3d12_varying_info*)b);
614 }
615 void varying_info_entry_destroy(set_entry *entry) {
616    if (entry->key)
617       free((void*)entry->key);
618 }
619 
620 void
621 d3d12_varying_cache_init(struct d3d12_screen *screen) {
622    screen->varying_info_set = _mesa_set_create(nullptr, varying_info_hash, varying_info_compare);
623 }
624 
625 void
626 d3d12_varying_cache_destroy(struct d3d12_screen *screen) {
627    _mesa_set_destroy(screen->varying_info_set, varying_info_entry_destroy);
628 }
629 
630 
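/* When no user geometry shader is bound, pick (or create) the GS variant
 * needed to emulate fill-mode lowering, point sprites or vertex reordering,
 * and bind it as the geometry stage for this draw. */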
631 static void
632 validate_geometry_shader_variant(struct d3d12_selection_context *sel_ctx)
633 {
634    struct d3d12_context *ctx = sel_ctx->ctx;
635    d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
636 
637    /* Nothing to do if there is a user geometry shader bound */
638    if (gs != NULL && !gs->is_variant)
639       return;
640 
641    d3d12_shader_selector* vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
642    d3d12_shader_selector* fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
643 
644    struct d3d12_gs_variant_key key;
645    key.all = 0;
646    key.flat_varyings = 0;
647 
648    /* Fill the geometry shader variant key */
649    if (sel_ctx->fill_mode_lowered != PIPE_POLYGON_MODE_FILL) {
650       key.fill_mode = sel_ctx->fill_mode_lowered;
651       key.cull_mode = sel_ctx->cull_mode_lowered;
652       key.has_front_face = (fs->initial->info.inputs_read & VARYING_BIT_FACE) != 0;
653       if (key.cull_mode != PIPE_FACE_NONE || key.has_front_face)
654          key.front_ccw = ctx->gfx_pipeline_state.rast->base.front_ccw ^ (ctx->flip_y < 0);
655       key.edge_flag_fix = needs_edge_flag_fix(ctx->initial_api_prim);
656       fill_flat_varyings(&key, fs);
657       if (key.flat_varyings != 0)
658          key.flatshade_first = ctx->gfx_pipeline_state.rast->base.flatshade_first;
659    } else if (sel_ctx->needs_point_sprite_lowering) {
660       key.passthrough = true;
661    } else if (sel_ctx->needs_vertex_reordering) {
662       /* TODO support cases where flat shading (pv != 0) and xfb are enabled, or lines */
663       key.provoking_vertex = sel_ctx->provoking_vertex;
664       key.alternate_tri = sel_ctx->alternate_tri;
665    }
666 
667    if (vs->initial_output_vars == nullptr) {
668       vs->initial_output_vars = fill_varyings(sel_ctx->ctx, vs->initial, nir_var_shader_out,
669                                                 vs->initial->info.outputs_written, false);
670    }
671    key.varyings = vs->initial_output_vars;
672    gs = d3d12_get_gs_variant(ctx, &key);
673    ctx->gfx_stages[PIPE_SHADER_GEOMETRY] = gs;
674 }
675 
676 static void
677 validate_tess_ctrl_shader_variant(struct d3d12_selection_context *sel_ctx)
678 {
679    struct d3d12_context *ctx = sel_ctx->ctx;
680    d3d12_shader_selector *tcs = ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];
681 
682    /* Nothing to do if there is a user tess ctrl shader bound */
683    if (tcs != NULL && !tcs->is_variant)
684       return;
685 
686    d3d12_shader_selector *tes = ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
687    struct d3d12_tcs_variant_key key = {0};
688 
689    bool variant_needed = tes != nullptr;
690 
691    /* Fill the variant key */
692    if (variant_needed) {
693       if (tes->initial_input_vars == nullptr) {
694          tes->initial_input_vars = fill_varyings(sel_ctx->ctx, tes->initial, nir_var_shader_in,
695                                                  tes->initial->info.inputs_read & ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER),
696                                                  false);
697       }
698       key.varyings = tes->initial_input_vars;
699       key.vertices_out = ctx->patch_vertices;
700    }
701 
702    /* Find/create the proper variant and bind it */
703    tcs = variant_needed ? d3d12_get_tcs_variant(ctx, &key) : NULL;
704    ctx->gfx_stages[PIPE_SHADER_TESS_CTRL] = tcs;
705 }
706 
707 static bool
708 d3d12_compare_shader_keys(struct d3d12_selection_context* sel_ctx, const d3d12_shader_key *expect, const d3d12_shader_key *have)
709 {
710    assert(expect->stage == have->stage);
711    assert(expect);
712    assert(have);
713 
714    if (expect->hash != have->hash)
715       return false;
716 
717    switch (expect->stage) {
718    case PIPE_SHADER_VERTEX:
719       if (expect->vs.needs_format_emulation != have->vs.needs_format_emulation)
720          return false;
721 
722       if (expect->vs.needs_format_emulation) {
723          if (memcmp(expect->vs.format_conversion, have->vs.format_conversion,
724             sel_ctx->ctx->gfx_pipeline_state.ves->num_elements * sizeof(enum pipe_format)))
725             return false;
726       }
727       break;
728    case PIPE_SHADER_GEOMETRY:
729       if (expect->gs.all != have->gs.all)
730          return false;
731       break;
732    case PIPE_SHADER_TESS_CTRL:
733       if (expect->hs.all != have->hs.all)
734          return false;
735       break;
736    case PIPE_SHADER_TESS_EVAL:
737       if (expect->ds.tcs_vertices_out != have->ds.tcs_vertices_out ||
738           expect->ds.prev_patch_outputs != have->ds.prev_patch_outputs)
739          return false;
740       break;
741    case PIPE_SHADER_FRAGMENT:
742       if (expect->fs.all != have->fs.all)
743          return false;
744       break;
745    case PIPE_SHADER_COMPUTE:
746       if (memcmp(expect->cs.workgroup_size, have->cs.workgroup_size,
747                  sizeof(have->cs.workgroup_size)))
748          return false;
749       break;
750    default:
751       unreachable("invalid stage");
752    }
753 
754    if (expect->n_texture_states != have->n_texture_states)
755       return false;
756 
757    if (expect->n_images != have->n_images)
758       return false;
759 
760    if (expect->n_texture_states > 0 &&
761        memcmp(expect->tex_wrap_states, have->tex_wrap_states,
762               expect->n_texture_states * sizeof(dxil_wrap_sampler_state)))
763       return false;
764 
765    if (memcmp(expect->swizzle_state, have->swizzle_state,
766               expect->n_texture_states * sizeof(dxil_texture_swizzle_state)))
767       return false;
768 
769    if (memcmp(expect->sampler_compare_funcs, have->sampler_compare_funcs,
770               expect->n_texture_states * sizeof(enum compare_func)))
771       return false;
772 
773    if (memcmp(expect->image_format_conversion, have->image_format_conversion,
774       expect->n_images * sizeof(struct d3d12_image_format_conversion_info)))
775       return false;
776 
777    if (!(expect->next_varying_inputs == have->next_varying_inputs &&
778          expect->prev_varying_outputs == have->prev_varying_outputs &&
779          expect->common_all == have->common_all &&
780          expect->tex_saturate_s == have->tex_saturate_s &&
781          expect->tex_saturate_r == have->tex_saturate_r &&
782          expect->tex_saturate_t == have->tex_saturate_t))
783       return false;
784 
785    if (expect->next_has_frac_inputs &&
786        expect->next_varying_frac_inputs != have->next_varying_frac_inputs &&
787        memcmp(expect->next_varying_frac_inputs, have->next_varying_frac_inputs, sizeof(d3d12_shader_selector::varying_frac_inputs)))
788       return false;
789    if (expect->prev_has_frac_outputs &&
790        expect->prev_varying_frac_outputs != have->prev_varying_frac_outputs &&
791        memcmp(expect->prev_varying_frac_outputs, have->prev_varying_frac_outputs, sizeof(d3d12_shader_selector::varying_frac_outputs)))
792       return false;
793    return true;
794 }
795 
796 static uint32_t
797 d3d12_shader_key_hash(const d3d12_shader_key *key)
798 {
799    uint32_t hash;
800 
801    hash = (uint32_t)key->stage;
802 
803    hash += static_cast<uint32_t>(key->next_varying_inputs);
804    hash += static_cast<uint32_t>(key->prev_varying_outputs);
805    hash += key->common_all;
806    if (key->next_has_frac_inputs)
807       hash = _mesa_hash_data_with_seed(key->next_varying_frac_inputs, sizeof(d3d12_shader_selector::varying_frac_inputs), hash);
808    if (key->prev_has_frac_outputs)
809       hash = _mesa_hash_data_with_seed(key->prev_varying_frac_outputs, sizeof(d3d12_shader_selector::varying_frac_outputs), hash);
810    switch (key->stage) {
811    case PIPE_SHADER_VERTEX:
812       /* (Probably) not worth the bit extraction for needs_format_emulation and
813        * the rest of the format_conversion data is large.  Don't bother
814        * hashing for now until this is shown to be worthwhile. */
815        break;
816    case PIPE_SHADER_GEOMETRY:
817       hash += static_cast<uint32_t>(key->gs.all);
818       break;
819    case PIPE_SHADER_FRAGMENT:
820       hash += key->fs.all;
821       break;
822    case PIPE_SHADER_COMPUTE:
823       hash = _mesa_hash_data_with_seed(&key->cs, sizeof(key->cs), hash);
824       break;
825    case PIPE_SHADER_TESS_CTRL:
826       hash += static_cast<uint32_t>(key->hs.all);
827       break;
828    case PIPE_SHADER_TESS_EVAL:
829       hash += key->ds.tcs_vertices_out;
830       hash += key->ds.prev_patch_outputs;
831       break;
832    default:
833       /* No type specific information to hash for other stages. */
834       break;
835    }
836 
837    hash += key->n_texture_states;
838    hash += key->n_images;
839    return hash;
840 }
841 
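/* Build the variant key for 'sel' from the currently bound state and from
 * the adjacent stages: 'prev' supplies the varyings this stage reads and
 * 'next' the varyings it must write, plus stage-specific bits (point
 * sprites, stipple, format emulation, workgroup size, ...).  The key is
 * hashed at the end so lookups can reject mismatches cheaply. */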
842 static void
843 d3d12_fill_shader_key(struct d3d12_selection_context *sel_ctx,
844                       d3d12_shader_key *key, d3d12_shader_selector *sel,
845                       d3d12_shader_selector *prev, d3d12_shader_selector *next)
846 {
847    pipe_shader_type stage = sel->stage;
848 
849    memset(key, 0, offsetof(d3d12_shader_key, vs));
850    key->stage = stage;
851 
852    switch (stage)
853    {
854    case PIPE_SHADER_VERTEX:
855       key->vs.needs_format_emulation = 0;
856       break;
857    case PIPE_SHADER_FRAGMENT:
858       key->fs.all = 0;
859       break;
860    case PIPE_SHADER_GEOMETRY:
861       key->gs.all = 0;
862       break;
863    case PIPE_SHADER_TESS_CTRL:
864       key->hs.all = 0;
865       break;
866    case PIPE_SHADER_TESS_EVAL:
867       key->ds.tcs_vertices_out = 0;
868       key->ds.prev_patch_outputs = 0;
869       break;
870    case PIPE_SHADER_COMPUTE:
871       memset(key->cs.workgroup_size, 0, sizeof(key->cs.workgroup_size));
872       break;
873    default: unreachable("Invalid stage type");
874    }
875 
876    key->n_texture_states = 0;
877    key->tex_wrap_states = sel_ctx->ctx->tex_wrap_states_shader_key;
878    key->n_images = 0;
879 
880    if (prev) {
881       key->prev_varying_outputs = prev->initial->info.outputs_written;
882       key->prev_has_frac_outputs = prev->has_frac_outputs;
883       key->prev_varying_frac_outputs = prev->varying_frac_outputs;
884 
885       if (stage == PIPE_SHADER_TESS_EVAL)
886          key->ds.prev_patch_outputs = prev->initial->info.patch_outputs_written;
887 
888       /* Set the provoking vertex based on the previous shader output. Only set the
889        * key value if the driver actually supports changing the provoking vertex though */
890       if (stage == PIPE_SHADER_FRAGMENT && sel_ctx->ctx->gfx_pipeline_state.rast &&
891           !sel_ctx->needs_vertex_reordering &&
892           d3d12_screen(sel_ctx->ctx->base.screen)->have_load_at_vertex)
893          key->fs.provoking_vertex = sel_ctx->provoking_vertex;
894 
895       /* Get the input clip distance size. The info's clip_distance_array_size corresponds
896        * to the output, and in cases of TES or GS you could have differently-sized inputs
897        * and outputs. For FS, there is no output, so it's repurposed to mean input.
898        */
899       if (stage != PIPE_SHADER_FRAGMENT)
900          key->input_clip_size = prev->initial->info.clip_distance_array_size;
901    }
902 
903    if (next) {
904       if (stage == PIPE_SHADER_TESS_CTRL)
905          key->hs.next_patch_inputs = next->initial->info.patch_outputs_read;
906       key->next_varying_inputs = next->initial->info.inputs_read;
907       if (BITSET_TEST(next->initial->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID))
908          key->next_varying_inputs |= VARYING_SLOT_PRIMITIVE_ID;
909       key->next_has_frac_inputs = next->has_frac_inputs;
910       key->next_varying_frac_inputs = next->varying_frac_inputs;
911    }
912 
913    if (stage == PIPE_SHADER_GEOMETRY ||
914        ((stage == PIPE_SHADER_VERTEX || stage == PIPE_SHADER_TESS_EVAL) &&
915           (!next || next->stage == PIPE_SHADER_FRAGMENT))) {
916       key->last_vertex_processing_stage = 1;
917       key->invert_depth = sel_ctx->ctx->reverse_depth_range;
918       key->halfz = sel_ctx->ctx->gfx_pipeline_state.rast ?
919          sel_ctx->ctx->gfx_pipeline_state.rast->base.clip_halfz : false;
920       if (sel_ctx->ctx->pstipple.enabled &&
921          sel_ctx->ctx->gfx_pipeline_state.rast->base.poly_stipple_enable)
922          key->next_varying_inputs |= VARYING_BIT_POS;
923    }
924 
925    if (stage == PIPE_SHADER_GEOMETRY && sel_ctx->ctx->gfx_pipeline_state.rast) {
926       struct pipe_rasterizer_state *rast = &sel_ctx->ctx->gfx_pipeline_state.rast->base;
927       if (sel_ctx->needs_point_sprite_lowering) {
928          key->gs.writes_psize = 1;
929          key->gs.point_size_per_vertex = rast->point_size_per_vertex;
930          key->gs.sprite_coord_enable = rast->sprite_coord_enable;
931          key->gs.sprite_origin_upper_left = (rast->sprite_coord_mode != PIPE_SPRITE_COORD_LOWER_LEFT);
932          if (sel_ctx->ctx->flip_y < 0)
933             key->gs.sprite_origin_upper_left = !key->gs.sprite_origin_upper_left;
934          key->gs.aa_point = rast->point_smooth;
935          key->gs.stream_output_factor = 6;
936       } else if (sel_ctx->fill_mode_lowered == PIPE_POLYGON_MODE_LINE) {
937          key->gs.stream_output_factor = 2;
938       } else if (sel_ctx->needs_vertex_reordering && !sel->is_variant) {
939          key->gs.triangle_strip = 1;
940       }
941 
942       if (sel->is_variant && next) {
943          if (next->initial->info.inputs_read & VARYING_BIT_FACE)
944             key->next_varying_inputs = (key->next_varying_inputs | VARYING_BIT_VAR(12)) & ~VARYING_BIT_FACE;
945          if (next->initial->info.inputs_read & VARYING_BIT_PRIMITIVE_ID)
946             key->gs.primitive_id = 1;
947       }
948    } else if (stage == PIPE_SHADER_FRAGMENT) {
949       key->fs.missing_dual_src_outputs = sel_ctx->missing_dual_src_outputs;
950       key->fs.frag_result_color_lowering = sel_ctx->frag_result_color_lowering;
951       key->fs.manual_depth_range = sel_ctx->manual_depth_range;
952       key->fs.polygon_stipple = sel_ctx->ctx->pstipple.enabled &&
953          sel_ctx->ctx->gfx_pipeline_state.rast->base.poly_stipple_enable;
954       key->fs.multisample_disabled = sel_ctx->ctx->gfx_pipeline_state.rast &&
955          !sel_ctx->ctx->gfx_pipeline_state.rast->desc.MultisampleEnable;
956       if (sel_ctx->ctx->gfx_pipeline_state.blend &&
957           sel_ctx->ctx->gfx_pipeline_state.blend->desc.RenderTarget[0].LogicOpEnable &&
958           !sel_ctx->ctx->gfx_pipeline_state.has_float_rtv) {
959          key->fs.cast_to_uint = util_format_is_unorm(sel_ctx->ctx->fb.cbufs[0]->format);
960          key->fs.cast_to_int = !key->fs.cast_to_uint;
961       }
962       if (sel_ctx->needs_point_sprite_lowering) {
963          if (sel->initial->info.inputs_read & VARYING_BIT_FACE)
964             key->prev_varying_outputs = (key->prev_varying_outputs | VARYING_BIT_VAR(12)) & ~VARYING_BIT_FACE;
965          key->prev_varying_outputs |= sel->initial->info.inputs_read & (VARYING_BIT_PNTC | BITFIELD64_RANGE(VARYING_SLOT_TEX0, 8));
966       }
967    } else if (stage == PIPE_SHADER_TESS_CTRL) {
968       if (next && next->initial->info.stage == MESA_SHADER_TESS_EVAL) {
969          key->hs.primitive_mode = next->initial->info.tess._primitive_mode;
970          key->hs.ccw = next->initial->info.tess.ccw;
971          key->hs.point_mode = next->initial->info.tess.point_mode;
972          key->hs.spacing = next->initial->info.tess.spacing;
973       } else {
974          key->hs.primitive_mode = TESS_PRIMITIVE_QUADS;
975          key->hs.ccw = true;
976          key->hs.point_mode = false;
977          key->hs.spacing = TESS_SPACING_EQUAL;
978       }
979       key->hs.patch_vertices_in = MAX2(sel_ctx->ctx->patch_vertices, 1);
980    } else if (stage == PIPE_SHADER_TESS_EVAL) {
981       if (prev && prev->initial->info.stage == MESA_SHADER_TESS_CTRL)
982          key->ds.tcs_vertices_out = prev->initial->info.tess.tcs_vertices_out;
983       else
984          key->ds.tcs_vertices_out = 32;
985    }
986 
987    if (sel->samples_int_textures) {
988       key->samples_int_textures = sel->samples_int_textures;
989       key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
990       /* Copy only states with integer textures */
991       for(int i = 0; i < key->n_texture_states; ++i) {
992          auto& wrap_state = sel_ctx->ctx->tex_wrap_states[stage][i];
993          if (wrap_state.is_int_sampler) {
994             memcpy(&key->tex_wrap_states[i], &wrap_state, sizeof(wrap_state));
995             key->swizzle_state[i] = sel_ctx->ctx->tex_swizzle_state[stage][i];
996          } else {
997             memset(&key->tex_wrap_states[i], 0, sizeof(key->tex_wrap_states[i]));
998             key->swizzle_state[i] = { PIPE_SWIZZLE_X,  PIPE_SWIZZLE_Y,  PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W };
999          }
1000       }
1001    }
1002 
1003    for (unsigned i = 0, e = sel_ctx->ctx->num_samplers[stage]; i < e; ++i) {
1004       if (!sel_ctx->ctx->samplers[stage][i] ||
1005           sel_ctx->ctx->samplers[stage][i]->filter == PIPE_TEX_FILTER_NEAREST)
1006          continue;
1007 
1008       if (sel_ctx->ctx->samplers[stage][i]->wrap_r == PIPE_TEX_WRAP_CLAMP)
1009          key->tex_saturate_r |= 1 << i;
1010       if (sel_ctx->ctx->samplers[stage][i]->wrap_s == PIPE_TEX_WRAP_CLAMP)
1011          key->tex_saturate_s |= 1 << i;
1012       if (sel_ctx->ctx->samplers[stage][i]->wrap_t == PIPE_TEX_WRAP_CLAMP)
1013          key->tex_saturate_t |= 1 << i;
1014    }
1015 
1016    if (sel->compare_with_lod_bias_grad) {
1017       key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
1018       memcpy(key->sampler_compare_funcs, sel_ctx->ctx->tex_compare_func[stage],
1019              key->n_texture_states * sizeof(enum compare_func));
1020       memcpy(key->swizzle_state, sel_ctx->ctx->tex_swizzle_state[stage],
1021              key->n_texture_states * sizeof(dxil_texture_swizzle_state));
1022       if (!sel->samples_int_textures)
1023          memset(key->tex_wrap_states, 0, sizeof(key->tex_wrap_states[0]) * key->n_texture_states);
1024    }
1025 
1026    if (stage == PIPE_SHADER_VERTEX && sel_ctx->ctx->gfx_pipeline_state.ves) {
1027       key->vs.needs_format_emulation = sel_ctx->ctx->gfx_pipeline_state.ves->needs_format_emulation;
1028       if (key->vs.needs_format_emulation) {
1029          unsigned num_elements = sel_ctx->ctx->gfx_pipeline_state.ves->num_elements;
1030 
1031          memset(key->vs.format_conversion + num_elements,
1032                   0,
1033                   sizeof(key->vs.format_conversion) - (num_elements * sizeof(enum pipe_format)));
1034 
1035          memcpy(key->vs.format_conversion, sel_ctx->ctx->gfx_pipeline_state.ves->format_conversion,
1036                   num_elements * sizeof(enum pipe_format));
1037       }
1038    }
1039 
1040    if (stage == PIPE_SHADER_FRAGMENT &&
1041        sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY] &&
1042        sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant &&
1043        sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->gs_key.has_front_face) {
1044       key->fs.remap_front_facing = 1;
1045    }
1046 
1047    if (stage == PIPE_SHADER_COMPUTE && sel_ctx->variable_workgroup_size) {
1048       memcpy(key->cs.workgroup_size, sel_ctx->variable_workgroup_size, sizeof(key->cs.workgroup_size));
1049    }
1050 
1051    key->n_images = sel_ctx->ctx->num_image_views[stage];
1052    for (unsigned i = 0; i < key->n_images; ++i) {
1053       key->image_format_conversion[i].emulated_format = sel_ctx->ctx->image_view_emulation_formats[stage][i];
1054       if (key->image_format_conversion[i].emulated_format != PIPE_FORMAT_NONE)
1055          key->image_format_conversion[i].view_format = sel_ctx->ctx->image_views[stage][i].format;
1056    }
1057 
1058    key->hash = d3d12_shader_key_hash(key);
1059 }
1060 
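/* Find or build the shader variant matching the current state.  If no
 * existing variant has an equal key, clone the initial NIR, apply the
 * key-driven lowering passes, compile it with compile_nir() and prepend the
 * result to the selector's variant list. */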
1061 static void
1062 select_shader_variant(struct d3d12_selection_context *sel_ctx, d3d12_shader_selector *sel,
1063                      d3d12_shader_selector *prev, d3d12_shader_selector *next)
1064 {
1065    struct d3d12_context *ctx = sel_ctx->ctx;
1066    d3d12_shader_key key;
1067    nir_shader *new_nir_variant;
1068    unsigned pstipple_binding = UINT32_MAX;
1069 
1070    d3d12_fill_shader_key(sel_ctx, &key, sel, prev, next);
1071 
1072    /* Check for an existing variant */
1073    for (d3d12_shader *variant = sel->first; variant;
1074         variant = variant->next_variant) {
1075 
1076       if (d3d12_compare_shader_keys(sel_ctx, &key, &variant->key)) {
1077          sel->current = variant;
1078          return;
1079       }
1080    }
1081 
1082    /* Clone the NIR shader */
1083    new_nir_variant = nir_shader_clone(sel, sel->initial);
1084 
1085    /* Apply any needed lowering passes */
1086    if (key.stage == PIPE_SHADER_GEOMETRY) {
1087       if (key.gs.writes_psize) {
1088          NIR_PASS_V(new_nir_variant, d3d12_lower_point_sprite,
1089                     !key.gs.sprite_origin_upper_left,
1090                     key.gs.point_size_per_vertex,
1091                     key.gs.sprite_coord_enable,
1092                     key.next_varying_inputs);
1093       }
1094 
1095       if (key.gs.primitive_id)
1096          NIR_PASS_V(new_nir_variant, d3d12_lower_primitive_id);
1097 
1098       if (key.gs.triangle_strip)
1099          NIR_PASS_V(new_nir_variant, d3d12_lower_triangle_strip);
1100    }
1101    else if (key.stage == PIPE_SHADER_FRAGMENT)
1102    {
1103       if (key.fs.polygon_stipple) {
1104          NIR_PASS_V(new_nir_variant, nir_lower_pstipple_fs,
1105                     &pstipple_binding, 0, false, nir_type_bool1);
1106       }
1107 
1108       if (key.fs.remap_front_facing)
1109          dxil_nir_forward_front_face(new_nir_variant);
1110 
1111       if (key.fs.missing_dual_src_outputs) {
1112          NIR_PASS_V(new_nir_variant, d3d12_add_missing_dual_src_target,
1113                     key.fs.missing_dual_src_outputs);
1114       } else if (key.fs.frag_result_color_lowering) {
1115          NIR_PASS_V(new_nir_variant, nir_lower_fragcolor,
1116                     key.fs.frag_result_color_lowering);
1117       }
1118 
1119       if (key.fs.manual_depth_range)
1120          NIR_PASS_V(new_nir_variant, d3d12_lower_depth_range);
1121    }
1122 
1123 
1124    if (sel->compare_with_lod_bias_grad) {
1125       STATIC_ASSERT(sizeof(dxil_texture_swizzle_state) ==
1126                     sizeof(nir_lower_tex_shadow_swizzle));
1127 
1128       NIR_PASS_V(new_nir_variant,
1129                  nir_lower_tex_shadow,
1130                  key.n_texture_states,
1131                  key.sampler_compare_funcs,
1132                  (nir_lower_tex_shadow_swizzle *) key.swizzle_state,
1133                  false);
1134    }
1135 
1136    if (key.stage == PIPE_SHADER_FRAGMENT) {
1137       if (key.fs.cast_to_uint)
1138          NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, false);
1139       if (key.fs.cast_to_int)
1140          NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, true);
1141    }
1142 
1143    if (key.n_images) {
1144       d3d12_image_format_conversion_info_arr image_format_arr = { key.n_images, key.image_format_conversion };
1145       NIR_PASS_V(new_nir_variant, d3d12_lower_image_casts, &image_format_arr);
1146    }
1147 
1148    if (key.stage == PIPE_SHADER_COMPUTE && sel->workgroup_size_variable) {
1149       new_nir_variant->info.workgroup_size[0] = static_cast<uint16_t>(key.cs.workgroup_size[0]);
1150       new_nir_variant->info.workgroup_size[1] = static_cast<uint16_t>(key.cs.workgroup_size[1]);
1151       new_nir_variant->info.workgroup_size[2] = static_cast<uint16_t>(key.cs.workgroup_size[2]);
1152    }
1153 
1154    if (new_nir_variant->info.stage == MESA_SHADER_TESS_CTRL) {
1155       new_nir_variant->info.tess._primitive_mode = (tess_primitive_mode)key.hs.primitive_mode;
1156       new_nir_variant->info.tess.ccw = key.hs.ccw;
1157       new_nir_variant->info.tess.point_mode = key.hs.point_mode;
1158       new_nir_variant->info.tess.spacing = key.hs.spacing;
1159 
1160       NIR_PASS_V(new_nir_variant, dxil_nir_set_tcs_patches_in, key.hs.patch_vertices_in);
1161    } else if (new_nir_variant->info.stage == MESA_SHADER_TESS_EVAL) {
1162       new_nir_variant->info.tess.tcs_vertices_out = static_cast<uint8_t>(key.ds.tcs_vertices_out);
1163    }
1164 
1165    {
1166       struct nir_lower_tex_options tex_options = { };
1167       tex_options.lower_txp = ~0u; /* No equivalent for textureProj */
1168       tex_options.lower_rect = true;
1169       tex_options.lower_rect_offset = true;
1170       tex_options.saturate_s = key.tex_saturate_s;
1171       tex_options.saturate_r = key.tex_saturate_r;
1172       tex_options.saturate_t = key.tex_saturate_t;
1173       tex_options.lower_invalid_implicit_lod = true;
1174       tex_options.lower_tg4_offsets = true;
1175 
1176       NIR_PASS_V(new_nir_variant, nir_lower_tex, &tex_options);
1177    }
1178 
1179    /* Remove not-written inputs, and re-sort */
1180    if (prev) {
1181       NIR_PASS_V(new_nir_variant, dxil_nir_kill_undefined_varyings, key.prev_varying_outputs,
1182                  prev->initial->info.patch_outputs_written, key.prev_varying_frac_outputs);
1183       dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_in, key.prev_varying_outputs,
1184                                      key.prev_varying_frac_outputs);
1185    }
1186 
1187    /* Remove not-read outputs and re-sort */
1188    if (next) {
1189       NIR_PASS_V(new_nir_variant, dxil_nir_kill_unused_outputs, key.next_varying_inputs,
1190                  next->initial->info.patch_inputs_read, key.next_varying_frac_inputs);
1191       dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_out, key.next_varying_inputs,
1192                                      key.next_varying_frac_inputs);
1193    }
1194 
1195    nir_shader_gather_info(new_nir_variant, nir_shader_get_entrypoint(new_nir_variant));
1196    d3d12_shader *new_variant = compile_nir(ctx, sel, &key, new_nir_variant);
1197    assert(new_variant);
1198 
1199    /* keep track of polygon stipple texture binding */
1200    new_variant->pstipple_binding = pstipple_binding;
1201 
1202    /* prepend the new shader in the selector chain and pick it */
1203    new_variant->next_variant = sel->first;
1204    sel->current = sel->first = new_variant;
1205 }
1206 
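/* Walk the bound graphics stages to find the closest enabled stage before
 * 'current'; the fallthroughs skip stages that are not bound.
 * get_next_shader() below does the same in the opposite direction. */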
1207 static d3d12_shader_selector *
1208 get_prev_shader(struct d3d12_context *ctx, pipe_shader_type current)
1209 {
1210    switch (current) {
1211    case PIPE_SHADER_VERTEX:
1212       return NULL;
1213    case PIPE_SHADER_FRAGMENT:
1214       if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
1215          return ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
1216       FALLTHROUGH;
1217    case PIPE_SHADER_GEOMETRY:
1218       if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL])
1219          return ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
1220       FALLTHROUGH;
1221    case PIPE_SHADER_TESS_EVAL:
1222       if (ctx->gfx_stages[PIPE_SHADER_TESS_CTRL])
1223          return ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];
1224       FALLTHROUGH;
1225    case PIPE_SHADER_TESS_CTRL:
1226       return ctx->gfx_stages[PIPE_SHADER_VERTEX];
1227    default:
1228       unreachable("shader type not supported");
1229    }
1230 }
1231 
1232 static d3d12_shader_selector *
1233 get_next_shader(struct d3d12_context *ctx, pipe_shader_type current)
1234 {
1235    switch (current) {
1236    case PIPE_SHADER_VERTEX:
1237       if (ctx->gfx_stages[PIPE_SHADER_TESS_CTRL])
1238          return ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];
1239       FALLTHROUGH;
1240    case PIPE_SHADER_TESS_CTRL:
1241       if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL])
1242          return ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
1243       FALLTHROUGH;
1244    case PIPE_SHADER_TESS_EVAL:
1245       if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
1246          return ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
1247       FALLTHROUGH;
1248    case PIPE_SHADER_GEOMETRY:
1249       return ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
1250    case PIPE_SHADER_FRAGMENT:
1251       return NULL;
1252    default:
1253       unreachable("shader type not supported");
1254    }
1255 }
1256 
1257 enum tex_scan_flags {
1258    TEX_SAMPLE_INTEGER_TEXTURE = 1 << 0,
1259    TEX_CMP_WITH_LOD_BIAS_GRAD = 1 << 1,
1260    TEX_SCAN_ALL_FLAGS         = (1 << 2) - 1
1261 };
1262 
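/* Scan every texture instruction to detect integer-texture sampling and
 * shadow compares that use bias/LOD/gradients; the resulting flags decide
 * which lowering passes the selector's variants will need. */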
1263 static unsigned
1264 scan_texture_use(nir_shader *nir)
1265 {
1266    unsigned result = 0;
1267    nir_foreach_function_impl(impl, nir) {
1268       nir_foreach_block(block, impl) {
1269          nir_foreach_instr(instr, block) {
1270             if (instr->type == nir_instr_type_tex) {
1271                auto tex = nir_instr_as_tex(instr);
1272                switch (tex->op) {
1273                case nir_texop_txb:
1274                case nir_texop_txl:
1275                case nir_texop_txd:
1276                   if (tex->is_shadow)
1277                      result |= TEX_CMP_WITH_LOD_BIAS_GRAD;
1278                   FALLTHROUGH;
1279                case nir_texop_tex:
1280                   if (tex->dest_type & (nir_type_int | nir_type_uint))
1281                      result |= TEX_SAMPLE_INTEGER_TEXTURE;
1282                default:
1283                   ;
1284                }
1285             }
1286             if (TEX_SCAN_ALL_FLAGS == result)
1287                return result;
1288          }
1289       }
1290    }
1291    return result;
1292 }
1293 
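/* Stream-output register indices arrive as packed driver slots; translate
 * them back to VARYING_SLOT_* values using the shader's outputs_written
 * mask and return the bitmask of varyings captured by streamout. */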
1294 static uint64_t
1295 update_so_info(struct pipe_stream_output_info *so_info,
1296                uint64_t outputs_written)
1297 {
1298    uint64_t so_outputs = 0;
1299    uint8_t reverse_map[64] = {0};
1300    unsigned slot = 0;
1301 
1302    while (outputs_written)
1303       reverse_map[slot++] = static_cast<uint8_t>(u_bit_scan64(&outputs_written));
1304 
1305    for (unsigned i = 0; i < so_info->num_outputs; i++) {
1306       struct pipe_stream_output *output = &so_info->output[i];
1307 
1308       /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
1309       output->register_index = reverse_map[output->register_index];
1310 
1311       so_outputs |= 1ull << output->register_index;
1312    }
1313 
1314    return so_outputs;
1315 }
1316 
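/* Stage-independent part of shader creation, shared between the graphics and
 * compute paths: run the lowerings that every variant will need and keep the
 * resulting NIR on the selector as the blueprint for variant compilation. */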
1317 static struct d3d12_shader_selector *
1318 d3d12_create_shader_impl(struct d3d12_context *ctx,
1319                          struct d3d12_shader_selector *sel,
1320                          struct nir_shader *nir)
1321 {
1322    unsigned tex_scan_result = scan_texture_use(nir);
1323    sel->samples_int_textures = (tex_scan_result & TEX_SAMPLE_INTEGER_TEXTURE) != 0;
1324    sel->compare_with_lod_bias_grad = (tex_scan_result & TEX_CMP_WITH_LOD_BIAS_GRAD) != 0;
1325    sel->workgroup_size_variable = nir->info.workgroup_size_variable;
1326 
1327    /* Integer cube maps are not supported in DirectX because sampling is not supported
1328     * on integer textures and TextureLoad is not supported for cube maps, so we have to
1329     * lower integer cube maps to be handled like 2D texture arrays. */
1330    NIR_PASS_V(nir, dxil_nir_lower_int_cubemaps, true);
1331 
1332    NIR_PASS_V(nir, dxil_nir_lower_subgroup_id);
1333    NIR_PASS_V(nir, dxil_nir_lower_num_subgroups);
1334 
1335    nir_lower_subgroups_options subgroup_options = {};
1336    subgroup_options.ballot_bit_size = 32;
1337    subgroup_options.ballot_components = 4;
1338    subgroup_options.lower_subgroup_masks = true;
1339    subgroup_options.lower_to_scalar = true;
1340    subgroup_options.lower_relative_shuffle = true;
1341    subgroup_options.lower_inverse_ballot = true;
1342    if (nir->info.stage != MESA_SHADER_FRAGMENT && nir->info.stage != MESA_SHADER_COMPUTE)
1343       subgroup_options.lower_quad = true;
1344    NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
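   /* Subgroup quad swaps, reductions and scans with a 1-bit (boolean) result
    * are widened to 32 bits by the callback below; everything else is left
    * untouched. */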
1345    NIR_PASS_V(nir, nir_lower_bit_size, [](const nir_instr *instr, void *) -> unsigned {
1346       if (instr->type != nir_instr_type_intrinsic)
1347          return 0;
1348       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1349       switch (intr->intrinsic) {
1350       case nir_intrinsic_quad_swap_horizontal:
1351       case nir_intrinsic_quad_swap_vertical:
1352       case nir_intrinsic_quad_swap_diagonal:
1353       case nir_intrinsic_reduce:
1354       case nir_intrinsic_inclusive_scan:
1355       case nir_intrinsic_exclusive_scan:
1356          return intr->def.bit_size == 1 ? 32 : 0;
1357       default:
1358          return 0;
1359       }
1360       }, NULL);
1361 
1362    // Ensure subgroup scans on bools are gone
1363    NIR_PASS_V(nir, nir_opt_dce);
1364    NIR_PASS_V(nir, dxil_nir_lower_unsupported_subgroup_scan);
1365 
1366    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1367 
1368    if (nir->info.stage == MESA_SHADER_COMPUTE)
1369       NIR_PASS_V(nir, d3d12_lower_compute_state_vars);
1370    NIR_PASS_V(nir, d3d12_lower_load_draw_params);
1371    NIR_PASS_V(nir, d3d12_lower_load_patch_vertices_in);
1372    NIR_PASS_V(nir, dxil_nir_lower_double_math);
1373 
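   /* Record which VARYING_SLOT_VARn inputs and outputs start at a component
    * other than x (location_frac != 0) in per-selector bitsets, so that later
    * stage linking can account for them. */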
1374    nir_foreach_variable_with_modes(var, nir, nir_var_shader_in) {
1375       if (var->data.location >= VARYING_SLOT_VAR0 && var->data.location_frac) {
1376          sel->has_frac_inputs = 1;
1377          BITSET_SET(sel->varying_frac_inputs, (var->data.location - VARYING_SLOT_VAR0) * 4 + var->data.location_frac);
1378       }
1379    }
1380    nir_foreach_variable_with_modes(var, nir, nir_var_shader_out) {
1381       if (var->data.location >= VARYING_SLOT_VAR0 && var->data.location_frac) {
1382          sel->has_frac_outputs = 1;
1383          BITSET_SET(sel->varying_frac_outputs, (var->data.location - VARYING_SLOT_VAR0) * 4 + var->data.location_frac);
1384       }
1385    }
1386 
1387    /* Keep this initial shader as the blueprint for possible variants */
1388    sel->initial = nir;
1389    sel->initial_output_vars = nullptr;
1390    sel->initial_input_vars = nullptr;
1391    sel->gs_key.varyings = nullptr;
1392    sel->tcs_key.varyings = nullptr;
1393 
1394    return sel;
1395 }
1396 
1397 struct d3d12_shader_selector *
1398 d3d12_create_shader(struct d3d12_context *ctx,
1399                     pipe_shader_type stage,
1400                     const struct pipe_shader_state *shader)
1401 {
1402    struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector);
1403    sel->stage = stage;
1404 
1405    struct nir_shader *nir = NULL;
1406 
1407    if (shader->type == PIPE_SHADER_IR_NIR) {
1408       nir = (nir_shader *)shader->ir.nir;
1409    } else {
1410       assert(shader->type == PIPE_SHADER_IR_TGSI);
1411       nir = tgsi_to_nir(shader->tokens, ctx->base.screen, false);
1412    }
1413 
1414    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1415    memcpy(&sel->so_info, &shader->stream_output, sizeof(sel->so_info));
1416    update_so_info(&sel->so_info, nir->info.outputs_written);
1417 
1418    assert(nir != NULL);
1419 
1420    NIR_PASS_V(nir, dxil_nir_split_clip_cull_distance);
1421    NIR_PASS_V(nir, d3d12_split_needed_varyings);
1422 
1423    if (nir->info.stage == MESA_SHADER_TESS_EVAL || nir->info.stage == MESA_SHADER_TESS_CTRL) {
1424       /* D3D requires exactly-matching patch constant signatures. Since tess ctrl must write these vars,
1425        * tess eval must have them. */
1426       for (uint32_t i = 0; i < 2; ++i) {
1427          unsigned loc = i == 0 ? VARYING_SLOT_TESS_LEVEL_OUTER : VARYING_SLOT_TESS_LEVEL_INNER;
1428          nir_variable_mode mode = nir->info.stage == MESA_SHADER_TESS_EVAL ? nir_var_shader_in : nir_var_shader_out;
1429          nir_variable *var = nir_find_variable_with_location(nir, mode, loc);
1430          uint32_t arr_size = i == 0 ? 4 : 2;
1431          if (!var) {
1432             var = nir_variable_create(nir, mode, glsl_array_type(glsl_float_type(), arr_size, 0), i == 0 ? "outer" : "inner");
1433             var->data.location = loc;
1434             var->data.patch = true;
1435             var->data.compact = true;
1436 
1437             if (mode == nir_var_shader_out) {
1438                nir_builder b = nir_builder_create(nir_shader_get_entrypoint(nir));
1439                b.cursor = nir_after_impl(b.impl);
1440                for (uint32_t j = 0; j < arr_size; ++j)
1441                   nir_store_deref(&b, nir_build_deref_array_imm(&b, nir_build_deref_var(&b, var), j), nir_imm_zero(&b, 1, 32), 1);
1442             }
1443          }
1444       }
1445    }
1446 
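   /* Assign driver locations: vertex-shader inputs are sorted by their
    * existing driver_location and then packed one attribute slot at a time,
    * while inputs of the other stages and the outputs of every stage but the
    * fragment shader get freshly assigned locations; fragment-shader outputs
    * are only sorted. */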
1447    if (nir->info.stage != MESA_SHADER_VERTEX) {
1448       dxil_reassign_driver_locations(nir, nir_var_shader_in, 0, NULL);
1449    } else {
1450       dxil_sort_by_driver_location(nir, nir_var_shader_in);
1451 
1452       uint32_t driver_loc = 0;
1453       nir_foreach_variable_with_modes(var, nir, nir_var_shader_in) {
1454          var->data.driver_location = driver_loc;
1455          driver_loc += glsl_count_attribute_slots(var->type, false);
1456       }
1457    }
1458 
1459    if (nir->info.stage != MESA_SHADER_FRAGMENT) {
1460       dxil_reassign_driver_locations(nir, nir_var_shader_out, 0, NULL);
1461    } else {
1462       NIR_PASS_V(nir, nir_lower_fragcoord_wtrans);
1463       NIR_PASS_V(nir, dxil_nir_lower_sample_pos);
1464       dxil_sort_ps_outputs(nir);
1465    }
1466 
1467    return d3d12_create_shader_impl(ctx, sel, nir);
1468 }
1469 
1470 struct d3d12_shader_selector *
1471 d3d12_create_compute_shader(struct d3d12_context *ctx,
1472                             const struct pipe_compute_state *shader)
1473 {
1474    struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector);
1475    sel->stage = PIPE_SHADER_COMPUTE;
1476 
1477    struct nir_shader *nir = NULL;
1478 
1479    if (shader->ir_type == PIPE_SHADER_IR_NIR) {
1480       nir = (nir_shader *)shader->prog;
1481    } else {
1482       assert(shader->ir_type == PIPE_SHADER_IR_TGSI);
1483       nir = tgsi_to_nir(shader->prog, ctx->base.screen, false);
1484    }
1485 
1486    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1487 
1488    return d3d12_create_shader_impl(ctx, sel, nir);
1489 }
1490 
1491 void
1492 d3d12_select_shader_variants(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
1493 {
1494    struct d3d12_selection_context sel_ctx;
1495 
1496    sel_ctx.ctx = ctx;
1497    sel_ctx.needs_point_sprite_lowering = needs_point_sprite_lowering(ctx, dinfo);
1498    sel_ctx.fill_mode_lowered = fill_mode_lowered(ctx, dinfo);
1499    sel_ctx.cull_mode_lowered = cull_mode_lowered(ctx, sel_ctx.fill_mode_lowered);
1500    sel_ctx.provoking_vertex = get_provoking_vertex(&sel_ctx, &sel_ctx.alternate_tri, dinfo);
1501    sel_ctx.needs_vertex_reordering = needs_vertex_reordering(&sel_ctx, dinfo);
1502    sel_ctx.missing_dual_src_outputs = ctx->missing_dual_src_outputs;
1503    sel_ctx.frag_result_color_lowering = frag_result_color_lowering(ctx);
1504    sel_ctx.manual_depth_range = ctx->manual_depth_range;
1505 
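   /* A geometry shader bound by the application is left alone; if there is no
    * GS or only an internal variant, a variant is (re)validated whenever one
    * of the lowerings that needs a GS pass-through is active, and a stale
    * variant is unbound otherwise. */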
1506    d3d12_shader_selector* gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
1507    if (gs == nullptr || gs->is_variant) {
1508       if (sel_ctx.fill_mode_lowered != PIPE_POLYGON_MODE_FILL || sel_ctx.needs_point_sprite_lowering || sel_ctx.needs_vertex_reordering)
1509          validate_geometry_shader_variant(&sel_ctx);
1510       else if (gs != nullptr) {
1511          ctx->gfx_stages[PIPE_SHADER_GEOMETRY] = NULL;
1512       }
1513    }
1514 
1515    validate_tess_ctrl_shader_variant(&sel_ctx);
1516 
1517    auto* stages = ctx->gfx_stages;
1518    d3d12_shader_selector* prev;
1519    d3d12_shader_selector* next;
1520    if (stages[PIPE_SHADER_VERTEX]) {
1521       next = get_next_shader(ctx, PIPE_SHADER_VERTEX);
1522       select_shader_variant(&sel_ctx, stages[PIPE_SHADER_VERTEX], nullptr, next);
1523    }
1524    if (stages[PIPE_SHADER_TESS_CTRL]) {
1525       prev = get_prev_shader(ctx, PIPE_SHADER_TESS_CTRL);
1526       next = get_next_shader(ctx, PIPE_SHADER_TESS_CTRL);
1527       select_shader_variant(&sel_ctx, stages[PIPE_SHADER_TESS_CTRL], prev, next);
1528    }
1529    if (stages[PIPE_SHADER_TESS_EVAL]) {
1530       prev = get_prev_shader(ctx, PIPE_SHADER_TESS_EVAL);
1531       next = get_next_shader(ctx, PIPE_SHADER_TESS_EVAL);
1532       select_shader_variant(&sel_ctx, stages[PIPE_SHADER_TESS_EVAL], prev, next);
1533    }
1534    if (stages[PIPE_SHADER_GEOMETRY]) {
1535       prev = get_prev_shader(ctx, PIPE_SHADER_GEOMETRY);
1536       next = get_next_shader(ctx, PIPE_SHADER_GEOMETRY);
1537       select_shader_variant(&sel_ctx, stages[PIPE_SHADER_GEOMETRY], prev, next);
1538    }
1539    if (stages[PIPE_SHADER_FRAGMENT]) {
1540       prev = get_prev_shader(ctx, PIPE_SHADER_FRAGMENT);
1541       select_shader_variant(&sel_ctx, stages[PIPE_SHADER_FRAGMENT], prev, nullptr);
1542    }
1543 }
1544 
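/* Return the per-draw workgroup size from the grid info when the bound
 * compute shader declares a variable workgroup size, NULL otherwise. */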
1545 static const unsigned *
1546 workgroup_size_variable(struct d3d12_context *ctx,
1547                         const struct pipe_grid_info *info)
1548 {
1549    if (ctx->compute_state->workgroup_size_variable)
1550       return info->block;
1551    return nullptr;
1552 }
1553 
1554 void
1555 d3d12_select_compute_shader_variants(struct d3d12_context *ctx, const struct pipe_grid_info *info)
1556 {
1557    struct d3d12_selection_context sel_ctx = {};
1558 
1559    sel_ctx.ctx = ctx;
1560    sel_ctx.variable_workgroup_size = workgroup_size_variable(ctx, info);
1561 
1562    select_shader_variant(&sel_ctx, ctx->compute_state, nullptr, nullptr);
1563 }
1564 
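/* Free every compiled variant's bytecode along the variant chain, then
 * release the initial NIR and the selector itself (both ralloc'ed). */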
1565 void
1566 d3d12_shader_free(struct d3d12_shader_selector *sel)
1567 {
1568    auto shader = sel->first;
1569    while (shader) {
1570       free(shader->bytecode);
1571       shader = shader->next_variant;
1572    }
1573 
1574    ralloc_free((void*)sel->initial);
1575    ralloc_free(sel);
1576 }
1577