• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "d3d12_compiler.h"
25 #include "d3d12_context.h"
26 #include "d3d12_debug.h"
27 #include "d3d12_screen.h"
28 #include "d3d12_nir_passes.h"
29 #include "nir_to_dxil.h"
30 #include "dxil_nir.h"
31 #include "dxil_nir_lower_int_cubemaps.h"
32 
33 #include "pipe/p_state.h"
34 
35 #include "nir.h"
36 #include "nir/nir_draw_helpers.h"
37 #include "nir/tgsi_to_nir.h"
38 #include "compiler/nir/nir_builder.h"
39 
40 #include "util/hash_table.h"
41 #include "util/u_memory.h"
42 #include "util/u_prim.h"
43 #include "util/u_simple_shaders.h"
44 #include "util/u_dl.h"
45 
46 #include <dxguids/dxguids.h>
47 
48 #ifdef _WIN32
49 #include "dxil_validator.h"
50 #endif
51 
/* Return the screen's NIR compiler options.  The d3d12 driver only consumes
 * NIR, so any other IR is a caller bug (asserted).  The 'shader' stage is
 * unused: one option set applies to all stages. */
const void *
d3d12_get_compiler_options(struct pipe_screen *screen,
                           enum pipe_shader_ir ir,
                           enum pipe_shader_type shader)
{
   assert(ir == PIPE_SHADER_IR_NIR);
   return &d3d12_screen(screen)->nir_options;
}
60 
61 static uint32_t
resource_dimension(enum glsl_sampler_dim dim)62 resource_dimension(enum glsl_sampler_dim dim)
63 {
64    switch (dim) {
65    case GLSL_SAMPLER_DIM_1D:
66       return RESOURCE_DIMENSION_TEXTURE1D;
67    case GLSL_SAMPLER_DIM_2D:
68       return RESOURCE_DIMENSION_TEXTURE2D;
69    case GLSL_SAMPLER_DIM_3D:
70       return RESOURCE_DIMENSION_TEXTURE3D;
71    case GLSL_SAMPLER_DIM_CUBE:
72       return RESOURCE_DIMENSION_TEXTURECUBE;
73    default:
74       return RESOURCE_DIMENSION_UNKNOWN;
75    }
76 }
77 
78 static bool
can_remove_dead_sampler(nir_variable * var,void * data)79 can_remove_dead_sampler(nir_variable *var, void *data)
80 {
81    const struct glsl_type *base_type = glsl_without_array(var->type);
82    return glsl_type_is_sampler(base_type) && !glsl_type_is_bare_sampler(base_type);
83 }
84 
85 static struct d3d12_shader *
compile_nir(struct d3d12_context * ctx,struct d3d12_shader_selector * sel,struct d3d12_shader_key * key,struct nir_shader * nir)86 compile_nir(struct d3d12_context *ctx, struct d3d12_shader_selector *sel,
87             struct d3d12_shader_key *key, struct nir_shader *nir)
88 {
89    struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
90    struct d3d12_shader *shader = rzalloc(sel, d3d12_shader);
91    shader->key = *key;
92 
93    if (shader->key.n_texture_states > 0) {
94       shader->key.tex_wrap_states = (dxil_wrap_sampler_state*)ralloc_size(sel, sizeof(dxil_wrap_sampler_state) * shader->key.n_texture_states);
95       memcpy(shader->key.tex_wrap_states, key->tex_wrap_states, sizeof(dxil_wrap_sampler_state) * shader->key.n_texture_states);
96    }
97    else
98       shader->key.tex_wrap_states = nullptr;
99 
100    shader->output_vars_fs = nullptr;
101    shader->output_vars_gs = nullptr;
102    shader->output_vars_default = nullptr;
103 
104    shader->input_vars_vs = nullptr;
105    shader->input_vars_default = nullptr;
106 
107    shader->tess_eval_output_vars = nullptr;
108    shader->tess_ctrl_input_vars = nullptr;
109    shader->nir = nir;
110    sel->current = shader;
111 
112    NIR_PASS_V(nir, nir_lower_samplers);
113    NIR_PASS_V(nir, dxil_nir_split_typed_samplers);
114 
115    NIR_PASS_V(nir, nir_opt_dce);
116    struct nir_remove_dead_variables_options dead_var_opts = {};
117    dead_var_opts.can_remove_var = can_remove_dead_sampler;
118    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform, &dead_var_opts);
119 
120    if (key->samples_int_textures)
121       NIR_PASS_V(nir, dxil_lower_sample_to_txf_for_integer_tex,
122                  key->n_texture_states, key->tex_wrap_states, key->swizzle_state,
123                  screen->base.get_paramf(&screen->base, PIPE_CAPF_MAX_TEXTURE_LOD_BIAS));
124 
125    if (key->stage == PIPE_SHADER_VERTEX && key->vs.needs_format_emulation)
126       dxil_nir_lower_vs_vertex_conversion(nir, key->vs.format_conversion);
127 
128    uint32_t num_ubos_before_lower_to_ubo = nir->info.num_ubos;
129    uint32_t num_uniforms_before_lower_to_ubo = nir->num_uniforms;
130    NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, false, false);
131    shader->has_default_ubo0 = num_uniforms_before_lower_to_ubo > 0 &&
132                               nir->info.num_ubos > num_ubos_before_lower_to_ubo;
133 
134    NIR_PASS_V(nir, dxil_nir_lower_subgroup_id);
135    NIR_PASS_V(nir, dxil_nir_lower_num_subgroups);
136 
137    nir_lower_subgroups_options subgroup_options = {};
138    subgroup_options.ballot_bit_size = 32;
139    subgroup_options.ballot_components = 4;
140    subgroup_options.lower_subgroup_masks = true;
141    subgroup_options.lower_to_scalar = true;
142    subgroup_options.lower_relative_shuffle = true;
143    subgroup_options.lower_inverse_ballot = true;
144    if (nir->info.stage != MESA_SHADER_FRAGMENT && nir->info.stage != MESA_SHADER_COMPUTE)
145       subgroup_options.lower_quad = true;
146    NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
147    NIR_PASS_V(nir, nir_lower_bit_size, [](const nir_instr *instr, void *) -> unsigned {
148          if (instr->type != nir_instr_type_intrinsic)
149             return 0;
150          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
151          switch (intr->intrinsic) {
152             case nir_intrinsic_quad_swap_horizontal:
153             case nir_intrinsic_quad_swap_vertical:
154             case nir_intrinsic_quad_swap_diagonal:
155             case nir_intrinsic_reduce:
156             case nir_intrinsic_inclusive_scan:
157             case nir_intrinsic_exclusive_scan:
158                return intr->def.bit_size == 1 ? 32 : 0;
159             default:
160                return 0;
161          }
162       }, NULL);
163 
164    // Ensure subgroup scans on bools are gone
165    NIR_PASS_V(nir, nir_opt_dce);
166    NIR_PASS_V(nir, dxil_nir_lower_unsupported_subgroup_scan);
167 
168    if (key->last_vertex_processing_stage) {
169       if (key->invert_depth)
170          NIR_PASS_V(nir, d3d12_nir_invert_depth, key->invert_depth, key->halfz);
171       if (!key->halfz)
172          NIR_PASS_V(nir, nir_lower_clip_halfz);
173       NIR_PASS_V(nir, d3d12_lower_yflip);
174    }
175    NIR_PASS_V(nir, d3d12_lower_load_draw_params);
176    NIR_PASS_V(nir, d3d12_lower_load_patch_vertices_in);
177    NIR_PASS_V(nir, d3d12_lower_state_vars, shader);
178    const struct dxil_nir_lower_loads_stores_options loads_stores_options = {};
179    NIR_PASS_V(nir, dxil_nir_lower_loads_stores_to_dxil, &loads_stores_options);
180    NIR_PASS_V(nir, dxil_nir_lower_double_math);
181 
182    if (key->stage == PIPE_SHADER_FRAGMENT && key->fs.multisample_disabled)
183       NIR_PASS_V(nir, d3d12_disable_multisampling);
184 
185    struct nir_to_dxil_options opts = {};
186    opts.interpolate_at_vertex = screen->have_load_at_vertex;
187    opts.lower_int16 = !screen->opts4.Native16BitShaderOpsSupported;
188    opts.no_ubo0 = !shader->has_default_ubo0;
189    opts.last_ubo_is_not_arrayed = shader->num_state_vars > 0;
190    if (key->stage == PIPE_SHADER_FRAGMENT)
191       opts.provoking_vertex = key->fs.provoking_vertex;
192    opts.input_clip_size = key->input_clip_size;
193    opts.environment = DXIL_ENVIRONMENT_GL;
194    opts.shader_model_max = screen->max_shader_model;
195 #ifdef _WIN32
196    opts.validator_version_max = dxil_get_validator_version(ctx->dxil_validator);
197 #endif
198 
199    struct blob tmp;
200    if (!nir_to_dxil(nir, &opts, NULL, &tmp)) {
201       debug_printf("D3D12: nir_to_dxil failed\n");
202       return NULL;
203    }
204 
205    // Non-ubo variables
206    shader->begin_srv_binding = (UINT_MAX);
207    nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
208       auto type_no_array = glsl_without_array(var->type);
209       if (glsl_type_is_texture(type_no_array)) {
210          unsigned count = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
211          for (unsigned i = 0; i < count; ++i) {
212             shader->srv_bindings[var->data.binding + i].dimension = resource_dimension(glsl_get_sampler_dim(type_no_array));
213          }
214          shader->begin_srv_binding = MIN2(var->data.binding, shader->begin_srv_binding);
215          shader->end_srv_binding = MAX2(var->data.binding + count, shader->end_srv_binding);
216       }
217    }
218 
219    nir_foreach_image_variable(var, nir) {
220       auto type_no_array = glsl_without_array(var->type);
221       unsigned count = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
222       for (unsigned i = 0; i < count; ++i) {
223          shader->uav_bindings[var->data.driver_location + i].dimension = resource_dimension(glsl_get_sampler_dim(type_no_array));
224       }
225    }
226 
227    // Ubo variables
228    if(nir->info.num_ubos) {
229       // Ignore state_vars ubo as it is bound as root constants
230       unsigned num_ubo_bindings = nir->info.num_ubos - (shader->state_vars_used ? 1 : 0);
231       for(unsigned i = shader->has_default_ubo0 ? 0 : 1; i < num_ubo_bindings; ++i) {
232          shader->cb_bindings[shader->num_cb_bindings++].binding = i;
233       }
234    }
235 
236 #ifdef _WIN32
237    if (ctx->dxil_validator) {
238       if (!(d3d12_debug & D3D12_DEBUG_EXPERIMENTAL)) {
239          char *err;
240          if (!dxil_validate_module(ctx->dxil_validator, tmp.data,
241                                    tmp.size, &err) && err) {
242             debug_printf(
243                "== VALIDATION ERROR =============================================\n"
244                "%s\n"
245                "== END ==========================================================\n",
246                err);
247             ralloc_free(err);
248          }
249       }
250 
251       if (d3d12_debug & D3D12_DEBUG_DISASS) {
252          char *str = dxil_disasm_module(ctx->dxil_validator, tmp.data,
253                                         tmp.size);
254          fprintf(stderr,
255                  "== BEGIN SHADER ============================================\n"
256                  "%s\n"
257                  "== END SHADER ==============================================\n",
258                str);
259          ralloc_free(str);
260       }
261    }
262 #endif
263 
264    blob_finish_get_buffer(&tmp, &shader->bytecode, &shader->bytecode_length);
265 
266    if (d3d12_debug & D3D12_DEBUG_DXIL) {
267       char buf[256];
268       static int i;
269       snprintf(buf, sizeof(buf), "dump%02d.dxil", i++);
270       FILE *fp = fopen(buf, "wb");
271       fwrite(shader->bytecode, sizeof(char), shader->bytecode_length, fp);
272       fclose(fp);
273       fprintf(stderr, "wrote '%s'...\n", buf);
274    }
275    return shader;
276 }
277 
/* Transient per-draw state gathered while selecting/compiling the shader
 * variants for the current draw call. */
struct d3d12_selection_context {
   struct d3d12_context *ctx;
   /* Wide/sprite points must be emulated with a GS variant. */
   bool needs_point_sprite_lowering;
   /* Triangle vertices must be reordered (provoking vertex / xfb order). */
   bool needs_vertex_reordering;
   /* Provoking-vertex index: 0 (first) or primitive vertex count - 1 (last). */
   unsigned provoking_vertex;
   /* Strip primitives alternate triangle orientation between triangles. */
   bool alternate_tri;
   /* PIPE_POLYGON_MODE_* that needs GS emulation (FILL means "none"). */
   unsigned fill_mode_lowered;
   /* PIPE_FACE_* culling to emulate in the GS (NONE means "none"). */
   unsigned cull_mode_lowered;
   /* FS must apply depth range to gl_FragCoord.z manually. */
   bool manual_depth_range;
   /* Bitmask (bits 0/1) of dual-source outputs the FS does not write. */
   unsigned missing_dual_src_outputs;
   /* Number of cbufs gl_FragColor must be broadcast to (0 = no lowering). */
   unsigned frag_result_color_lowering;
   const unsigned *variable_workgroup_size;
};
291 
/* With dual-source blending, both color outputs (index 0 and 1) must be
 * written.  Scan the fragment shader for stores to dual-source-capable
 * outputs and return a bitmask (bits 0/1) of the indices that are NOT
 * written, so the caller can add dummy writes.  Returns 0 when dual-source
 * blending is off, no FS is bound, or both indices are already written. */
unsigned
missing_dual_src_outputs(struct d3d12_context *ctx)
{
   if (!ctx->gfx_pipeline_state.blend || !ctx->gfx_pipeline_state.blend->is_dual_src)
      return 0;

   struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
   if (!fs)
      return 0;

   const nir_shader *s = fs->initial;

   unsigned indices_seen = 0;
   nir_foreach_function_impl(impl, s) {
      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic != nir_intrinsic_store_deref)
               continue;

            nir_variable *var = nir_intrinsic_get_var(intr, 0);
            if (var->data.mode != nir_var_shader_out)
               continue;

            /* DATA1..N map to index 1..N; COLOR/DATA0 use the variable's
             * own dual-source index; anything else (depth, sample mask,
             * ...) is irrelevant here. */
            unsigned index = var->data.index;
            if (var->data.location > FRAG_RESULT_DATA0)
               index = var->data.location - FRAG_RESULT_DATA0;
            else if (var->data.location != FRAG_RESULT_COLOR &&
                     var->data.location != FRAG_RESULT_DATA0)
               continue;

            indices_seen |= 1u << index;
            /* Both slots written -> nothing missing, stop scanning early. */
            if ((indices_seen & 3) == 3)
               return 0;
         }
      }
   }

   return 3 & ~indices_seen;
}
335 
336 static unsigned
frag_result_color_lowering(struct d3d12_context * ctx)337 frag_result_color_lowering(struct d3d12_context *ctx)
338 {
339    struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
340    assert(fs);
341 
342    if (fs->initial->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR))
343       return ctx->fb.nr_cbufs > 1 ? ctx->fb.nr_cbufs : 0;
344 
345    return 0;
346 }
347 
/* Decide whether the fragment shader must apply the depth range to
 * gl_FragCoord.z itself instead of relying on the viewport transform. */
bool
manual_depth_range(struct d3d12_context *ctx)
{
   if (!d3d12_need_zero_one_depth_range(ctx))
      return false;

   /**
    * If we can't use the D3D12 zero-one depth-range, we might have to apply
    * depth-range ourselves.
    *
    * Because we only need to override the depth-range to zero-one range in
    * the case where we write frag-depth, we only need to apply manual
    * depth-range to gl_FragCoord.z.
    *
    * No extra care is needed to be taken in the case where gl_FragDepth is
    * written conditionally, because the GLSL 4.60 spec states:
    *
    *    If a shader statically assigns a value to gl_FragDepth, and there
    *    is an execution path through the shader that does not set
    *    gl_FragDepth, then the value of the fragment’s depth may be
    *    undefined for executions of the shader that take that path. That
    *    is, if the set of linked fragment shaders statically contain a
    *    write to gl_FragDepth, then it is responsible for always writing
    *    it.
    */

   /* Only relevant when the FS actually reads gl_FragCoord. */
   struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
   return fs && fs->initial->info.inputs_read & VARYING_BIT_POS;
}
377 
378 static bool
needs_edge_flag_fix(enum mesa_prim mode)379 needs_edge_flag_fix(enum mesa_prim mode)
380 {
381    return (mode == MESA_PRIM_QUADS ||
382            mode == MESA_PRIM_QUAD_STRIP ||
383            mode == MESA_PRIM_POLYGON);
384 }
385 
/* Determine whether the polygon fill mode (line/point) must be emulated by
 * a geometry-shader variant for this draw.  Returns the PIPE_POLYGON_MODE_*
 * that needs lowering, or PIPE_POLYGON_MODE_FILL when none is required. */
static unsigned
fill_mode_lowered(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
{
   struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];

   /* No lowering when a user-supplied GS is bound (we can't inject a
    * variant), when there is no rasterizer state, or for non-triangle
    * primitives. */
   if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
        !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant) ||
       ctx->gfx_pipeline_state.rast == NULL ||
       (dinfo->mode != MESA_PRIM_TRIANGLES &&
        dinfo->mode != MESA_PRIM_TRIANGLE_STRIP))
      return PIPE_POLYGON_MODE_FILL;

   /* D3D12 supports line mode (wireframe) but doesn't support edge flags,
    * so line mode only needs emulation when edge flags are in play: either
    * written by the VS or implied by quad/polygon decomposition.  The
    * fill_front/fill_back check picks whichever face isn't culled. */
   if (((ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_LINE &&
         ctx->gfx_pipeline_state.rast->base.cull_face != PIPE_FACE_FRONT) ||
        (ctx->gfx_pipeline_state.rast->base.fill_back == PIPE_POLYGON_MODE_LINE &&
         ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_FRONT)) &&
       (vs->initial->info.outputs_written & VARYING_BIT_EDGE ||
        needs_edge_flag_fix(ctx->initial_api_prim)))
      return PIPE_POLYGON_MODE_LINE;

   /* Point fill mode has no D3D12 equivalent at all -> always emulate.
    * NOTE(review): only fill_front is consulted here, unlike the line case
    * above — confirm this asymmetry is intentional. */
   if (ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_POINT)
      return PIPE_POLYGON_MODE_POINT;

   return PIPE_POLYGON_MODE_FILL;
}
412 
413 static bool
has_stream_out_for_streams(struct d3d12_context * ctx)414 has_stream_out_for_streams(struct d3d12_context *ctx)
415 {
416    unsigned mask = ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->initial->info.gs.active_stream_mask & ~1;
417    for (unsigned i = 0; i < ctx->gfx_pipeline_state.so_info.num_outputs; ++i) {
418       unsigned stream = ctx->gfx_pipeline_state.so_info.output[i].stream;
419       if (((1 << stream) & mask) &&
420          ctx->so_buffer_views[stream].SizeInBytes)
421          return true;
422    }
423    return false;
424 }
425 
/* Decide whether point rendering must be lowered to a quad-emitting GS
 * variant (wide points / point sprites have no direct D3D12 equivalent). */
static bool
needs_point_sprite_lowering(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
{
   struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
   struct d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];

   if (gs != NULL && !gs->is_variant) {
      /* There is an user GS; Check if it outputs points with PSIZE */
      return (gs->initial->info.gs.output_primitive == MESA_PRIM_POINTS &&
              (gs->initial->info.outputs_written & VARYING_BIT_PSIZ ||
                 ctx->gfx_pipeline_state.rast->base.point_size > 1.0) &&
              /* Lowering is incompatible with multi-stream xfb, so only do
               * it when stream 0 is the sole stream or no SO is bound to
               * the extra streams. */
              (gs->initial->info.gs.active_stream_mask == 1 ||
                 !has_stream_out_for_streams(ctx)));
   } else {
      /* No user GS; check if we are drawing wide points */
      return ((dinfo->mode == MESA_PRIM_POINTS ||
               fill_mode_lowered(ctx, dinfo) == PIPE_POLYGON_MODE_POINT) &&
              (ctx->gfx_pipeline_state.rast->base.point_size > 1.0 ||
               ctx->gfx_pipeline_state.rast->base.offset_point ||
               (ctx->gfx_pipeline_state.rast->base.point_size_per_vertex &&
                vs->initial->info.outputs_written & VARYING_BIT_PSIZ)) &&
              (vs->initial->info.outputs_written & VARYING_BIT_POS));
   }
}
450 
451 static unsigned
cull_mode_lowered(struct d3d12_context * ctx,unsigned fill_mode)452 cull_mode_lowered(struct d3d12_context *ctx, unsigned fill_mode)
453 {
454    if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
455         !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant) ||
456        ctx->gfx_pipeline_state.rast == NULL ||
457        ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_NONE)
458       return PIPE_FACE_NONE;
459 
460    return ctx->gfx_pipeline_state.rast->base.cull_face;
461 }
462 
/* Compute the provoking-vertex index for the current draw: 0 when
 * flat-shading uses the first vertex, otherwise the last vertex of the
 * primitive (vertex_count - 1).  Also sets *alternate when the primitive
 * is a strip whose triangles alternate orientation, so any reordering must
 * alternate per triangle as well. */
static unsigned
get_provoking_vertex(struct d3d12_selection_context *sel_ctx, bool *alternate, const struct pipe_draw_info *dinfo)
{
   /* NOTE(review): GL_* enums are compared against mesa_prim values here
    * and below; they appear to be numerically identical — confirm. */
   if (dinfo->mode == GL_PATCHES) {
      *alternate = false;
      return 0;
   }

   struct d3d12_shader_selector *vs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_VERTEX];
   struct d3d12_shader_selector *gs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
   /* A user GS (not a driver variant) determines the rasterized primitive. */
   struct d3d12_shader_selector *last_vertex_stage = gs && !gs->is_variant ? gs : vs;

   enum mesa_prim mode;
   switch (last_vertex_stage->stage) {
   case PIPE_SHADER_GEOMETRY:
      mode = (enum mesa_prim)last_vertex_stage->current->nir->info.gs.output_primitive;
      break;
   case PIPE_SHADER_VERTEX:
      mode = (enum mesa_prim)dinfo->mode;
      break;
   default:
      unreachable("Tesselation shaders are not supported");
   }

   bool flatshade_first = sel_ctx->ctx->gfx_pipeline_state.rast &&
                          sel_ctx->ctx->gfx_pipeline_state.rast->base.flatshade_first;
   /* Strips alternate unless a user GS collapses them (it can't emit more
    * than a minimal primitive's worth of vertices). */
   *alternate = (mode == GL_TRIANGLE_STRIP || mode == GL_TRIANGLE_STRIP_ADJACENCY) &&
                (!gs || gs->is_variant ||
                 gs->initial->info.gs.vertices_out > u_prim_vertex_count(mode)->min);
   return flatshade_first ? 0 : u_prim_vertex_count(mode)->min - 1;
}
494 
/* Return true when the bound fragment shader consumes at least one
 * flat-interpolated varying (these are sensitive to the provoking vertex). */
bool
has_flat_varyings(struct d3d12_context *ctx)
{
   struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];

   if (!fs || !fs->current)
      return false;

   nir_foreach_variable_with_modes(input, fs->current->nir,
                                   nir_var_shader_in) {
      if (input->data.interpolation == INTERP_MODE_FLAT &&
          /* Disregard sysvals: the disjunction is NOT a tautology — slots
           * strictly between VARYING_SLOT_TEX7 and VARYING_SLOT_VAR0 are
           * system values and are excluded; ordinary varyings at or below
           * TEX7 and at or above VAR0 are kept. */
          (input->data.location >= VARYING_SLOT_VAR0 ||
             input->data.location <= VARYING_SLOT_TEX7))
         return true;
   }

   return false;
}
514 
/* Decide whether the GS variant must reorder triangle vertices to honor
 * the provoking vertex and/or transform-feedback output order.
 *
 * Side effect: in the xfb-without-flat-shading case, resets
 * sel_ctx->provoking_vertex to 0 since it no longer matters. */
static bool
needs_vertex_reordering(struct d3d12_selection_context *sel_ctx, const struct pipe_draw_info *dinfo)
{
   struct d3d12_context *ctx = sel_ctx->ctx;
   bool flat = ctx->has_flat_varyings;
   bool xfb = ctx->gfx_pipeline_state.num_so_targets > 0;

   /* Fill-mode lowering already rewrites the primitives; don't reorder. */
   if (fill_mode_lowered(ctx, dinfo) != PIPE_POLYGON_MODE_FILL)
      return false;

   /* TODO add support for line primitives */

   /* When flat shading a triangle and provoking vertex is not the first one, we use load_at_vertex.
      If not available for this adapter, or if it's a triangle strip, we need to reorder the vertices */
   if (flat && sel_ctx->provoking_vertex >= 2 && (!d3d12_screen(ctx->base.screen)->have_load_at_vertex ||
                                                  sel_ctx->alternate_tri))
      return true;

   /* When transform feedback is enabled and the output is alternating (triangle strip or triangle
      strip with adjacency), we need to reorder vertices to get the order expected by OpenGL. This
      only works when there is no flat shading involved. In that scenario, we don't care about
      the provoking vertex. */
   if (xfb && !flat && sel_ctx->alternate_tri) {
      sel_ctx->provoking_vertex = 0;
      return true;
   }

   return false;
}
544 
/* Materialize a single varying variable (slot + component slot_frac) in
 * 'nir' from the recorded varying info.  The variable is named in_%d or
 * out_%d after its driver_location.  Patch varyings are relocated into the
 * VARYING_SLOT_PATCH0 range.  New outputs are initialized to 0 via the
 * d3d12_write_0_to_new_varying pass so they are never left undefined. */
static nir_variable *
create_varying_from_info(nir_shader *nir, const struct d3d12_varying_info *info,
                         unsigned slot, unsigned slot_frac, nir_variable_mode mode, bool patch)
{
   nir_variable *var;
   char tmp[100];

   snprintf(tmp, ARRAY_SIZE(tmp),
            mode == nir_var_shader_in ? "in_%d" : "out_%d",
            info->slots[slot].vars[slot_frac].driver_location);
   var = nir_variable_create(nir, mode, info->slots[slot].types[slot_frac], tmp);
   var->data.location = slot;
   var->data.location_frac = slot_frac;
   var->data.driver_location = info->slots[slot].vars[slot_frac].driver_location;
   var->data.interpolation = info->slots[slot].vars[slot_frac].interpolation;
   var->data.patch = info->slots[slot].patch;
   var->data.compact = info->slots[slot].vars[slot_frac].compact;
   if (patch)
      var->data.location += VARYING_SLOT_PATCH0;

   if (mode == nir_var_shader_out)
      NIR_PASS_V(nir, d3d12_write_0_to_new_varying, var);

   return var;
}
570 
571 void
create_varyings_from_info(nir_shader * nir,const struct d3d12_varying_info * info,unsigned slot,nir_variable_mode mode,bool patch)572 create_varyings_from_info(nir_shader *nir, const struct d3d12_varying_info *info,
573                           unsigned slot, nir_variable_mode mode, bool patch)
574 {
575    unsigned mask = info->slots[slot].location_frac_mask;
576    while (mask)
577       create_varying_from_info(nir, info, slot, u_bit_scan(&mask), mode, patch);
578 }
579 
/* Collect the varyings of shader 's' (restricted to 'modes', the slot bit
 * 'mask', and patch vs non-patch) into a d3d12_varying_info, then dedupe it
 * through the screen's varying-info cache.
 *
 * Returns a pointer owned by the cache (freed by
 * d3d12_varying_cache_destroy); callers must not free it.  The hash is
 * accumulated incrementally over the used slots plus the mask, and unused
 * slots are zeroed so identical varying sets hash/compare identically. */
static d3d12_varying_info*
fill_varyings(struct d3d12_context *ctx, const nir_shader *s,
              nir_variable_mode modes, uint64_t mask, bool patch)
{
   struct d3d12_varying_info info;

   info.max = 0;
   info.mask = 0;
   info.hash = 0;

   nir_foreach_variable_with_modes(var, s, modes) {
      unsigned slot = var->data.location;
      bool is_generic_patch = slot >= VARYING_SLOT_PATCH0;
      /* Process only patch varyings when 'patch' is set, and vice versa. */
      if (patch ^ is_generic_patch)
         continue;
      if (is_generic_patch)
         slot -= VARYING_SLOT_PATCH0;
      uint64_t slot_bit = BITFIELD64_BIT(slot);

      if (!(mask & slot_bit))
         continue;

      /* First time this slot is seen: clear it before filling components. */
      if ((info.mask & slot_bit) == 0) {
         memset(info.slots + slot, 0, sizeof(info.slots[0]));
         info.max = MAX2(info.max, slot);
      }

      const struct glsl_type *type = var->type;
      /* GS/TCS inputs are per-vertex arrays; record the element type. */
      if ((s->info.stage == MESA_SHADER_GEOMETRY ||
           s->info.stage == MESA_SHADER_TESS_CTRL) &&
          (modes & nir_var_shader_in) &&
          glsl_type_is_array(type))
         type = glsl_get_array_element(type);
      info.slots[slot].types[var->data.location_frac] = type;

      info.slots[slot].patch = var->data.patch;
      auto& var_slot = info.slots[slot].vars[var->data.location_frac];
      var_slot.driver_location = var->data.driver_location;
      var_slot.interpolation = var->data.interpolation;
      var_slot.compact = var->data.compact;
      info.mask |= slot_bit;
      info.slots[slot].location_frac_mask |= (1 << var->data.location_frac);
   }

   /* Zero unused slots (so memcmp-based compare works) and hash used ones. */
   for (uint32_t i = 0; i <= info.max; ++i) {
      if (((1llu << i) & info.mask) == 0)
         memset(info.slots + i, 0, sizeof(info.slots[0]));
      else
         info.hash = _mesa_hash_data_with_seed(info.slots + i, sizeof(info.slots[0]), info.hash);
   }
   info.hash = _mesa_hash_data_with_seed(&info.mask, sizeof(info.mask), info.hash);

   struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);

   /* Dedupe through the screen-wide cache under its mutex. */
   mtx_lock(&screen->varying_info_mutex);
   set_entry *pentry = _mesa_set_search_pre_hashed(screen->varying_info_set, info.hash, &info);
   if (pentry != nullptr) {
      mtx_unlock(&screen->varying_info_mutex);
      return (d3d12_varying_info*)pentry->key;
   }
   else {
      d3d12_varying_info *key = MALLOC_STRUCT(d3d12_varying_info);
      *key = info;

      _mesa_set_add_pre_hashed(screen->varying_info_set, info.hash, key);

      mtx_unlock(&screen->varying_info_mutex);
      return key;
   }
}
650 
651 static void
fill_flat_varyings(struct d3d12_gs_variant_key * key,d3d12_shader_selector * fs)652 fill_flat_varyings(struct d3d12_gs_variant_key *key, d3d12_shader_selector *fs)
653 {
654    if (!fs || !fs->current)
655       return;
656 
657    nir_foreach_variable_with_modes(input, fs->current->nir,
658                                    nir_var_shader_in) {
659       if (input->data.interpolation == INTERP_MODE_FLAT)
660          key->flat_varyings |= BITFIELD64_BIT(input->data.location);
661    }
662 }
663 
/* Deep equality for two varying-info records (used as the set-compare
 * callback of the screen's varying-info cache).  NULLs compare unequal
 * unless both are the same pointer. */
bool
d3d12_compare_varying_info(const d3d12_varying_info *expect, const d3d12_varying_info *have)
{
   if (expect == have)
      return true;

   if (expect == nullptr || have == nullptr)
      return false;

   if (expect->mask != have->mask
      || expect->max != have->max)
      return false;

   if (!expect->mask)
      return true;

   /* 6 is a rough (wild) guess for a bulk memcmp cross-over point.  When
    * there are a small number of slots present, comparing the individual
    * slots is much faster. */
   if (util_bitcount64(expect->mask) < 6) {
      uint64_t mask = expect->mask;
      while (mask) {
         int slot = u_bit_scan64(&mask);
         if (memcmp(&expect->slots[slot], &have->slots[slot], sizeof(have->slots[slot])))
            return false;
      }

      return true;
   }

   /* Unused slots were zeroed at fill time, so a bulk compare is safe. */
   return !memcmp(expect->slots, have->slots, sizeof(expect->slots[0]) * expect->max);
}
695 
696 
varying_info_hash(const void * info)697 uint32_t varying_info_hash(const void *info) {
698    return ((d3d12_varying_info*)info)->hash;
699 }
varying_info_compare(const void * a,const void * b)700 bool varying_info_compare(const void *a, const void *b) {
701    return d3d12_compare_varying_info((d3d12_varying_info*)a, (d3d12_varying_info*)b);
702 }
varying_info_entry_destroy(set_entry * entry)703 void varying_info_entry_destroy(set_entry *entry) {
704    if (entry->key)
705       free((void*)entry->key);
706 }
707 
/* Create the screen-wide varying-info dedup cache (see fill_varyings). */
void
d3d12_varying_cache_init(struct d3d12_screen *screen) {
   screen->varying_info_set = _mesa_set_create(nullptr, varying_info_hash, varying_info_compare);
}
712 
/* Destroy the varying-info cache, freeing every cached entry via
 * varying_info_entry_destroy. */
void
d3d12_varying_cache_destroy(struct d3d12_screen *screen) {
   _mesa_set_destroy(screen->varying_info_set, varying_info_entry_destroy);
}
717 
718 
/* Select (or create) the geometry-shader variant required by the current
 * draw state — fill-mode emulation, point-sprite expansion, or vertex
 * reordering — and bind it to the GEOMETRY stage.  A user-supplied GS is
 * left untouched; a passthrough variant is still produced when no lowering
 * is needed. */
static void
validate_geometry_shader_variant(struct d3d12_selection_context *sel_ctx)
{
   struct d3d12_context *ctx = sel_ctx->ctx;
   d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];

   /* Nothing to do if there is a user geometry shader bound */
   if (gs != NULL && !gs->is_variant)
      return;

   d3d12_shader_selector* vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
   d3d12_shader_selector* fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];

   struct d3d12_gs_variant_key key;
   key.all = 0;
   key.flat_varyings = 0;

   /* Fill the geometry shader variant key */
   if (sel_ctx->fill_mode_lowered != PIPE_POLYGON_MODE_FILL) {
      key.fill_mode = sel_ctx->fill_mode_lowered;
      key.cull_mode = sel_ctx->cull_mode_lowered;
      /* NOTE(review): fs is dereferenced without a NULL check here —
       * presumably a FS is always bound on this path; confirm. */
      key.has_front_face = BITSET_TEST(fs->initial->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
      if (key.cull_mode != PIPE_FACE_NONE || key.has_front_face)
         /* Window-space y-flip inverts the effective winding. */
         key.front_ccw = ctx->gfx_pipeline_state.rast->base.front_ccw ^ (ctx->flip_y < 0);
      key.edge_flag_fix = needs_edge_flag_fix(ctx->initial_api_prim);
      fill_flat_varyings(&key, fs);
      if (key.flat_varyings != 0)
         key.flatshade_first = ctx->gfx_pipeline_state.rast->base.flatshade_first;
   } else if (sel_ctx->needs_point_sprite_lowering) {
      key.passthrough = true;
   } else if (sel_ctx->needs_vertex_reordering) {
      /* TODO support cases where flat shading (pv != 0) and xfb are enabled */
      key.provoking_vertex = sel_ctx->provoking_vertex;
      key.alternate_tri = sel_ctx->alternate_tri;
   }

   /* The variant consumes the VS outputs; compute them lazily and cache. */
   if (vs->initial_output_vars == nullptr) {
      vs->initial_output_vars = fill_varyings(sel_ctx->ctx, vs->initial, nir_var_shader_out,
                                                vs->initial->info.outputs_written, false);
   }
   key.varyings = vs->initial_output_vars;
   gs = d3d12_get_gs_variant(ctx, &key);
   ctx->gfx_stages[PIPE_SHADER_GEOMETRY] = gs;
}
763 
764 static void
validate_tess_ctrl_shader_variant(struct d3d12_selection_context * sel_ctx)765 validate_tess_ctrl_shader_variant(struct d3d12_selection_context *sel_ctx)
766 {
767    struct d3d12_context *ctx = sel_ctx->ctx;
768    d3d12_shader_selector *tcs = ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];
769 
770    /* Nothing to do if there is a user tess ctrl shader bound */
771    if (tcs != NULL && !tcs->is_variant)
772       return;
773 
774    d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
775    d3d12_shader_selector *tes = ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
776    struct d3d12_tcs_variant_key key = {0};
777 
778    bool variant_needed = tes != nullptr;
779 
780    /* Fill the variant key */
781    if (variant_needed) {
782       if (vs->initial_output_vars == nullptr) {
783          vs->initial_output_vars = fill_varyings(sel_ctx->ctx, vs->initial, nir_var_shader_out,
784                                                  vs->initial->info.outputs_written, false);
785       }
786       key.varyings = vs->initial_output_vars;
787       key.vertices_out = ctx->patch_vertices;
788    }
789 
790    /* Find/create the proper variant and bind it */
791    tcs = variant_needed ? d3d12_get_tcs_variant(ctx, &key) : NULL;
792    ctx->gfx_stages[PIPE_SHADER_TESS_CTRL] = tcs;
793 }
794 
795 static bool
d3d12_compare_shader_keys(struct d3d12_selection_context * sel_ctx,const d3d12_shader_key * expect,const d3d12_shader_key * have)796 d3d12_compare_shader_keys(struct d3d12_selection_context* sel_ctx, const d3d12_shader_key *expect, const d3d12_shader_key *have)
797 {
798    assert(expect->stage == have->stage);
799    assert(expect);
800    assert(have);
801 
802    if (expect->hash != have->hash)
803       return false;
804 
805    switch (expect->stage) {
806    case PIPE_SHADER_VERTEX:
807       if (expect->vs.needs_format_emulation != have->vs.needs_format_emulation)
808          return false;
809 
810       if (expect->vs.needs_format_emulation) {
811          if (memcmp(expect->vs.format_conversion, have->vs.format_conversion,
812             sel_ctx->ctx->gfx_pipeline_state.ves->num_elements * sizeof(enum pipe_format)))
813             return false;
814       }
815       break;
816    case PIPE_SHADER_GEOMETRY:
817       if (expect->gs.all != have->gs.all)
818          return false;
819       break;
820    case PIPE_SHADER_TESS_CTRL:
821       if (expect->hs.all != have->hs.all ||
822           expect->hs.required_patch_outputs != have->hs.required_patch_outputs)
823          return false;
824       break;
825    case PIPE_SHADER_TESS_EVAL:
826       if (expect->ds.tcs_vertices_out != have->ds.tcs_vertices_out ||
827           expect->ds.prev_patch_outputs != have->ds.prev_patch_outputs ||
828           expect->ds.required_patch_inputs != have->ds.required_patch_inputs)
829          return false;
830       break;
831    case PIPE_SHADER_FRAGMENT:
832       if (expect->fs.all != have->fs.all)
833          return false;
834       break;
835    case PIPE_SHADER_COMPUTE:
836       if (memcmp(expect->cs.workgroup_size, have->cs.workgroup_size,
837                  sizeof(have->cs.workgroup_size)))
838          return false;
839       break;
840    default:
841       unreachable("invalid stage");
842    }
843 
844    if (expect->n_texture_states != have->n_texture_states)
845       return false;
846 
847    if (expect->n_images != have->n_images)
848       return false;
849 
850    if (expect->n_texture_states > 0 &&
851        memcmp(expect->tex_wrap_states, have->tex_wrap_states,
852               expect->n_texture_states * sizeof(dxil_wrap_sampler_state)))
853       return false;
854 
855    if (memcmp(expect->swizzle_state, have->swizzle_state,
856               expect->n_texture_states * sizeof(dxil_texture_swizzle_state)))
857       return false;
858 
859    if (memcmp(expect->sampler_compare_funcs, have->sampler_compare_funcs,
860               expect->n_texture_states * sizeof(enum compare_func)))
861       return false;
862 
863    if (memcmp(expect->image_format_conversion, have->image_format_conversion,
864       expect->n_images * sizeof(struct d3d12_image_format_conversion_info)))
865       return false;
866 
867    return
868       expect->required_varying_inputs == have->required_varying_inputs &&
869       expect->required_varying_outputs == have->required_varying_outputs &&
870       expect->next_varying_inputs == have->next_varying_inputs &&
871       expect->prev_varying_outputs == have->prev_varying_outputs &&
872       expect->common_all == have->common_all &&
873       expect->tex_saturate_s == have->tex_saturate_s &&
874       expect->tex_saturate_r == have->tex_saturate_r &&
875       expect->tex_saturate_t == have->tex_saturate_t;
876 }
877 
/* Compute the 32-bit hash stored in key->hash and checked first by
 * d3d12_compare_shader_keys().  Varying-info pointers are interned in the
 * screen cache, so folding the pointer value (low + high 32 bits) is a
 * valid identity hash.  Not every field is hashed — the hash only needs to
 * be a cheap early-out; the full comparison handles collisions. */
static uint32_t
d3d12_shader_key_hash(const d3d12_shader_key *key)
{
   uint32_t hash;

   hash = (uint32_t)key->stage;
   /* Fold both halves of the interned varying-info pointers */
   hash += ((uint64_t)key->required_varying_inputs) +
            (((uint64_t)key->required_varying_inputs) >> 32);
   hash += ((uint64_t)key->required_varying_outputs) +
            (((uint64_t)key->required_varying_outputs) >> 32);

   hash += key->next_varying_inputs;
   hash += key->prev_varying_outputs;
   switch (key->stage) {
   case PIPE_SHADER_VERTEX:
      /* (Probably) not worth the bit extraction for needs_format_emulation and
       * the rest of the format_conversion data is large.  Don't bother
       * hashing for now until this is shown to be worthwhile. */
       break;
   case PIPE_SHADER_GEOMETRY:
      hash += key->gs.all;
      break;
   case PIPE_SHADER_FRAGMENT:
      hash += key->fs.all;
      break;
   case PIPE_SHADER_COMPUTE:
      hash = _mesa_hash_data_with_seed(&key->cs, sizeof(key->cs), hash);
      break;
   case PIPE_SHADER_TESS_CTRL:
      hash += key->hs.all;
      hash += ((uint64_t)key->hs.required_patch_outputs) +
               (((uint64_t)key->hs.required_patch_outputs) >> 32);
      break;
   case PIPE_SHADER_TESS_EVAL:
      hash += key->ds.tcs_vertices_out;
      hash += key->ds.prev_patch_outputs;
      hash += ((uint64_t)key->ds.required_patch_inputs) +
               (((uint64_t)key->ds.required_patch_inputs) >> 32);
      break;
   default:
      /* No type specific information to hash for other stages. */
      break;
   }

   hash += key->n_texture_states;
   hash += key->n_images;
   return hash;
}
926 
/* Build the complete variant key for shader 'sel' given its pipeline
 * neighbors 'prev' and 'next' and the current draw state in sel_ctx.
 * The key captures everything that forces a distinct compiled variant:
 * inter-stage varying layouts, stage-specific lowering flags, sampler
 * wrap/swizzle/compare state, image format casts, and (for compute) the
 * variable workgroup size.  key->hash is computed last.
 */
static void
d3d12_fill_shader_key(struct d3d12_selection_context *sel_ctx,
                      d3d12_shader_key *key, d3d12_shader_selector *sel,
                      d3d12_shader_selector *prev, d3d12_shader_selector *next)
{
   pipe_shader_type stage = sel->stage;

   /* Inputs the hardware/driver supplies implicitly — never required from
    * the previous stage */
   uint64_t system_generated_in_values =
         VARYING_BIT_PNTC |
         VARYING_BIT_PRIMITIVE_ID;

   /* Outputs handled as system values — not part of the varying contract */
   uint64_t system_out_values =
         VARYING_BIT_CLIP_DIST0 |
         VARYING_BIT_CLIP_DIST1;

   /* Zero the stage-independent prefix; the per-stage union is reset in the
    * switch below (the union starts at offsetof(..., vs)) */
   memset(key, 0, offsetof(d3d12_shader_key, vs));
   key->stage = stage;

   switch (stage)
   {
   case PIPE_SHADER_VERTEX:
      key->vs.needs_format_emulation = 0;
      break;
   case PIPE_SHADER_FRAGMENT:
      key->fs.all = 0;
      break;
   case PIPE_SHADER_GEOMETRY:
      key->gs.all = 0;
      break;
   case PIPE_SHADER_TESS_CTRL:
      key->hs.all = 0;
      key->hs.required_patch_outputs = nullptr;
      break;
   case PIPE_SHADER_TESS_EVAL:
      key->ds.tcs_vertices_out = 0;
      key->ds.prev_patch_outputs = 0;
      key->ds.required_patch_inputs = nullptr;
      break;
   case PIPE_SHADER_COMPUTE:
      memset(key->cs.workgroup_size, 0, sizeof(key->cs.workgroup_size));
      break;
   default: unreachable("Invalid stage type");
   }

   key->n_texture_states = 0;
   /* Points at a context-owned scratch array, filled below as needed */
   key->tex_wrap_states = sel_ctx->ctx->tex_wrap_states_shader_key;
   key->n_images = 0;

   if (prev) {
      /* We require as inputs what the previous stage has written,
       * except certain system values */

      /* Cache slot on the previous shader for its computed output varyings;
       * which slot depends on which consumer stage we are */
      struct d3d12_varying_info **output_vars = nullptr;

      switch (stage) {
      case PIPE_SHADER_FRAGMENT:
         system_out_values |= VARYING_BIT_POS | VARYING_BIT_PSIZ | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER;
         output_vars = &prev->current->output_vars_fs;
         break;
      case PIPE_SHADER_GEOMETRY:
         system_out_values |= VARYING_BIT_POS;
         output_vars = &prev->current->output_vars_gs;
         break;
      default:
         output_vars = &prev->current->output_vars_default;
         break;
      }

      uint64_t mask = prev->current->nir->info.outputs_written & ~system_out_values;

      /* Lazily compute and cache the previous stage's output varyings */
      if (*output_vars == nullptr) {
         *output_vars = fill_varyings(sel_ctx->ctx, prev->current->nir,
                                      nir_var_shader_out, mask, false);
      }

      key->required_varying_inputs = *output_vars;

      key->prev_varying_outputs = prev->current->nir->info.outputs_written;

      if (stage == PIPE_SHADER_TESS_EVAL) {
         /* TES additionally needs the TCS's per-patch outputs */
         uint32_t patch_mask = prev->current->nir->info.patch_outputs_written;

         if (prev->current->tess_eval_output_vars == nullptr) {
            prev->current->tess_eval_output_vars = fill_varyings(sel_ctx->ctx, prev->current->nir,
                                                                 nir_var_shader_out, patch_mask, true);
         }

         key->ds.required_patch_inputs = prev->current->tess_eval_output_vars;
         key->ds.prev_patch_outputs = patch_mask;
      }

      /* Set the provoking vertex based on the previous shader output. Only set the
       * key value if the driver actually supports changing the provoking vertex though */
      if (stage == PIPE_SHADER_FRAGMENT && sel_ctx->ctx->gfx_pipeline_state.rast &&
          !sel_ctx->needs_vertex_reordering &&
          d3d12_screen(sel_ctx->ctx->base.screen)->have_load_at_vertex)
         key->fs.provoking_vertex = sel_ctx->provoking_vertex;

      /* Get the input clip distance size. The info's clip_distance_array_size corresponds
       * to the output, and in cases of TES or GS you could have differently-sized inputs
       * and outputs. For FS, there is no output, so it's repurposed to mean input.
       */
      if (stage != PIPE_SHADER_FRAGMENT)
         key->input_clip_size = prev->current->nir->info.clip_distance_array_size;
   }

   /* We require as outputs what the next stage reads,
    * except certain system values */
   if (next) {
      if (!next->is_variant) {

         struct d3d12_varying_info **input_vars = &next->current->input_vars_default;

         if (stage == PIPE_SHADER_VERTEX) {
            system_generated_in_values |= VARYING_BIT_POS;
            input_vars = &next->current->input_vars_vs;
         }
         uint64_t mask = next->current->nir->info.inputs_read & ~system_generated_in_values;


         /* Lazily compute and cache the next stage's input varyings */
         if (*input_vars == nullptr) {
            *input_vars = fill_varyings(sel_ctx->ctx, next->current->nir,
                                        nir_var_shader_in, mask, false);
         }

         key->required_varying_outputs = *input_vars;


         if (stage == PIPE_SHADER_TESS_CTRL) {
            uint32_t patch_mask = next->current->nir->info.patch_outputs_read;

            /* NOTE(review): the mask comes from 'next' but the varyings are
             * filled from prev->current->nir's inputs and cached on 'prev' —
             * also assumes prev is non-NULL here (a VS below a TCS); confirm
             * both against the callers/history. */
            if (prev->current->tess_ctrl_input_vars == nullptr){
               prev->current->tess_ctrl_input_vars = fill_varyings(sel_ctx->ctx, prev->current->nir,
                                                                   nir_var_shader_in, patch_mask, true);
            }

            key->hs.required_patch_outputs = prev->current->tess_ctrl_input_vars;
            key->hs.next_patch_inputs = patch_mask;
         }
      }
      key->next_varying_inputs = next->current->nir->info.inputs_read;

   }

   /* The last stage before rasterization handles depth/clip conventions */
   if (stage == PIPE_SHADER_GEOMETRY ||
       ((stage == PIPE_SHADER_VERTEX || stage == PIPE_SHADER_TESS_EVAL) &&
          (!next || next->stage == PIPE_SHADER_FRAGMENT))) {
      key->last_vertex_processing_stage = 1;
      key->invert_depth = sel_ctx->ctx->reverse_depth_range;
      key->halfz = sel_ctx->ctx->gfx_pipeline_state.rast ?
         sel_ctx->ctx->gfx_pipeline_state.rast->base.clip_halfz : false;
      if (sel_ctx->ctx->pstipple.enabled &&
         sel_ctx->ctx->gfx_pipeline_state.rast->base.poly_stipple_enable)
         key->next_varying_inputs |= VARYING_BIT_POS;
   }

   if (stage == PIPE_SHADER_GEOMETRY && sel_ctx->ctx->gfx_pipeline_state.rast) {
      struct pipe_rasterizer_state *rast = &sel_ctx->ctx->gfx_pipeline_state.rast->base;
      if (sel_ctx->needs_point_sprite_lowering) {
         key->gs.writes_psize = 1;
         key->gs.point_size_per_vertex = rast->point_size_per_vertex;
         key->gs.sprite_coord_enable = rast->sprite_coord_enable;
         key->gs.sprite_origin_upper_left = (rast->sprite_coord_mode != PIPE_SPRITE_COORD_LOWER_LEFT);
         if (sel_ctx->ctx->flip_y < 0)
            key->gs.sprite_origin_upper_left = !key->gs.sprite_origin_upper_left;
         key->gs.aa_point = rast->point_smooth;
         key->gs.stream_output_factor = 6;
      } else if (sel_ctx->fill_mode_lowered == PIPE_POLYGON_MODE_LINE) {
         key->gs.stream_output_factor = 2;
      } else if (sel_ctx->needs_vertex_reordering && !sel->is_variant) {
         key->gs.triangle_strip = 1;
      }

      if (sel->is_variant && next && next->initial->info.inputs_read & VARYING_BIT_PRIMITIVE_ID)
         key->gs.primitive_id = 1;
   } else if (stage == PIPE_SHADER_FRAGMENT) {
      key->fs.missing_dual_src_outputs = sel_ctx->missing_dual_src_outputs;
      key->fs.frag_result_color_lowering = sel_ctx->frag_result_color_lowering;
      key->fs.manual_depth_range = sel_ctx->manual_depth_range;
      key->fs.polygon_stipple = sel_ctx->ctx->pstipple.enabled &&
         sel_ctx->ctx->gfx_pipeline_state.rast->base.poly_stipple_enable;
      key->fs.multisample_disabled = sel_ctx->ctx->gfx_pipeline_state.rast &&
         !sel_ctx->ctx->gfx_pipeline_state.rast->desc.MultisampleEnable;
      /* Logic ops on non-float RTVs require casting the output */
      if (sel_ctx->ctx->gfx_pipeline_state.blend &&
          sel_ctx->ctx->gfx_pipeline_state.blend->desc.RenderTarget[0].LogicOpEnable &&
          !sel_ctx->ctx->gfx_pipeline_state.has_float_rtv) {
         key->fs.cast_to_uint = util_format_is_unorm(sel_ctx->ctx->fb.cbufs[0]->format);
         key->fs.cast_to_int = !key->fs.cast_to_uint;
      }
   } else if (stage == PIPE_SHADER_TESS_CTRL) {
      /* Tessellation domain state lives on the TES in NIR but on the HS
       * (patch-constant function) in D3D12, so copy it over (with defaults
       * when no TES is bound) */
      if (next && next->current->nir->info.stage == MESA_SHADER_TESS_EVAL) {
         key->hs.primitive_mode = next->current->nir->info.tess._primitive_mode;
         key->hs.ccw = next->current->nir->info.tess.ccw;
         key->hs.point_mode = next->current->nir->info.tess.point_mode;
         key->hs.spacing = next->current->nir->info.tess.spacing;
      } else {
         key->hs.primitive_mode = TESS_PRIMITIVE_QUADS;
         key->hs.ccw = true;
         key->hs.point_mode = false;
         key->hs.spacing = TESS_SPACING_EQUAL;
      }
      key->hs.patch_vertices_in = MAX2(sel_ctx->ctx->patch_vertices, 1);
   } else if (stage == PIPE_SHADER_TESS_EVAL) {
      if (prev && prev->current->nir->info.stage == MESA_SHADER_TESS_CTRL)
         key->ds.tcs_vertices_out = prev->current->nir->info.tess.tcs_vertices_out;
      else
         key->ds.tcs_vertices_out = 32;
   }

   if (sel->samples_int_textures) {
      key->samples_int_textures = sel->samples_int_textures;
      key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
      /* Copy only states with integer textures */
      for(int i = 0; i < key->n_texture_states; ++i) {
         auto& wrap_state = sel_ctx->ctx->tex_wrap_states[stage][i];
         if (wrap_state.is_int_sampler) {
            memcpy(&key->tex_wrap_states[i], &wrap_state, sizeof(wrap_state));
            key->swizzle_state[i] = sel_ctx->ctx->tex_swizzle_state[stage][i];
         } else {
            memset(&key->tex_wrap_states[i], 0, sizeof(key->tex_wrap_states[i]));
            key->swizzle_state[i] = { PIPE_SWIZZLE_X,  PIPE_SWIZZLE_Y,  PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W };
         }
      }
   }

   /* Record which linear samplers use CLAMP wrap per coordinate — those
    * need coordinate saturation lowering in NIR */
   for (unsigned i = 0, e = sel_ctx->ctx->num_samplers[stage]; i < e; ++i) {
      if (!sel_ctx->ctx->samplers[stage][i] ||
          sel_ctx->ctx->samplers[stage][i]->filter == PIPE_TEX_FILTER_NEAREST)
         continue;

      if (sel_ctx->ctx->samplers[stage][i]->wrap_r == PIPE_TEX_WRAP_CLAMP)
         key->tex_saturate_r |= 1 << i;
      if (sel_ctx->ctx->samplers[stage][i]->wrap_s == PIPE_TEX_WRAP_CLAMP)
         key->tex_saturate_s |= 1 << i;
      if (sel_ctx->ctx->samplers[stage][i]->wrap_t == PIPE_TEX_WRAP_CLAMP)
         key->tex_saturate_t |= 1 << i;
   }

   /* Shadow comparisons with explicit lod/bias/grad are lowered in NIR and
    * need the compare funcs and swizzles in the key */
   if (sel->compare_with_lod_bias_grad) {
      key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
      memcpy(key->sampler_compare_funcs, sel_ctx->ctx->tex_compare_func[stage],
             key->n_texture_states * sizeof(enum compare_func));
      memcpy(key->swizzle_state, sel_ctx->ctx->tex_swizzle_state[stage],
             key->n_texture_states * sizeof(dxil_texture_swizzle_state));
      if (!sel->samples_int_textures)
         memset(key->tex_wrap_states, 0, sizeof(key->tex_wrap_states[0]) * key->n_texture_states);
   }

   if (stage == PIPE_SHADER_VERTEX && sel_ctx->ctx->gfx_pipeline_state.ves) {
      key->vs.needs_format_emulation = sel_ctx->ctx->gfx_pipeline_state.ves->needs_format_emulation;
      if (key->vs.needs_format_emulation) {
         unsigned num_elements = sel_ctx->ctx->gfx_pipeline_state.ves->num_elements;

         /* Zero the tail so memcmp over the full array is deterministic */
         memset(key->vs.format_conversion + num_elements,
                  0,
                  sizeof(key->vs.format_conversion) - (num_elements * sizeof(enum pipe_format)));

         memcpy(key->vs.format_conversion, sel_ctx->ctx->gfx_pipeline_state.ves->format_conversion,
                  num_elements * sizeof(enum pipe_format));
      }
   }

   /* When a generated GS consumes front-face, the FS must read the value the
    * GS forwards rather than the system value */
   if (stage == PIPE_SHADER_FRAGMENT &&
       sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY] &&
       sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant &&
       sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->gs_key.has_front_face) {
      key->fs.remap_front_facing = 1;
   }

   if (stage == PIPE_SHADER_COMPUTE && sel_ctx->variable_workgroup_size) {
      memcpy(key->cs.workgroup_size, sel_ctx->variable_workgroup_size, sizeof(key->cs.workgroup_size));
   }

   key->n_images = sel_ctx->ctx->num_image_views[stage];
   for (int i = 0; i < key->n_images; ++i) {
      key->image_format_conversion[i].emulated_format = sel_ctx->ctx->image_view_emulation_formats[stage][i];
      if (key->image_format_conversion[i].emulated_format != PIPE_FORMAT_NONE)
         key->image_format_conversion[i].view_format = sel_ctx->ctx->image_views[stage][i].format;
   }

   /* Hash last, once every field that feeds it is final */
   key->hash = d3d12_shader_key_hash(key);
}
1209 
1210 static void
select_shader_variant(struct d3d12_selection_context * sel_ctx,d3d12_shader_selector * sel,d3d12_shader_selector * prev,d3d12_shader_selector * next)1211 select_shader_variant(struct d3d12_selection_context *sel_ctx, d3d12_shader_selector *sel,
1212                      d3d12_shader_selector *prev, d3d12_shader_selector *next)
1213 {
1214    struct d3d12_context *ctx = sel_ctx->ctx;
1215    d3d12_shader_key key;
1216    nir_shader *new_nir_variant;
1217    unsigned pstipple_binding = UINT32_MAX;
1218 
1219    d3d12_fill_shader_key(sel_ctx, &key, sel, prev, next);
1220 
1221    /* Check for an existing variant */
1222    for (d3d12_shader *variant = sel->first; variant;
1223         variant = variant->next_variant) {
1224 
1225       if (d3d12_compare_shader_keys(sel_ctx, &key, &variant->key)) {
1226          sel->current = variant;
1227          return;
1228       }
1229    }
1230 
1231    /* Clone the NIR shader */
1232    new_nir_variant = nir_shader_clone(sel, sel->initial);
1233 
1234    /* Apply any needed lowering passes */
1235    if (key.stage == PIPE_SHADER_GEOMETRY) {
1236       if (key.gs.writes_psize) {
1237          NIR_PASS_V(new_nir_variant, d3d12_lower_point_sprite,
1238                     !key.gs.sprite_origin_upper_left,
1239                     key.gs.point_size_per_vertex,
1240                     key.gs.sprite_coord_enable,
1241                     key.next_varying_inputs);
1242 
1243          nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
1244          nir_shader_gather_info(new_nir_variant, impl);
1245       }
1246 
1247       if (key.gs.primitive_id) {
1248          NIR_PASS_V(new_nir_variant, d3d12_lower_primitive_id);
1249 
1250          nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
1251          nir_shader_gather_info(new_nir_variant, impl);
1252       }
1253 
1254       if (key.gs.triangle_strip)
1255          NIR_PASS_V(new_nir_variant, d3d12_lower_triangle_strip);
1256    }
1257    else if (key.stage == PIPE_SHADER_FRAGMENT)
1258    {
1259       if (key.fs.polygon_stipple) {
1260          NIR_PASS_V(new_nir_variant, nir_lower_pstipple_fs,
1261                     &pstipple_binding, 0, false, nir_type_bool1);
1262 
1263          nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
1264          nir_shader_gather_info(new_nir_variant, impl);
1265       }
1266 
1267       if (key.fs.remap_front_facing) {
1268          dxil_nir_forward_front_face(new_nir_variant);
1269 
1270          nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
1271          nir_shader_gather_info(new_nir_variant, impl);
1272       }
1273 
1274       if (key.fs.missing_dual_src_outputs) {
1275          NIR_PASS_V(new_nir_variant, d3d12_add_missing_dual_src_target,
1276                     key.fs.missing_dual_src_outputs);
1277       } else if (key.fs.frag_result_color_lowering) {
1278          NIR_PASS_V(new_nir_variant, nir_lower_fragcolor,
1279                     key.fs.frag_result_color_lowering);
1280       }
1281 
1282       if (key.fs.manual_depth_range)
1283          NIR_PASS_V(new_nir_variant, d3d12_lower_depth_range);
1284    }
1285 
1286 
1287    if (sel->compare_with_lod_bias_grad) {
1288       STATIC_ASSERT(sizeof(dxil_texture_swizzle_state) ==
1289                     sizeof(nir_lower_tex_shadow_swizzle));
1290 
1291       NIR_PASS_V(new_nir_variant, nir_lower_tex_shadow, key.n_texture_states,
1292                  key.sampler_compare_funcs, (nir_lower_tex_shadow_swizzle *)key.swizzle_state);
1293    }
1294 
1295    if (key.stage == PIPE_SHADER_FRAGMENT) {
1296       if (key.fs.cast_to_uint)
1297          NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, false);
1298       if (key.fs.cast_to_int)
1299          NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, true);
1300    }
1301 
1302    if (key.n_images) {
1303       d3d12_image_format_conversion_info_arr image_format_arr = { key.n_images, key.image_format_conversion };
1304       NIR_PASS_V(new_nir_variant, d3d12_lower_image_casts, &image_format_arr);
1305    }
1306 
1307    if (key.stage == PIPE_SHADER_COMPUTE && sel->workgroup_size_variable) {
1308       new_nir_variant->info.workgroup_size[0] = key.cs.workgroup_size[0];
1309       new_nir_variant->info.workgroup_size[1] = key.cs.workgroup_size[1];
1310       new_nir_variant->info.workgroup_size[2] = key.cs.workgroup_size[2];
1311    }
1312 
1313    if (new_nir_variant->info.stage == MESA_SHADER_TESS_CTRL) {
1314       new_nir_variant->info.tess._primitive_mode = (tess_primitive_mode)key.hs.primitive_mode;
1315       new_nir_variant->info.tess.ccw = key.hs.ccw;
1316       new_nir_variant->info.tess.point_mode = key.hs.point_mode;
1317       new_nir_variant->info.tess.spacing = key.hs.spacing;
1318 
1319       NIR_PASS_V(new_nir_variant, dxil_nir_set_tcs_patches_in, key.hs.patch_vertices_in);
1320    } else if (new_nir_variant->info.stage == MESA_SHADER_TESS_EVAL) {
1321       new_nir_variant->info.tess.tcs_vertices_out = key.ds.tcs_vertices_out;
1322    }
1323 
1324    {
1325       struct nir_lower_tex_options tex_options = { };
1326       tex_options.lower_txp = ~0u; /* No equivalent for textureProj */
1327       tex_options.lower_rect = true;
1328       tex_options.lower_rect_offset = true;
1329       tex_options.saturate_s = key.tex_saturate_s;
1330       tex_options.saturate_r = key.tex_saturate_r;
1331       tex_options.saturate_t = key.tex_saturate_t;
1332       tex_options.lower_invalid_implicit_lod = true;
1333       tex_options.lower_tg4_offsets = true;
1334 
1335       NIR_PASS_V(new_nir_variant, nir_lower_tex, &tex_options);
1336    }
1337 
1338    /* Add the needed in and outputs, and re-sort */
1339    if (prev) {
1340       if (key.required_varying_inputs != nullptr) {
1341          uint64_t mask = key.required_varying_inputs->mask & ~new_nir_variant->info.inputs_read;
1342          new_nir_variant->info.inputs_read |= mask;
1343          while (mask) {
1344             int slot = u_bit_scan64(&mask);
1345             create_varyings_from_info(new_nir_variant, key.required_varying_inputs, slot, nir_var_shader_in, false);
1346          }
1347       }
1348 
1349       if (sel->stage == PIPE_SHADER_TESS_EVAL) {
1350          uint32_t patch_mask = (uint32_t)key.ds.required_patch_inputs->mask & ~new_nir_variant->info.patch_inputs_read;
1351          new_nir_variant->info.patch_inputs_read |= patch_mask;
1352          while (patch_mask) {
1353             int slot = u_bit_scan(&patch_mask);
1354             create_varyings_from_info(new_nir_variant, key.ds.required_patch_inputs, slot, nir_var_shader_in, true);
1355          }
1356       }
1357       dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_in,
1358                                       key.prev_varying_outputs);
1359    }
1360 
1361 
1362    if (next) {
1363       if (key.required_varying_outputs != nullptr) {
1364          uint64_t mask = key.required_varying_outputs->mask & ~new_nir_variant->info.outputs_written;
1365          new_nir_variant->info.outputs_written |= mask;
1366          while (mask) {
1367             int slot = u_bit_scan64(&mask);
1368             create_varyings_from_info(new_nir_variant, key.required_varying_outputs, slot, nir_var_shader_out, false);
1369          }
1370       }
1371 
1372       if (sel->stage == PIPE_SHADER_TESS_CTRL &&
1373             key.hs.required_patch_outputs != nullptr) {
1374          uint32_t patch_mask = (uint32_t)key.hs.required_patch_outputs->mask & ~new_nir_variant->info.patch_outputs_written;
1375          new_nir_variant->info.patch_outputs_written |= patch_mask;
1376          while (patch_mask) {
1377             int slot = u_bit_scan(&patch_mask);
1378             create_varyings_from_info(new_nir_variant, key.hs.required_patch_outputs, slot, nir_var_shader_out, true);
1379          }
1380       }
1381       dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_out,
1382                                      key.next_varying_inputs);
1383    }
1384 
1385    d3d12_shader *new_variant = compile_nir(ctx, sel, &key, new_nir_variant);
1386    assert(new_variant);
1387 
1388    /* keep track of polygon stipple texture binding */
1389    new_variant->pstipple_binding = pstipple_binding;
1390 
1391    /* prepend the new shader in the selector chain and pick it */
1392    new_variant->next_variant = sel->first;
1393    sel->current = sel->first = new_variant;
1394 }
1395 
1396 static d3d12_shader_selector *
get_prev_shader(struct d3d12_context * ctx,pipe_shader_type current)1397 get_prev_shader(struct d3d12_context *ctx, pipe_shader_type current)
1398 {
1399    switch (current) {
1400    case PIPE_SHADER_VERTEX:
1401       return NULL;
1402    case PIPE_SHADER_FRAGMENT:
1403       if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
1404          return ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
1405       FALLTHROUGH;
1406    case PIPE_SHADER_GEOMETRY:
1407       if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL])
1408          return ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
1409       FALLTHROUGH;
1410    case PIPE_SHADER_TESS_EVAL:
1411       if (ctx->gfx_stages[PIPE_SHADER_TESS_CTRL])
1412          return ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];
1413       FALLTHROUGH;
1414    case PIPE_SHADER_TESS_CTRL:
1415       return ctx->gfx_stages[PIPE_SHADER_VERTEX];
1416    default:
1417       unreachable("shader type not supported");
1418    }
1419 }
1420 
1421 static d3d12_shader_selector *
get_next_shader(struct d3d12_context * ctx,pipe_shader_type current)1422 get_next_shader(struct d3d12_context *ctx, pipe_shader_type current)
1423 {
1424    switch (current) {
1425    case PIPE_SHADER_VERTEX:
1426       if (ctx->gfx_stages[PIPE_SHADER_TESS_CTRL])
1427          return ctx->gfx_stages[PIPE_SHADER_TESS_CTRL];
1428       FALLTHROUGH;
1429    case PIPE_SHADER_TESS_CTRL:
1430       if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL])
1431          return ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
1432       FALLTHROUGH;
1433    case PIPE_SHADER_TESS_EVAL:
1434       if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
1435          return ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
1436       FALLTHROUGH;
1437    case PIPE_SHADER_GEOMETRY:
1438       return ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
1439    case PIPE_SHADER_FRAGMENT:
1440       return NULL;
1441    default:
1442       unreachable("shader type not supported");
1443    }
1444 }
1445 
/* Bitmask of texture-usage properties gathered by scan_texture_use() that
 * require NIR lowering before translation to DXIL. */
enum tex_scan_flags {
   TEX_SAMPLE_INTEGER_TEXTURE = 1 << 0,  /* samples an int/uint texture */
   TEX_CMP_WITH_LOD_BIAS_GRAD = 1 << 1,  /* shadow compare with bias/lod/grad */
   TEX_SCAN_ALL_FLAGS         = (1 << 2) - 1
};
1451 
1452 static unsigned
scan_texture_use(nir_shader * nir)1453 scan_texture_use(nir_shader *nir)
1454 {
1455    unsigned result = 0;
1456    nir_foreach_function_impl(impl, nir) {
1457       nir_foreach_block(block, impl) {
1458          nir_foreach_instr(instr, block) {
1459             if (instr->type == nir_instr_type_tex) {
1460                auto tex = nir_instr_as_tex(instr);
1461                switch (tex->op) {
1462                case nir_texop_txb:
1463                case nir_texop_txl:
1464                case nir_texop_txd:
1465                   if (tex->is_shadow)
1466                      result |= TEX_CMP_WITH_LOD_BIAS_GRAD;
1467                   FALLTHROUGH;
1468                case nir_texop_tex:
1469                   if (tex->dest_type & (nir_type_int | nir_type_uint))
1470                      result |= TEX_SAMPLE_INTEGER_TEXTURE;
1471                default:
1472                   ;
1473                }
1474             }
1475             if (TEX_SCAN_ALL_FLAGS == result)
1476                return result;
1477          }
1478       }
1479    }
1480    return result;
1481 }
1482 
1483 static uint64_t
update_so_info(struct pipe_stream_output_info * so_info,uint64_t outputs_written)1484 update_so_info(struct pipe_stream_output_info *so_info,
1485                uint64_t outputs_written)
1486 {
1487    uint64_t so_outputs = 0;
1488    uint8_t reverse_map[64] = {0};
1489    unsigned slot = 0;
1490 
1491    while (outputs_written)
1492       reverse_map[slot++] = u_bit_scan64(&outputs_written);
1493 
1494    for (unsigned i = 0; i < so_info->num_outputs; i++) {
1495       struct pipe_stream_output *output = &so_info->output[i];
1496 
1497       /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
1498       output->register_index = reverse_map[output->register_index];
1499 
1500       so_outputs |= 1ull << output->register_index;
1501    }
1502 
1503    return so_outputs;
1504 }
1505 
/* Common tail of graphics and compute shader creation: record per-shader
 * lowering needs, run stage-independent NIR lowering, store the NIR as the
 * blueprint for variants, and compile an initial variant through the shader
 * selector.  Returns NULL (and frees sel) if no variant could be compiled. */
static struct d3d12_shader_selector *
d3d12_create_shader_impl(struct d3d12_context *ctx,
                         struct d3d12_shader_selector *sel,
                         struct nir_shader *nir,
                         struct d3d12_shader_selector *prev,
                         struct d3d12_shader_selector *next)
{
   /* Flags that decide whether state-dependent emulation variants are needed */
   unsigned tex_scan_result = scan_texture_use(nir);
   sel->samples_int_textures = (tex_scan_result & TEX_SAMPLE_INTEGER_TEXTURE) != 0;
   sel->compare_with_lod_bias_grad = (tex_scan_result & TEX_CMP_WITH_LOD_BIAS_GRAD) != 0;
   sel->workgroup_size_variable = nir->info.workgroup_size_variable;

   /* Integer cube maps are not supported in DirectX because sampling is not supported
    * on integer textures and TextureLoad is not supported for cube maps, so we have to
    * lower integer cube maps to be handled like 2D textures arrays*/
   NIR_PASS_V(nir, dxil_nir_lower_int_cubemaps, true);

   /* Keep this initial shader as the blue print for possible variants */
   sel->initial = nir;
   sel->initial_output_vars = nullptr;
   sel->gs_key.varyings = nullptr;
   sel->tcs_key.varyings = nullptr;

   /*
    * We must compile some shader here, because if the previous or a next shaders exists later
    * when the shaders are bound, then the key evaluation in the shader selector will access
    * the current variant of these  prev and next shader, and we can only assign
    * a current variant when it has been successfully compiled.
    *
    * For shaders that require lowering because certain instructions are not available
    * and their emulation is state depended (like sampling an integer texture that must be
    * emulated and needs handling of boundary conditions, or shadow compare sampling with LOD),
    * we must go through the shader selector here to create a compilable variant.
    * For shaders that are not depended on the state this is just compiling the original
    * shader.
    *
    * TODO: get rid of having to compiling the shader here if it can be forseen that it will
    * be thrown away (i.e. it depends on states that are likely to change before the shader is
    * used for the first time)
    */
   struct d3d12_selection_context sel_ctx = {0};
   sel_ctx.ctx = ctx;
   select_shader_variant(&sel_ctx, sel, prev, next);

   if (!sel->current) {
      /* NOTE(review): nir (sel->initial) was not made a ralloc child of sel,
       * so freeing sel here does not free the NIR shader — this looks like a
       * leak on the failure path; confirm ownership with the callers. */
      ralloc_free(sel);
      return NULL;
   }

   return sel;
}
1557 
1558 struct d3d12_shader_selector *
d3d12_create_shader(struct d3d12_context * ctx,pipe_shader_type stage,const struct pipe_shader_state * shader)1559 d3d12_create_shader(struct d3d12_context *ctx,
1560                     pipe_shader_type stage,
1561                     const struct pipe_shader_state *shader)
1562 {
1563    struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector);
1564    sel->stage = stage;
1565 
1566    struct nir_shader *nir = NULL;
1567 
1568    if (shader->type == PIPE_SHADER_IR_NIR) {
1569       nir = (nir_shader *)shader->ir.nir;
1570    } else {
1571       assert(shader->type == PIPE_SHADER_IR_TGSI);
1572       nir = tgsi_to_nir(shader->tokens, ctx->base.screen, false);
1573    }
1574 
1575    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1576    memcpy(&sel->so_info, &shader->stream_output, sizeof(sel->so_info));
1577    update_so_info(&sel->so_info, nir->info.outputs_written);
1578 
1579    assert(nir != NULL);
1580    d3d12_shader_selector *prev = get_prev_shader(ctx, sel->stage);
1581    d3d12_shader_selector *next = get_next_shader(ctx, sel->stage);
1582 
1583    NIR_PASS_V(nir, dxil_nir_split_clip_cull_distance);
1584    NIR_PASS_V(nir, d3d12_split_needed_varyings);
1585 
1586    if (nir->info.stage != MESA_SHADER_VERTEX) {
1587       nir->info.inputs_read =
1588       dxil_reassign_driver_locations(nir, nir_var_shader_in,
1589                                      prev ? prev->current->nir->info.outputs_written : 0);
1590    } else {
1591       nir->info.inputs_read = dxil_sort_by_driver_location(nir, nir_var_shader_in);
1592 
1593       uint32_t driver_loc = 0;
1594       nir_foreach_variable_with_modes(var, nir, nir_var_shader_in) {
1595          var->data.driver_location = driver_loc;
1596          driver_loc += glsl_count_attribute_slots(var->type, false);
1597       }
1598    }
1599 
1600    if (nir->info.stage != MESA_SHADER_FRAGMENT) {
1601       nir->info.outputs_written =
1602             dxil_reassign_driver_locations(nir, nir_var_shader_out,
1603                                             next ? next->current->nir->info.inputs_read : 0);
1604    } else {
1605       NIR_PASS_V(nir, nir_lower_fragcoord_wtrans);
1606       NIR_PASS_V(nir, dxil_nir_lower_sample_pos);
1607       dxil_sort_ps_outputs(nir);
1608    }
1609 
1610    return d3d12_create_shader_impl(ctx, sel, nir, prev, next);
1611 }
1612 
1613 struct d3d12_shader_selector *
d3d12_create_compute_shader(struct d3d12_context * ctx,const struct pipe_compute_state * shader)1614 d3d12_create_compute_shader(struct d3d12_context *ctx,
1615                             const struct pipe_compute_state *shader)
1616 {
1617    struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector);
1618    sel->stage = PIPE_SHADER_COMPUTE;
1619 
1620    struct nir_shader *nir = NULL;
1621 
1622    if (shader->ir_type == PIPE_SHADER_IR_NIR) {
1623       nir = (nir_shader *)shader->prog;
1624    } else {
1625       assert(shader->ir_type == PIPE_SHADER_IR_TGSI);
1626       nir = tgsi_to_nir(shader->prog, ctx->base.screen, false);
1627    }
1628 
1629    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1630 
1631    NIR_PASS_V(nir, d3d12_lower_compute_state_vars);
1632 
1633    return d3d12_create_shader_impl(ctx, sel, nir, nullptr, nullptr);
1634 }
1635 
/* Select (and if necessary compile) the shader variant of every bound
 * graphics stage for the given draw, based on the current draw-dependent
 * state (point sprites, fill/cull lowering, provoking vertex, etc.).
 * Stages must be processed in pipeline order so that each key evaluation
 * can read the already-selected variant of its neighbors. */
void
d3d12_select_shader_variants(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
{
   struct d3d12_selection_context sel_ctx;

   /* NOTE(review): unlike the other call sites, sel_ctx is not
    * zero-initialized here; all fields read later appear to be assigned
    * below or inside the helpers — confirm none are left uninitialized. */
   sel_ctx.ctx = ctx;
   sel_ctx.needs_point_sprite_lowering = needs_point_sprite_lowering(ctx, dinfo);
   sel_ctx.fill_mode_lowered = fill_mode_lowered(ctx, dinfo);
   sel_ctx.cull_mode_lowered = cull_mode_lowered(ctx, sel_ctx.fill_mode_lowered);
   sel_ctx.provoking_vertex = get_provoking_vertex(&sel_ctx, &sel_ctx.alternate_tri, dinfo);
   sel_ctx.needs_vertex_reordering = needs_vertex_reordering(&sel_ctx, dinfo);
   sel_ctx.missing_dual_src_outputs = ctx->missing_dual_src_outputs;
   sel_ctx.frag_result_color_lowering = frag_result_color_lowering(ctx);
   sel_ctx.manual_depth_range = ctx->manual_depth_range;

   /* A geometry shader may need to be injected (or an injected one removed)
    * for point-sprite/fill-mode/vertex-reordering emulation; only do this
    * when no application-provided GS is bound. */
   d3d12_shader_selector* gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
   if (gs == nullptr || gs->is_variant) {
      if (sel_ctx.fill_mode_lowered != PIPE_POLYGON_MODE_FILL || sel_ctx.needs_point_sprite_lowering || sel_ctx.needs_vertex_reordering)
         validate_geometry_shader_variant(&sel_ctx);
      else if (gs != nullptr) {
         /* Emulation GS no longer needed; unbind it */
         ctx->gfx_stages[PIPE_SHADER_GEOMETRY] = NULL;
      }
   }

   validate_tess_ctrl_shader_variant(&sel_ctx);

   /* Walk the bound stages in pipeline order, handing each its actual
    * previous/next neighbor so variant keys can match interfaces. */
   auto* stages = ctx->gfx_stages;
   d3d12_shader_selector* prev;
   d3d12_shader_selector* next;
   if (stages[PIPE_SHADER_VERTEX]) {
      next = get_next_shader(ctx, PIPE_SHADER_VERTEX);
      select_shader_variant(&sel_ctx, stages[PIPE_SHADER_VERTEX], nullptr, next);
   }
   if (stages[PIPE_SHADER_TESS_CTRL]) {
      prev = get_prev_shader(ctx, PIPE_SHADER_TESS_CTRL);
      next = get_next_shader(ctx, PIPE_SHADER_TESS_CTRL);
      select_shader_variant(&sel_ctx, stages[PIPE_SHADER_TESS_CTRL], prev, next);
   }
   if (stages[PIPE_SHADER_TESS_EVAL]) {
      prev = get_prev_shader(ctx, PIPE_SHADER_TESS_EVAL);
      next = get_next_shader(ctx, PIPE_SHADER_TESS_EVAL);
      select_shader_variant(&sel_ctx, stages[PIPE_SHADER_TESS_EVAL], prev, next);
   }
   if (stages[PIPE_SHADER_GEOMETRY]) {
      prev = get_prev_shader(ctx, PIPE_SHADER_GEOMETRY);
      next = get_next_shader(ctx, PIPE_SHADER_GEOMETRY);
      select_shader_variant(&sel_ctx, stages[PIPE_SHADER_GEOMETRY], prev, next);
   }
   if (stages[PIPE_SHADER_FRAGMENT]) {
      prev = get_prev_shader(ctx, PIPE_SHADER_FRAGMENT);
      select_shader_variant(&sel_ctx, stages[PIPE_SHADER_FRAGMENT], prev, nullptr);
   }
}
1689 
1690 static const unsigned *
workgroup_size_variable(struct d3d12_context * ctx,const struct pipe_grid_info * info)1691 workgroup_size_variable(struct d3d12_context *ctx,
1692                         const struct pipe_grid_info *info)
1693 {
1694    if (ctx->compute_state->workgroup_size_variable)
1695       return info->block;
1696    return nullptr;
1697 }
1698 
1699 void
d3d12_select_compute_shader_variants(struct d3d12_context * ctx,const struct pipe_grid_info * info)1700 d3d12_select_compute_shader_variants(struct d3d12_context *ctx, const struct pipe_grid_info *info)
1701 {
1702    struct d3d12_selection_context sel_ctx = {};
1703 
1704    sel_ctx.ctx = ctx;
1705    sel_ctx.variable_workgroup_size = workgroup_size_variable(ctx, info);
1706 
1707    select_shader_variant(&sel_ctx, ctx->compute_state, nullptr, nullptr);
1708 }
1709 
1710 void
d3d12_shader_free(struct d3d12_shader_selector * sel)1711 d3d12_shader_free(struct d3d12_shader_selector *sel)
1712 {
1713    auto shader = sel->first;
1714    while (shader) {
1715       free(shader->bytecode);
1716       shader = shader->next_variant;
1717    }
1718 
1719    ralloc_free((void*)sel->initial);
1720    ralloc_free(sel);
1721 }
1722