• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "d3d12_compiler.h"
25 #include "d3d12_context.h"
26 #include "d3d12_debug.h"
27 #include "d3d12_screen.h"
28 #include "d3d12_nir_passes.h"
29 #include "nir_to_dxil.h"
30 #include "dxil_nir.h"
31 
32 #include "pipe/p_state.h"
33 
34 #include "nir.h"
35 #include "nir/nir_draw_helpers.h"
36 #include "nir/tgsi_to_nir.h"
37 #include "compiler/nir/nir_builder.h"
38 #include "tgsi/tgsi_from_mesa.h"
39 #include "tgsi/tgsi_ureg.h"
40 
41 #include "util/u_memory.h"
42 #include "util/u_prim.h"
43 #include "util/u_simple_shaders.h"
44 #include "util/u_dl.h"
45 
46 #include <directx/d3d12.h>
47 #include <dxguids/dxguids.h>
48 
49 #include <dxcapi.h>
50 #include <wrl/client.h>
51 
52 extern "C" {
53 #include "tgsi/tgsi_parse.h"
54 #include "tgsi/tgsi_point_sprite.h"
55 }
56 
57 using Microsoft::WRL::ComPtr;
58 
59 struct d3d12_validation_tools
60 {
61    d3d12_validation_tools();
62 
63    bool validate_and_sign(struct blob *dxil);
64 
65    void disassemble(struct blob *dxil);
66 
67    void load_dxil_dll();
68 
69    struct HModule {
70       HModule();
71       ~HModule();
72 
73       bool load(LPCSTR file_name);
74       operator util_dl_library *() const;
75    private:
76       util_dl_library *module;
77    };
78 
79    HModule dxil_module;
80    HModule dxc_compiler_module;
81    ComPtr<IDxcCompiler> compiler;
82    ComPtr<IDxcValidator> validator;
83    ComPtr<IDxcLibrary> library;
84 };
85 
d3d12_validator_create()86 struct d3d12_validation_tools *d3d12_validator_create()
87 {
88    d3d12_validation_tools *tools = new d3d12_validation_tools();
89    if (tools->validator)
90       return tools;
91    delete tools;
92    return nullptr;
93 }
94 
/*
 * Destroy a tool bundle obtained from d3d12_validator_create().
 * Passing a null pointer is harmless because delete ignores it.
 */
void
d3d12_validator_destroy(struct d3d12_validation_tools *validator)
{
   delete validator;
}
99 
100 
101 const void *
d3d12_get_compiler_options(struct pipe_screen * screen,enum pipe_shader_ir ir,enum pipe_shader_type shader)102 d3d12_get_compiler_options(struct pipe_screen *screen,
103                            enum pipe_shader_ir ir,
104                            enum pipe_shader_type shader)
105 {
106    assert(ir == PIPE_SHADER_IR_NIR);
107    return dxil_get_nir_compiler_options();
108 }
109 
110 static uint32_t
resource_dimension(enum glsl_sampler_dim dim)111 resource_dimension(enum glsl_sampler_dim dim)
112 {
113    switch (dim) {
114    case GLSL_SAMPLER_DIM_1D:
115       return RESOURCE_DIMENSION_TEXTURE1D;
116    case GLSL_SAMPLER_DIM_2D:
117       return RESOURCE_DIMENSION_TEXTURE2D;
118    case GLSL_SAMPLER_DIM_3D:
119       return RESOURCE_DIMENSION_TEXTURE3D;
120    case GLSL_SAMPLER_DIM_CUBE:
121       return RESOURCE_DIMENSION_TEXTURECUBE;
122    default:
123       return RESOURCE_DIMENSION_UNKNOWN;
124    }
125 }
126 
127 static struct d3d12_shader *
compile_nir(struct d3d12_context * ctx,struct d3d12_shader_selector * sel,struct d3d12_shader_key * key,struct nir_shader * nir)128 compile_nir(struct d3d12_context *ctx, struct d3d12_shader_selector *sel,
129             struct d3d12_shader_key *key, struct nir_shader *nir)
130 {
131    struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
132    struct d3d12_shader *shader = rzalloc(sel, d3d12_shader);
133    shader->key = *key;
134    shader->nir = nir;
135    sel->current = shader;
136 
137    NIR_PASS_V(nir, nir_lower_samplers);
138    NIR_PASS_V(nir, dxil_nir_create_bare_samplers);
139 
140    if (key->samples_int_textures)
141       NIR_PASS_V(nir, dxil_lower_sample_to_txf_for_integer_tex,
142                  key->tex_wrap_states, key->swizzle_state,
143                  screen->base.get_paramf(&screen->base, PIPE_CAPF_MAX_TEXTURE_LOD_BIAS));
144 
145    if (key->vs.needs_format_emulation)
146       d3d12_nir_lower_vs_vertex_conversion(nir, key->vs.format_conversion);
147 
148    uint32_t num_ubos_before_lower_to_ubo = nir->info.num_ubos;
149    uint32_t num_uniforms_before_lower_to_ubo = nir->num_uniforms;
150    NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, false, false);
151    shader->has_default_ubo0 = num_uniforms_before_lower_to_ubo > 0 &&
152                               nir->info.num_ubos > num_ubos_before_lower_to_ubo;
153 
154    if (key->last_vertex_processing_stage) {
155       if (key->invert_depth)
156          NIR_PASS_V(nir, d3d12_nir_invert_depth);
157       NIR_PASS_V(nir, nir_lower_clip_halfz);
158       NIR_PASS_V(nir, d3d12_lower_yflip);
159    }
160    NIR_PASS_V(nir, nir_lower_packed_ubo_loads);
161    NIR_PASS_V(nir, d3d12_lower_load_first_vertex);
162    NIR_PASS_V(nir, d3d12_lower_state_vars, shader);
163    NIR_PASS_V(nir, dxil_nir_lower_bool_input);
164 
165    struct nir_to_dxil_options opts = {};
166    opts.interpolate_at_vertex = screen->have_load_at_vertex;
167    opts.lower_int16 = !screen->opts4.Native16BitShaderOpsSupported;
168    opts.ubo_binding_offset = shader->has_default_ubo0 ? 0 : 1;
169    opts.provoking_vertex = key->fs.provoking_vertex;
170 
171    struct blob tmp;
172    if (!nir_to_dxil(nir, &opts, &tmp)) {
173       debug_printf("D3D12: nir_to_dxil failed\n");
174       return NULL;
175    }
176 
177    // Non-ubo variables
178    shader->begin_srv_binding = (UINT_MAX);
179    nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
180       auto type = glsl_without_array(var->type);
181       if (glsl_type_is_sampler(type) && glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
182          unsigned count = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
183          for (unsigned i = 0; i < count; ++i) {
184             shader->srv_bindings[var->data.binding + i].binding = var->data.binding;
185             shader->srv_bindings[var->data.binding + i].dimension = resource_dimension(glsl_get_sampler_dim(type));
186          }
187          shader->begin_srv_binding = MIN2(var->data.binding, shader->begin_srv_binding);
188          shader->end_srv_binding = MAX2(var->data.binding + count, shader->end_srv_binding);
189       }
190    }
191 
192    // Ubo variables
193    if(nir->info.num_ubos) {
194       // Ignore state_vars ubo as it is bound as root constants
195       unsigned num_ubo_bindings = nir->info.num_ubos - (shader->state_vars_used ? 1 : 0);
196       for(unsigned i = opts.ubo_binding_offset; i < num_ubo_bindings; ++i) {
197          shader->cb_bindings[shader->num_cb_bindings++].binding = i;
198       }
199    }
200    if (ctx->validation_tools) {
201       ctx->validation_tools->validate_and_sign(&tmp);
202 
203       if (d3d12_debug & D3D12_DEBUG_DISASS) {
204          ctx->validation_tools->disassemble(&tmp);
205       }
206    }
207 
208    blob_finish_get_buffer(&tmp, &shader->bytecode, &shader->bytecode_length);
209 
210    if (d3d12_debug & D3D12_DEBUG_DXIL) {
211       char buf[256];
212       static int i;
213       snprintf(buf, sizeof(buf), "dump%02d.dxil", i++);
214       FILE *fp = fopen(buf, "wb");
215       fwrite(shader->bytecode, sizeof(char), shader->bytecode_length, fp);
216       fclose(fp);
217       fprintf(stderr, "wrote '%s'...\n", buf);
218    }
219    return shader;
220 }
221 
/*
 * Per-selection scratch state shared by the helpers that decide which shader
 * variants are required. The flags cache the lowering decisions so that key
 * construction and geometry shader variant validation read the same values.
 */
struct d3d12_selection_context {
   /* Context being validated and the draw that triggered the selection;
    * get_provoking_vertex() tolerates a null dinfo. */
   struct d3d12_context *ctx;
   const struct pipe_draw_info *dinfo;

   /* Geometry-stage lowering decisions. */
   bool needs_point_sprite_lowering;
   bool needs_vertex_reordering;
   unsigned provoking_vertex;
   bool alternate_tri;
   unsigned fill_mode_lowered;   /* PIPE_POLYGON_MODE_* */
   unsigned cull_mode_lowered;   /* PIPE_FACE_* */

   /* Fragment-stage lowering decisions (copied into the fs shader key). */
   bool manual_depth_range;
   unsigned missing_dual_src_outputs;
   unsigned frag_result_color_lowering;
};
235 
236 static unsigned
missing_dual_src_outputs(struct d3d12_context * ctx)237 missing_dual_src_outputs(struct d3d12_context *ctx)
238 {
239    if (!ctx->gfx_pipeline_state.blend->is_dual_src)
240       return 0;
241 
242    struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
243    nir_shader *s = fs->initial;
244 
245    unsigned indices_seen = 0;
246    nir_foreach_function(function, s) {
247       if (function->impl) {
248          nir_foreach_block(block, function->impl) {
249             nir_foreach_instr(instr, block) {
250                if (instr->type != nir_instr_type_intrinsic)
251                   continue;
252 
253                nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
254                if (intr->intrinsic != nir_intrinsic_store_deref)
255                   continue;
256 
257                nir_variable *var = nir_intrinsic_get_var(intr, 0);
258                if (var->data.mode != nir_var_shader_out ||
259                    (var->data.location != FRAG_RESULT_COLOR &&
260                     var->data.location != FRAG_RESULT_DATA0))
261                   continue;
262 
263                indices_seen |= 1u << var->data.index;
264                if ((indices_seen & 3) == 3)
265                   return 0;
266             }
267          }
268       }
269    }
270 
271    return 3 & ~indices_seen;
272 }
273 
274 static unsigned
frag_result_color_lowering(struct d3d12_context * ctx)275 frag_result_color_lowering(struct d3d12_context *ctx)
276 {
277    struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
278    assert(fs);
279 
280    if (fs->initial->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR))
281       return ctx->fb.nr_cbufs > 1 ? ctx->fb.nr_cbufs : 0;
282 
283    return 0;
284 }
285 
286 static bool
manual_depth_range(struct d3d12_context * ctx)287 manual_depth_range(struct d3d12_context *ctx)
288 {
289    if (!d3d12_need_zero_one_depth_range(ctx))
290       return false;
291 
292    /**
293     * If we can't use the D3D12 zero-one depth-range, we might have to apply
294     * depth-range ourselves.
295     *
296     * Because we only need to override the depth-range to zero-one range in
297     * the case where we write frag-depth, we only need to apply manual
298     * depth-range to gl_FragCoord.z.
299     *
300     * No extra care is needed to be taken in the case where gl_FragDepth is
301     * written conditionally, because the GLSL 4.60 spec states:
302     *
303     *    If a shader statically assigns a value to gl_FragDepth, and there
304     *    is an execution path through the shader that does not set
305     *    gl_FragDepth, then the value of the fragment’s depth may be
306     *    undefined for executions of the shader that take that path. That
307     *    is, if the set of linked fragment shaders statically contain a
308     *    write to gl_FragDepth, then it is responsible for always writing
309     *    it.
310     */
311 
312    struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
313    return fs && fs->initial->info.inputs_read & VARYING_BIT_POS;
314 }
315 
316 static bool
needs_edge_flag_fix(enum pipe_prim_type mode)317 needs_edge_flag_fix(enum pipe_prim_type mode)
318 {
319    return (mode == PIPE_PRIM_QUADS ||
320            mode == PIPE_PRIM_QUAD_STRIP ||
321            mode == PIPE_PRIM_POLYGON);
322 }
323 
324 static unsigned
fill_mode_lowered(struct d3d12_context * ctx,const struct pipe_draw_info * dinfo)325 fill_mode_lowered(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
326 {
327    struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
328 
329    if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
330         !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_gs_variant) ||
331        ctx->gfx_pipeline_state.rast == NULL ||
332        (dinfo->mode != PIPE_PRIM_TRIANGLES &&
333         dinfo->mode != PIPE_PRIM_TRIANGLE_STRIP))
334       return PIPE_POLYGON_MODE_FILL;
335 
336    /* D3D12 supports line mode (wireframe) but doesn't support edge flags */
337    if (((ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_LINE &&
338          ctx->gfx_pipeline_state.rast->base.cull_face != PIPE_FACE_FRONT) ||
339         (ctx->gfx_pipeline_state.rast->base.fill_back == PIPE_POLYGON_MODE_LINE &&
340          ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_FRONT)) &&
341        (vs->initial->info.outputs_written & VARYING_BIT_EDGE ||
342         needs_edge_flag_fix(ctx->initial_api_prim)))
343       return PIPE_POLYGON_MODE_LINE;
344 
345    if (ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_POINT)
346       return PIPE_POLYGON_MODE_POINT;
347 
348    return PIPE_POLYGON_MODE_FILL;
349 }
350 
351 static bool
needs_point_sprite_lowering(struct d3d12_context * ctx,const struct pipe_draw_info * dinfo)352 needs_point_sprite_lowering(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
353 {
354    struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
355    struct d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
356 
357    if (gs != NULL && !gs->is_gs_variant) {
358       /* There is an user GS; Check if it outputs points with PSIZE */
359       return (gs->initial->info.gs.output_primitive == GL_POINTS &&
360               gs->initial->info.outputs_written & VARYING_BIT_PSIZ);
361    } else {
362       /* No user GS; check if we are drawing wide points */
363       return ((dinfo->mode == PIPE_PRIM_POINTS ||
364                fill_mode_lowered(ctx, dinfo) == PIPE_POLYGON_MODE_POINT) &&
365               (ctx->gfx_pipeline_state.rast->base.point_size > 1.0 ||
366                ctx->gfx_pipeline_state.rast->base.offset_point ||
367                (ctx->gfx_pipeline_state.rast->base.point_size_per_vertex &&
368                 vs->initial->info.outputs_written & VARYING_BIT_PSIZ)) &&
369               (vs->initial->info.outputs_written & VARYING_BIT_POS));
370    }
371 }
372 
373 static unsigned
cull_mode_lowered(struct d3d12_context * ctx,unsigned fill_mode)374 cull_mode_lowered(struct d3d12_context *ctx, unsigned fill_mode)
375 {
376    if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
377         !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_gs_variant) ||
378        ctx->gfx_pipeline_state.rast == NULL ||
379        ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_NONE)
380       return PIPE_FACE_NONE;
381 
382    return ctx->gfx_pipeline_state.rast->base.cull_face;
383 }
384 
385 static unsigned
get_provoking_vertex(struct d3d12_selection_context * sel_ctx,bool * alternate)386 get_provoking_vertex(struct d3d12_selection_context *sel_ctx, bool *alternate)
387 {
388    struct d3d12_shader_selector *vs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_VERTEX];
389    struct d3d12_shader_selector *gs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
390    struct d3d12_shader_selector *last_vertex_stage = gs && !gs->is_gs_variant ? gs : vs;
391 
392    /* Make sure GL prims match Gallium prims */
393    STATIC_ASSERT(GL_POINTS == PIPE_PRIM_POINTS);
394    STATIC_ASSERT(GL_LINES == PIPE_PRIM_LINES);
395    STATIC_ASSERT(GL_LINE_STRIP == PIPE_PRIM_LINE_STRIP);
396 
397    enum pipe_prim_type mode;
398    switch (last_vertex_stage->stage) {
399    case PIPE_SHADER_GEOMETRY:
400       mode = (enum pipe_prim_type)last_vertex_stage->current->nir->info.gs.output_primitive;
401       break;
402    case PIPE_SHADER_VERTEX:
403       mode = sel_ctx->dinfo ? (enum pipe_prim_type)sel_ctx->dinfo->mode : PIPE_PRIM_TRIANGLES;
404       break;
405    default:
406       unreachable("Tesselation shaders are not supported");
407    }
408 
409    bool flatshade_first = sel_ctx->ctx->gfx_pipeline_state.rast &&
410                           sel_ctx->ctx->gfx_pipeline_state.rast->base.flatshade_first;
411    *alternate = (mode == GL_TRIANGLE_STRIP || mode == GL_TRIANGLE_STRIP_ADJACENCY) &&
412                 (!gs || gs->is_gs_variant ||
413                  gs->initial->info.gs.vertices_out > u_prim_vertex_count(mode)->min);
414    return flatshade_first ? 0 : u_prim_vertex_count(mode)->min - 1;
415 }
416 
417 static bool
has_flat_varyings(struct d3d12_context * ctx)418 has_flat_varyings(struct d3d12_context *ctx)
419 {
420    struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
421 
422    if (!fs || !fs->current)
423       return false;
424 
425    nir_foreach_variable_with_modes(input, fs->current->nir,
426                                    nir_var_shader_in) {
427       if (input->data.interpolation == INTERP_MODE_FLAT)
428          return true;
429    }
430 
431    return false;
432 }
433 
434 static bool
needs_vertex_reordering(struct d3d12_selection_context * sel_ctx)435 needs_vertex_reordering(struct d3d12_selection_context *sel_ctx)
436 {
437    struct d3d12_context *ctx = sel_ctx->ctx;
438    bool flat = has_flat_varyings(ctx);
439    bool xfb = ctx->gfx_pipeline_state.num_so_targets > 0;
440 
441    if (fill_mode_lowered(ctx, sel_ctx->dinfo) != PIPE_POLYGON_MODE_FILL)
442       return false;
443 
444    /* TODO add support for line primitives */
445 
446    /* When flat shading a triangle and provoking vertex is not the first one, we use load_at_vertex.
447       If not available for this adapter, or if it's a triangle strip, we need to reorder the vertices */
448    if (flat && sel_ctx->provoking_vertex >= 2 && (!d3d12_screen(ctx->base.screen)->have_load_at_vertex ||
449                                                   sel_ctx->alternate_tri))
450       return true;
451 
452    /* When transform feedback is enabled and the output is alternating (triangle strip or triangle
453       strip with adjacency), we need to reorder vertices to get the order expected by OpenGL. This
454       only works when there is no flat shading involved. In that scenario, we don't care about
455       the provoking vertex. */
456    if (xfb && !flat && sel_ctx->alternate_tri) {
457       sel_ctx->provoking_vertex = 0;
458       return true;
459    }
460 
461    return false;
462 }
463 
464 static nir_variable *
create_varying_from_info(nir_shader * nir,struct d3d12_varying_info * info,unsigned slot,nir_variable_mode mode)465 create_varying_from_info(nir_shader *nir, struct d3d12_varying_info *info,
466                          unsigned slot, nir_variable_mode mode)
467 {
468    nir_variable *var;
469    char tmp[100];
470 
471    snprintf(tmp, ARRAY_SIZE(tmp),
472             mode == nir_var_shader_in ? "in_%d" : "out_%d",
473             info->vars[slot].driver_location);
474    var = nir_variable_create(nir, mode, info->vars[slot].type, tmp);
475    var->data.location = slot;
476    var->data.driver_location = info->vars[slot].driver_location;
477    var->data.interpolation = info->vars[slot].interpolation;
478 
479    return var;
480 }
481 
482 static void
fill_varyings(struct d3d12_varying_info * info,nir_shader * s,nir_variable_mode modes,uint64_t mask)483 fill_varyings(struct d3d12_varying_info *info, nir_shader *s,
484               nir_variable_mode modes, uint64_t mask)
485 {
486    nir_foreach_variable_with_modes(var, s, modes) {
487       unsigned slot = var->data.location;
488       uint64_t slot_bit = BITFIELD64_BIT(slot);
489 
490       if (!(mask & slot_bit))
491          continue;
492       info->vars[slot].driver_location = var->data.driver_location;
493       info->vars[slot].type = var->type;
494       info->vars[slot].interpolation = var->data.interpolation;
495       info->mask |= slot_bit;
496    }
497 }
498 
499 static void
fill_flat_varyings(struct d3d12_gs_variant_key * key,d3d12_shader_selector * fs)500 fill_flat_varyings(struct d3d12_gs_variant_key *key, d3d12_shader_selector *fs)
501 {
502    if (!fs || !fs->current)
503       return;
504 
505    nir_foreach_variable_with_modes(input, fs->current->nir,
506                                    nir_var_shader_in) {
507       if (input->data.interpolation == INTERP_MODE_FLAT)
508          key->flat_varyings |= BITFIELD64_BIT(input->data.location);
509    }
510 }
511 
512 static void
validate_geometry_shader_variant(struct d3d12_selection_context * sel_ctx)513 validate_geometry_shader_variant(struct d3d12_selection_context *sel_ctx)
514 {
515    struct d3d12_context *ctx = sel_ctx->ctx;
516    d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
517    d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
518    struct d3d12_gs_variant_key key = {0};
519    bool variant_needed = false;
520 
521    d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
522 
523    /* Nothing to do if there is a user geometry shader bound */
524    if (gs != NULL && !gs->is_gs_variant)
525       return;
526 
527    /* Fill the geometry shader variant key */
528    if (sel_ctx->fill_mode_lowered != PIPE_POLYGON_MODE_FILL) {
529       key.fill_mode = sel_ctx->fill_mode_lowered;
530       key.cull_mode = sel_ctx->cull_mode_lowered;
531       key.has_front_face = BITSET_TEST(fs->initial->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
532       if (key.cull_mode != PIPE_FACE_NONE || key.has_front_face)
533          key.front_ccw = ctx->gfx_pipeline_state.rast->base.front_ccw ^ (ctx->flip_y < 0);
534       key.edge_flag_fix = needs_edge_flag_fix(ctx->initial_api_prim);
535       fill_flat_varyings(&key, fs);
536       if (key.flat_varyings != 0)
537          key.flatshade_first = ctx->gfx_pipeline_state.rast->base.flatshade_first;
538       variant_needed = true;
539    } else if (sel_ctx->needs_point_sprite_lowering) {
540       key.passthrough = true;
541       variant_needed = true;
542    } else if (sel_ctx->needs_vertex_reordering) {
543       /* TODO support cases where flat shading (pv != 0) and xfb are enabled */
544       key.provoking_vertex = sel_ctx->provoking_vertex;
545       key.alternate_tri = sel_ctx->alternate_tri;
546       variant_needed = true;
547    }
548 
549    if (variant_needed) {
550       fill_varyings(&key.varyings, vs->initial, nir_var_shader_out,
551                     vs->initial->info.outputs_written);
552    }
553 
554    /* Check if the currently bound geometry shader variant is correct */
555    if (gs && memcmp(&gs->gs_key, &key, sizeof(key)) == 0)
556       return;
557 
558    /* Find/create the proper variant and bind it */
559    gs = variant_needed ? d3d12_get_gs_variant(ctx, &key) : NULL;
560    ctx->gfx_stages[PIPE_SHADER_GEOMETRY] = gs;
561 }
562 
563 static bool
d3d12_compare_shader_keys(const d3d12_shader_key * expect,const d3d12_shader_key * have)564 d3d12_compare_shader_keys(const d3d12_shader_key *expect, const d3d12_shader_key *have)
565 {
566    assert(expect->stage == have->stage);
567    assert(expect);
568    assert(have);
569 
570    /* Because we only add varyings we check that a shader has at least the expected in-
571     * and outputs. */
572    if (memcmp(&expect->required_varying_inputs, &have->required_varying_inputs,
573               sizeof(struct d3d12_varying_info)) ||
574        memcmp(&expect->required_varying_outputs, &have->required_varying_outputs,
575               sizeof(struct d3d12_varying_info)) ||
576        (expect->next_varying_inputs != have->next_varying_inputs) ||
577        (expect->prev_varying_outputs != have->prev_varying_outputs))
578       return false;
579 
580    if (expect->stage == PIPE_SHADER_GEOMETRY) {
581       if (expect->gs.writes_psize) {
582          if (!have->gs.writes_psize ||
583              expect->gs.point_pos_stream_out != have->gs.point_pos_stream_out ||
584              expect->gs.sprite_coord_enable != have->gs.sprite_coord_enable ||
585              expect->gs.sprite_origin_upper_left != have->gs.sprite_origin_upper_left ||
586              expect->gs.point_size_per_vertex != have->gs.point_size_per_vertex)
587             return false;
588       } else if (have->gs.writes_psize) {
589          return false;
590       }
591       if (expect->gs.primitive_id != have->gs.primitive_id ||
592           expect->gs.triangle_strip != have->gs.triangle_strip)
593          return false;
594    } else if (expect->stage == PIPE_SHADER_FRAGMENT) {
595       if (expect->fs.frag_result_color_lowering != have->fs.frag_result_color_lowering ||
596           expect->fs.manual_depth_range != have->fs.manual_depth_range ||
597           expect->fs.polygon_stipple != have->fs.polygon_stipple ||
598           expect->fs.cast_to_uint != have->fs.cast_to_uint ||
599           expect->fs.cast_to_int != have->fs.cast_to_int)
600          return false;
601    }
602 
603    if (expect->tex_saturate_s != have->tex_saturate_s ||
604        expect->tex_saturate_r != have->tex_saturate_r ||
605        expect->tex_saturate_t != have->tex_saturate_t)
606       return false;
607 
608    if (expect->samples_int_textures != have->samples_int_textures)
609       return false;
610 
611    if (expect->n_texture_states != have->n_texture_states)
612       return false;
613 
614    if (memcmp(expect->tex_wrap_states, have->tex_wrap_states,
615               expect->n_texture_states * sizeof(dxil_wrap_sampler_state)))
616       return false;
617 
618    if (memcmp(expect->swizzle_state, have->swizzle_state,
619               expect->n_texture_states * sizeof(dxil_texture_swizzle_state)))
620       return false;
621 
622    if (memcmp(expect->sampler_compare_funcs, have->sampler_compare_funcs,
623               expect->n_texture_states * sizeof(enum compare_func)))
624       return false;
625 
626    if (expect->invert_depth != have->invert_depth)
627       return false;
628 
629    if (expect->stage == PIPE_SHADER_VERTEX) {
630       if (expect->vs.needs_format_emulation != have->vs.needs_format_emulation)
631          return false;
632 
633       if (expect->vs.needs_format_emulation) {
634          if (memcmp(expect->vs.format_conversion, have->vs.format_conversion,
635                     PIPE_MAX_ATTRIBS * sizeof (enum pipe_format)))
636             return false;
637       }
638    }
639 
640    if (expect->fs.provoking_vertex != have->fs.provoking_vertex)
641       return false;
642 
643    return true;
644 }
645 
646 static void
d3d12_fill_shader_key(struct d3d12_selection_context * sel_ctx,d3d12_shader_key * key,d3d12_shader_selector * sel,d3d12_shader_selector * prev,d3d12_shader_selector * next)647 d3d12_fill_shader_key(struct d3d12_selection_context *sel_ctx,
648                       d3d12_shader_key *key, d3d12_shader_selector *sel,
649                       d3d12_shader_selector *prev, d3d12_shader_selector *next)
650 {
651    pipe_shader_type stage = sel->stage;
652 
653    uint64_t system_generated_in_values =
654          VARYING_BIT_PNTC |
655          VARYING_BIT_PRIMITIVE_ID;
656 
657    uint64_t system_out_values =
658          VARYING_BIT_CLIP_DIST0 |
659          VARYING_BIT_CLIP_DIST1;
660 
661    memset(key, 0, sizeof(d3d12_shader_key));
662    key->stage = stage;
663 
664    if (prev) {
665       /* We require as inputs what the previous stage has written,
666        * except certain system values */
667       if (stage == PIPE_SHADER_FRAGMENT || stage == PIPE_SHADER_GEOMETRY)
668          system_out_values |= VARYING_BIT_POS;
669       if (stage == PIPE_SHADER_FRAGMENT)
670          system_out_values |= VARYING_BIT_PSIZ;
671       uint64_t mask = prev->current->nir->info.outputs_written & ~system_out_values;
672       fill_varyings(&key->required_varying_inputs, prev->current->nir,
673                     nir_var_shader_out, mask);
674       key->prev_varying_outputs = prev->current->nir->info.outputs_written;
675 
676       /* Set the provoking vertex based on the previous shader output. Only set the
677        * key value if the driver actually supports changing the provoking vertex though */
678       if (stage == PIPE_SHADER_FRAGMENT && sel_ctx->ctx->gfx_pipeline_state.rast &&
679           !sel_ctx->needs_vertex_reordering &&
680           d3d12_screen(sel_ctx->ctx->base.screen)->have_load_at_vertex)
681          key->fs.provoking_vertex = sel_ctx->provoking_vertex;
682    }
683 
684    /* We require as outputs what the next stage reads,
685     * except certain system values */
686    if (next) {
687       if (!next->is_gs_variant) {
688          if (stage == PIPE_SHADER_VERTEX)
689             system_generated_in_values |= VARYING_BIT_POS;
690          uint64_t mask = next->current->nir->info.inputs_read & ~system_generated_in_values;
691          fill_varyings(&key->required_varying_outputs, next->current->nir,
692                        nir_var_shader_in, mask);
693       }
694       key->next_varying_inputs = next->current->nir->info.inputs_read;
695    }
696 
697    if (stage == PIPE_SHADER_GEOMETRY ||
698        (stage == PIPE_SHADER_VERTEX && (!next || next->stage != PIPE_SHADER_GEOMETRY))) {
699       key->last_vertex_processing_stage = 1;
700       key->invert_depth = sel_ctx->ctx->reverse_depth_range;
701       if (sel_ctx->ctx->pstipple.enabled)
702          key->next_varying_inputs |= VARYING_BIT_POS;
703    }
704 
705    if (stage == PIPE_SHADER_GEOMETRY && sel_ctx->ctx->gfx_pipeline_state.rast) {
706       struct pipe_rasterizer_state *rast = &sel_ctx->ctx->gfx_pipeline_state.rast->base;
707       if (sel_ctx->needs_point_sprite_lowering) {
708          key->gs.writes_psize = 1;
709          key->gs.point_size_per_vertex = rast->point_size_per_vertex;
710          key->gs.sprite_coord_enable = rast->sprite_coord_enable;
711          key->gs.sprite_origin_upper_left = (rast->sprite_coord_mode != PIPE_SPRITE_COORD_LOWER_LEFT);
712          if (sel_ctx->ctx->flip_y < 0)
713             key->gs.sprite_origin_upper_left = !key->gs.sprite_origin_upper_left;
714          key->gs.aa_point = rast->point_smooth;
715          key->gs.stream_output_factor = 6;
716       } else if (sel_ctx->fill_mode_lowered == PIPE_POLYGON_MODE_LINE) {
717          key->gs.stream_output_factor = 2;
718       } else if (sel_ctx->needs_vertex_reordering && !sel->is_gs_variant) {
719          key->gs.triangle_strip = 1;
720       }
721 
722       if (sel->is_gs_variant && next && next->initial->info.inputs_read & VARYING_BIT_PRIMITIVE_ID)
723          key->gs.primitive_id = 1;
724    } else if (stage == PIPE_SHADER_FRAGMENT) {
725       key->fs.missing_dual_src_outputs = sel_ctx->missing_dual_src_outputs;
726       key->fs.frag_result_color_lowering = sel_ctx->frag_result_color_lowering;
727       key->fs.manual_depth_range = sel_ctx->manual_depth_range;
728       key->fs.polygon_stipple = sel_ctx->ctx->pstipple.enabled;
729       if (sel_ctx->ctx->gfx_pipeline_state.blend &&
730           sel_ctx->ctx->gfx_pipeline_state.blend->desc.RenderTarget[0].LogicOpEnable &&
731           !sel_ctx->ctx->gfx_pipeline_state.has_float_rtv) {
732          key->fs.cast_to_uint = util_format_is_unorm(sel_ctx->ctx->fb.cbufs[0]->format);
733          key->fs.cast_to_int = !key->fs.cast_to_uint;
734       }
735    }
736 
737    if (sel->samples_int_textures) {
738       key->samples_int_textures = sel->samples_int_textures;
739       key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
740       /* Copy only states with integer textures */
741       for(int i = 0; i < key->n_texture_states; ++i) {
742          auto& wrap_state = sel_ctx->ctx->tex_wrap_states[stage][i];
743          if (wrap_state.is_int_sampler) {
744             memcpy(&key->tex_wrap_states[i], &wrap_state, sizeof(wrap_state));
745             key->swizzle_state[i] = sel_ctx->ctx->tex_swizzle_state[stage][i];
746          }
747       }
748    }
749 
750    for (unsigned i = 0; i < sel_ctx->ctx->num_samplers[stage]; ++i) {
751       if (!sel_ctx->ctx->samplers[stage][i] ||
752           sel_ctx->ctx->samplers[stage][i]->filter == PIPE_TEX_FILTER_NEAREST)
753          continue;
754 
755       if (sel_ctx->ctx->samplers[stage][i]->wrap_r == PIPE_TEX_WRAP_CLAMP)
756          key->tex_saturate_r |= 1 << i;
757       if (sel_ctx->ctx->samplers[stage][i]->wrap_s == PIPE_TEX_WRAP_CLAMP)
758          key->tex_saturate_s |= 1 << i;
759       if (sel_ctx->ctx->samplers[stage][i]->wrap_t == PIPE_TEX_WRAP_CLAMP)
760          key->tex_saturate_t |= 1 << i;
761    }
762 
763    if (sel->compare_with_lod_bias_grad) {
764       key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
765       memcpy(key->sampler_compare_funcs, sel_ctx->ctx->tex_compare_func[stage],
766              key->n_texture_states * sizeof(enum compare_func));
767       memcpy(key->swizzle_state, sel_ctx->ctx->tex_swizzle_state[stage],
768              key->n_texture_states * sizeof(dxil_texture_swizzle_state));
769    }
770 
771    if (stage == PIPE_SHADER_VERTEX && sel_ctx->ctx->gfx_pipeline_state.ves) {
772       key->vs.needs_format_emulation = sel_ctx->ctx->gfx_pipeline_state.ves->needs_format_emulation;
773       if (key->vs.needs_format_emulation) {
774          memcpy(key->vs.format_conversion, sel_ctx->ctx->gfx_pipeline_state.ves->format_conversion,
775                 sel_ctx->ctx->gfx_pipeline_state.ves->num_elements * sizeof(enum pipe_format));
776       }
777    }
778 
779    if (stage == PIPE_SHADER_FRAGMENT &&
780        sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY] &&
781        sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_gs_variant &&
782        sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->gs_key.has_front_face) {
783       key->fs.remap_front_facing = 1;
784    }
785 }
786 
787 static void
select_shader_variant(struct d3d12_selection_context * sel_ctx,d3d12_shader_selector * sel,d3d12_shader_selector * prev,d3d12_shader_selector * next)788 select_shader_variant(struct d3d12_selection_context *sel_ctx, d3d12_shader_selector *sel,
789                      d3d12_shader_selector *prev, d3d12_shader_selector *next)
790 {
791    struct d3d12_context *ctx = sel_ctx->ctx;
792    d3d12_shader_key key;
793    nir_shader *new_nir_variant;
794    unsigned pstipple_binding = UINT32_MAX;
795 
796    d3d12_fill_shader_key(sel_ctx, &key, sel, prev, next);
797 
798    /* Check for an existing variant */
799    for (d3d12_shader *variant = sel->first; variant;
800         variant = variant->next_variant) {
801 
802       if (d3d12_compare_shader_keys(&key, &variant->key)) {
803          sel->current = variant;
804          return;
805       }
806    }
807 
808    /* Clone the NIR shader */
809    new_nir_variant = nir_shader_clone(sel, sel->initial);
810 
811    /* Apply any needed lowering passes */
812    if (key.gs.writes_psize) {
813       NIR_PASS_V(new_nir_variant, d3d12_lower_point_sprite,
814                  !key.gs.sprite_origin_upper_left,
815                  key.gs.point_size_per_vertex,
816                  key.gs.sprite_coord_enable,
817                  key.next_varying_inputs);
818 
819       nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
820       nir_shader_gather_info(new_nir_variant, impl);
821    }
822 
823    if (key.gs.primitive_id) {
824       NIR_PASS_V(new_nir_variant, d3d12_lower_primitive_id);
825 
826       nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
827       nir_shader_gather_info(new_nir_variant, impl);
828    }
829 
830    if (key.gs.triangle_strip)
831       NIR_PASS_V(new_nir_variant, d3d12_lower_triangle_strip);
832 
833    if (key.fs.polygon_stipple) {
834       NIR_PASS_V(new_nir_variant, nir_lower_pstipple_fs,
835                  &pstipple_binding, 0, false);
836 
837       nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
838       nir_shader_gather_info(new_nir_variant, impl);
839    }
840 
841    if (key.fs.remap_front_facing) {
842       d3d12_forward_front_face(new_nir_variant);
843 
844       nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
845       nir_shader_gather_info(new_nir_variant, impl);
846    }
847 
848    if (key.fs.missing_dual_src_outputs) {
849       NIR_PASS_V(new_nir_variant, d3d12_add_missing_dual_src_target,
850                  key.fs.missing_dual_src_outputs);
851    } else if (key.fs.frag_result_color_lowering) {
852       NIR_PASS_V(new_nir_variant, nir_lower_fragcolor,
853                  key.fs.frag_result_color_lowering);
854    }
855 
856    if (key.fs.manual_depth_range)
857       NIR_PASS_V(new_nir_variant, d3d12_lower_depth_range);
858 
859    if (sel->compare_with_lod_bias_grad)
860       NIR_PASS_V(new_nir_variant, d3d12_lower_sample_tex_compare, key.n_texture_states,
861                  key.sampler_compare_funcs, key.swizzle_state);
862 
863    if (key.fs.cast_to_uint)
864       NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, false);
865    if (key.fs.cast_to_int)
866       NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, true);
867 
868    {
869       struct nir_lower_tex_options tex_options = { };
870       tex_options.lower_txp = ~0u; /* No equivalent for textureProj */
871       tex_options.lower_rect = true;
872       tex_options.lower_rect_offset = true;
873       tex_options.saturate_s = key.tex_saturate_s;
874       tex_options.saturate_r = key.tex_saturate_r;
875       tex_options.saturate_t = key.tex_saturate_t;
876 
877       NIR_PASS_V(new_nir_variant, nir_lower_tex, &tex_options);
878    }
879 
880    /* Add the needed in and outputs, and re-sort */
881    uint64_t mask = key.required_varying_inputs.mask & ~new_nir_variant->info.inputs_read;
882 
883    if (prev) {
884       while (mask) {
885          int slot = u_bit_scan64(&mask);
886          create_varying_from_info(new_nir_variant, &key.required_varying_inputs, slot, nir_var_shader_in);
887       }
888       dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_in,
889                                       key.prev_varying_outputs);
890    }
891 
892    mask = key.required_varying_outputs.mask & ~new_nir_variant->info.outputs_written;
893 
894    if (next) {
895       while (mask) {
896          int slot = u_bit_scan64(&mask);
897          create_varying_from_info(new_nir_variant, &key.required_varying_outputs, slot, nir_var_shader_out);
898       }
899       dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_out,
900                                       key.next_varying_inputs);
901    }
902 
903    d3d12_shader *new_variant = compile_nir(ctx, sel, &key, new_nir_variant);
904    assert(new_variant);
905 
906    /* keep track of polygon stipple texture binding */
907    new_variant->pstipple_binding = pstipple_binding;
908 
909    /* prepend the new shader in the selector chain and pick it */
910    new_variant->next_variant = sel->first;
911    sel->current = sel->first = new_variant;
912 }
913 
914 static d3d12_shader_selector *
get_prev_shader(struct d3d12_context * ctx,pipe_shader_type current)915 get_prev_shader(struct d3d12_context *ctx, pipe_shader_type current)
916 {
917    /* No TESS_CTRL or TESS_EVAL yet */
918 
919    switch (current) {
920    case PIPE_SHADER_VERTEX:
921       return NULL;
922    case PIPE_SHADER_FRAGMENT:
923       if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
924          return ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
925       FALLTHROUGH;
926    case PIPE_SHADER_GEOMETRY:
927       return ctx->gfx_stages[PIPE_SHADER_VERTEX];
928    default:
929       unreachable("shader type not supported");
930    }
931 }
932 
933 static d3d12_shader_selector *
get_next_shader(struct d3d12_context * ctx,pipe_shader_type current)934 get_next_shader(struct d3d12_context *ctx, pipe_shader_type current)
935 {
936    /* No TESS_CTRL or TESS_EVAL yet */
937 
938    switch (current) {
939    case PIPE_SHADER_VERTEX:
940       if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
941          return ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
942       FALLTHROUGH;
943    case PIPE_SHADER_GEOMETRY:
944       return ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
945    case PIPE_SHADER_FRAGMENT:
946       return NULL;
947    default:
948       unreachable("shader type not supported");
949    }
950 }
951 
/* Bit flags reported by scan_texture_use() */
enum tex_scan_flags {
   /* a texture instruction has an integer destination type */
   TEX_SAMPLE_INTEGER_TEXTURE = 0x1,
   /* a shadow lookup uses txb, txl or txd */
   TEX_CMP_WITH_LOD_BIAS_GRAD = 0x2,
   /* every flag above set; lets the scan stop early */
   TEX_SCAN_ALL_FLAGS         = TEX_SAMPLE_INTEGER_TEXTURE | TEX_CMP_WITH_LOD_BIAS_GRAD
};
957 
958 static unsigned
scan_texture_use(nir_shader * nir)959 scan_texture_use(nir_shader *nir)
960 {
961    unsigned result = 0;
962    nir_foreach_function(func, nir) {
963       nir_foreach_block(block, func->impl) {
964          nir_foreach_instr(instr, block) {
965             if (instr->type == nir_instr_type_tex) {
966                auto tex = nir_instr_as_tex(instr);
967                switch (tex->op) {
968                case nir_texop_txb:
969                case nir_texop_txl:
970                case nir_texop_txd:
971                   if (tex->is_shadow)
972                      result |= TEX_CMP_WITH_LOD_BIAS_GRAD;
973                   FALLTHROUGH;
974                case nir_texop_tex:
975                   if (tex->dest_type & (nir_type_int | nir_type_uint))
976                      result |= TEX_SAMPLE_INTEGER_TEXTURE;
977                default:
978                   ;
979                }
980             }
981             if (TEX_SCAN_ALL_FLAGS == result)
982                return result;
983          }
984       }
985    }
986    return result;
987 }
988 
989 static uint64_t
update_so_info(struct pipe_stream_output_info * so_info,uint64_t outputs_written)990 update_so_info(struct pipe_stream_output_info *so_info,
991                uint64_t outputs_written)
992 {
993    uint64_t so_outputs = 0;
994    uint8_t reverse_map[64] = {0};
995    unsigned slot = 0;
996 
997    while (outputs_written)
998       reverse_map[slot++] = u_bit_scan64(&outputs_written);
999 
1000    for (unsigned i = 0; i < so_info->num_outputs; i++) {
1001       struct pipe_stream_output *output = &so_info->output[i];
1002 
1003       /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
1004       output->register_index = reverse_map[output->register_index];
1005 
1006       so_outputs |= 1ull << output->register_index;
1007    }
1008 
1009    return so_outputs;
1010 }
1011 
1012 struct d3d12_shader_selector *
d3d12_create_shader(struct d3d12_context * ctx,pipe_shader_type stage,const struct pipe_shader_state * shader)1013 d3d12_create_shader(struct d3d12_context *ctx,
1014                     pipe_shader_type stage,
1015                     const struct pipe_shader_state *shader)
1016 {
1017    struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector);
1018    sel->stage = stage;
1019 
1020    struct nir_shader *nir = NULL;
1021 
1022    if (shader->type == PIPE_SHADER_IR_NIR) {
1023       nir = (nir_shader *)shader->ir.nir;
1024    } else {
1025       assert(shader->type == PIPE_SHADER_IR_TGSI);
1026       nir = tgsi_to_nir(shader->tokens, ctx->base.screen, false);
1027    }
1028 
1029    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1030 
1031    unsigned tex_scan_result = scan_texture_use(nir);
1032    sel->samples_int_textures = (tex_scan_result & TEX_SAMPLE_INTEGER_TEXTURE) != 0;
1033    sel->compare_with_lod_bias_grad = (tex_scan_result & TEX_CMP_WITH_LOD_BIAS_GRAD) != 0;
1034 
1035    memcpy(&sel->so_info, &shader->stream_output, sizeof(sel->so_info));
1036    update_so_info(&sel->so_info, nir->info.outputs_written);
1037 
1038    assert(nir != NULL);
1039    d3d12_shader_selector *prev = get_prev_shader(ctx, sel->stage);
1040    d3d12_shader_selector *next = get_next_shader(ctx, sel->stage);
1041 
1042    uint64_t in_mask = nir->info.stage == MESA_SHADER_VERTEX ?
1043                          0 : VARYING_BIT_PRIMITIVE_ID;
1044 
1045    uint64_t out_mask = nir->info.stage == MESA_SHADER_FRAGMENT ?
1046                           (1ull << FRAG_RESULT_STENCIL) :
1047                           VARYING_BIT_PRIMITIVE_ID;
1048 
1049    d3d12_fix_io_uint_type(nir, in_mask, out_mask);
1050    NIR_PASS_V(nir, dxil_nir_split_clip_cull_distance);
1051 
1052    if (nir->info.stage != MESA_SHADER_VERTEX)
1053       nir->info.inputs_read =
1054             dxil_reassign_driver_locations(nir, nir_var_shader_in,
1055                                             prev ? prev->current->nir->info.outputs_written : 0);
1056    else
1057       nir->info.inputs_read = dxil_sort_by_driver_location(nir, nir_var_shader_in);
1058 
1059    if (nir->info.stage != MESA_SHADER_FRAGMENT) {
1060       nir->info.outputs_written =
1061             dxil_reassign_driver_locations(nir, nir_var_shader_out,
1062                                             next ? next->current->nir->info.inputs_read : 0);
1063    } else {
1064       NIR_PASS_V(nir, nir_lower_fragcoord_wtrans);
1065       dxil_sort_ps_outputs(nir);
1066    }
1067 
1068    /* Integer cube maps are not supported in DirectX because sampling is not supported
1069     * on integer textures and TextureLoad is not supported for cube maps, so we have to
1070     * lower integer cube maps to be handled like 2D textures arrays*/
1071    NIR_PASS_V(nir, d3d12_lower_int_cubmap_to_array);
1072 
1073    /* Keep this initial shader as the blue print for possible variants */
1074    sel->initial = nir;
1075 
1076    /*
1077     * We must compile some shader here, because if the previous or a next shaders exists later
1078     * when the shaders are bound, then the key evaluation in the shader selector will access
1079     * the current variant of these  prev and next shader, and we can only assign
1080     * a current variant when it has been successfully compiled.
1081     *
1082     * For shaders that require lowering because certain instructions are not available
1083     * and their emulation is state depended (like sampling an integer texture that must be
1084     * emulated and needs handling of boundary conditions, or shadow compare sampling with LOD),
1085     * we must go through the shader selector here to create a compilable variant.
1086     * For shaders that are not depended on the state this is just compiling the original
1087     * shader.
1088     *
1089     * TODO: get rid of having to compiling the shader here if it can be forseen that it will
1090     * be thrown away (i.e. it depends on states that are likely to change before the shader is
1091     * used for the first time)
1092     */
1093    struct d3d12_selection_context sel_ctx = {0};
1094    sel_ctx.ctx = ctx;
1095    select_shader_variant(&sel_ctx, sel, prev, next);
1096 
1097    if (!sel->current) {
1098       ralloc_free(sel);
1099       return NULL;
1100    }
1101 
1102    return sel;
1103 }
1104 
1105 void
d3d12_select_shader_variants(struct d3d12_context * ctx,const struct pipe_draw_info * dinfo)1106 d3d12_select_shader_variants(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
1107 {
1108    static unsigned order[] = {PIPE_SHADER_VERTEX, PIPE_SHADER_GEOMETRY, PIPE_SHADER_FRAGMENT};
1109    struct d3d12_selection_context sel_ctx;
1110 
1111    sel_ctx.ctx = ctx;
1112    sel_ctx.dinfo = dinfo;
1113    sel_ctx.needs_point_sprite_lowering = needs_point_sprite_lowering(ctx, dinfo);
1114    sel_ctx.fill_mode_lowered = fill_mode_lowered(ctx, dinfo);
1115    sel_ctx.cull_mode_lowered = cull_mode_lowered(ctx, sel_ctx.fill_mode_lowered);
1116    sel_ctx.provoking_vertex = get_provoking_vertex(&sel_ctx, &sel_ctx.alternate_tri);
1117    sel_ctx.needs_vertex_reordering = needs_vertex_reordering(&sel_ctx);
1118    sel_ctx.missing_dual_src_outputs = missing_dual_src_outputs(ctx);
1119    sel_ctx.frag_result_color_lowering = frag_result_color_lowering(ctx);
1120    sel_ctx.manual_depth_range = manual_depth_range(ctx);
1121 
1122    validate_geometry_shader_variant(&sel_ctx);
1123 
1124    for (unsigned i = 0; i < ARRAY_SIZE(order); ++i) {
1125       auto sel = ctx->gfx_stages[order[i]];
1126       if (!sel)
1127          continue;
1128 
1129       d3d12_shader_selector *prev = get_prev_shader(ctx, sel->stage);
1130       d3d12_shader_selector *next = get_next_shader(ctx, sel->stage);
1131 
1132       select_shader_variant(&sel_ctx, sel, prev, next);
1133    }
1134 }
1135 
1136 void
d3d12_shader_free(struct d3d12_shader_selector * sel)1137 d3d12_shader_free(struct d3d12_shader_selector *sel)
1138 {
1139    auto shader = sel->first;
1140    while (shader) {
1141       free(shader->bytecode);
1142       shader = shader->next_variant;
1143    }
1144    ralloc_free(sel->initial);
1145    ralloc_free(sel);
1146 }
1147 
#ifdef _WIN32
// Used to get path to self. A declaration directly inside a linkage
// specification already behaves as if declared extern and must not carry a
// storage class specifier of its own.
extern "C" IMAGE_DOS_HEADER __ImageBase;
#endif
1152 
load_dxil_dll()1153 void d3d12_validation_tools::load_dxil_dll()
1154 {
1155    if (!dxil_module.load(UTIL_DL_PREFIX "dxil" UTIL_DL_EXT)) {
1156 #ifdef _WIN32
1157       char selfPath[MAX_PATH] = "";
1158       uint32_t pathSize = GetModuleFileNameA((HINSTANCE)&__ImageBase, selfPath, sizeof(selfPath));
1159       if (pathSize == 0 || pathSize == sizeof(selfPath)) {
1160          debug_printf("D3D12: Unable to get path to self");
1161          return;
1162       }
1163 
1164       auto lastSlash = strrchr(selfPath, '\\');
1165       if (!lastSlash) {
1166          debug_printf("D3D12: Unable to get path to self");
1167          return;
1168       }
1169 
1170       *(lastSlash + 1) = '\0';
1171       if (strcat_s(selfPath, "dxil.dll") != 0) {
1172          debug_printf("D3D12: Unable to get path to dxil.dll next to self");
1173          return;
1174       }
1175 
1176       dxil_module.load(selfPath);
1177 #endif
1178    }
1179 }
1180 
d3d12_validation_tools()1181 d3d12_validation_tools::d3d12_validation_tools()
1182 {
1183    load_dxil_dll();
1184    DxcCreateInstanceProc dxil_create_func = (DxcCreateInstanceProc)util_dl_get_proc_address(dxil_module, "DxcCreateInstance");
1185 
1186    if (dxil_create_func) {
1187       HRESULT hr = dxil_create_func(CLSID_DxcValidator,  IID_PPV_ARGS(&validator));
1188       if (FAILED(hr)) {
1189          debug_printf("D3D12: Unable to create validator\n");
1190       }
1191    }
1192 #ifdef _WIN32
1193    else if (!(d3d12_debug & D3D12_DEBUG_EXPERIMENTAL)) {
1194       debug_printf("D3D12: Unable to load DXIL.dll\n");
1195    }
1196 #endif
1197 
1198    DxcCreateInstanceProc compiler_create_func  = nullptr;
1199    if(dxc_compiler_module.load("dxcompiler.dll"))
1200       compiler_create_func = (DxcCreateInstanceProc)util_dl_get_proc_address(dxc_compiler_module, "DxcCreateInstance");
1201 
1202    if (compiler_create_func) {
1203       HRESULT hr = compiler_create_func(CLSID_DxcLibrary, IID_PPV_ARGS(&library));
1204       if (FAILED(hr)) {
1205          debug_printf("D3D12: Unable to create library instance: %x\n", hr);
1206       }
1207 
1208       if (d3d12_debug & D3D12_DEBUG_DISASS) {
1209          hr = compiler_create_func(CLSID_DxcCompiler, IID_PPV_ARGS(&compiler));
1210          if (FAILED(hr)) {
1211             debug_printf("D3D12: Unable to create compiler instance\n");
1212          }
1213       }
1214    } else if (d3d12_debug & D3D12_DEBUG_DISASS) {
1215       debug_printf("D3D12: Disassembly requested but compiler couldn't be loaded\n");
1216    }
1217 }
1218 
HModule()1219 d3d12_validation_tools::HModule::HModule():
1220    module(0)
1221 {
1222 }
1223 
~HModule()1224 d3d12_validation_tools::HModule::~HModule()
1225 {
1226    if (module)
1227       util_dl_close(module);
1228 }
1229 
1230 inline
operator util_dl_library*() const1231 d3d12_validation_tools::HModule::operator util_dl_library * () const
1232 {
1233    return module;
1234 }
1235 
1236 bool
load(LPCSTR file_name)1237 d3d12_validation_tools::HModule::load(LPCSTR file_name)
1238 {
1239    module = util_dl_open(file_name);
1240    return module != nullptr;
1241 }
1242 
1243 
1244 class ShaderBlob : public IDxcBlob {
1245 public:
ShaderBlob(blob * data)1246    ShaderBlob(blob* data) : m_data(data) {}
1247 
GetBufferPointer(void)1248    LPVOID STDMETHODCALLTYPE GetBufferPointer(void) override { return m_data->data; }
1249 
GetBufferSize()1250    SIZE_T STDMETHODCALLTYPE GetBufferSize() override { return m_data->size; }
1251 
QueryInterface(REFIID,void **)1252    HRESULT STDMETHODCALLTYPE QueryInterface(REFIID, void**) override { return E_NOINTERFACE; }
1253 
AddRef()1254    ULONG STDMETHODCALLTYPE AddRef() override { return 1; }
1255 
Release()1256    ULONG STDMETHODCALLTYPE Release() override { return 0; }
1257 
1258    blob* m_data;
1259 };
1260 
validate_and_sign(struct blob * dxil)1261 bool d3d12_validation_tools::validate_and_sign(struct blob *dxil)
1262 {
1263    ShaderBlob source(dxil);
1264 
1265    ComPtr<IDxcOperationResult> result;
1266 
1267    validator->Validate(&source, DxcValidatorFlags_InPlaceEdit, &result);
1268    HRESULT validationStatus;
1269    result->GetStatus(&validationStatus);
1270    if (FAILED(validationStatus) && library) {
1271       ComPtr<IDxcBlobEncoding> printBlob, printBlobUtf8;
1272       result->GetErrorBuffer(&printBlob);
1273       library->GetBlobAsUtf8(printBlob.Get(), printBlobUtf8.GetAddressOf());
1274 
1275       char *errorString;
1276       if (printBlobUtf8) {
1277          errorString = reinterpret_cast<char*>(printBlobUtf8->GetBufferPointer());
1278 
1279          errorString[printBlobUtf8->GetBufferSize() - 1] = 0;
1280          debug_printf("== VALIDATION ERROR =============================================\n%s\n"
1281                      "== END ==========================================================\n",
1282                      errorString);
1283       }
1284 
1285       return false;
1286    }
1287    return true;
1288 
1289 }
1290 
disassemble(struct blob * dxil)1291 void d3d12_validation_tools::disassemble(struct blob *dxil)
1292 {
1293    if (!compiler) {
1294       fprintf(stderr, "D3D12: No Disassembler\n");
1295       return;
1296    }
1297    ShaderBlob source(dxil);
1298    IDxcBlobEncoding* pDisassembly = nullptr;
1299 
1300    if (FAILED(compiler->Disassemble(&source, &pDisassembly))) {
1301       fprintf(stderr, "D3D12: Disassembler failed\n");
1302       return;
1303    }
1304 
1305    ComPtr<IDxcBlobEncoding> dissassably(pDisassembly);
1306    ComPtr<IDxcBlobEncoding> blobUtf8;
1307    library->GetBlobAsUtf8(pDisassembly, blobUtf8.GetAddressOf());
1308    if (!blobUtf8) {
1309       fprintf(stderr, "D3D12: Unable to get utf8 encoding\n");
1310       return;
1311    }
1312 
1313    char *disassembly = reinterpret_cast<char*>(blobUtf8->GetBufferPointer());
1314    disassembly[blobUtf8->GetBufferSize() - 1] = 0;
1315 
1316    fprintf(stderr, "== BEGIN SHADER ============================================\n"
1317            "%s\n"
1318            "== END SHADER ==============================================\n",
1319            disassembly);
1320 }
1321