1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "dzn_private.h"
25 
26 #include "spirv/nir_spirv.h"
27 
28 #include "dxil_nir.h"
29 #include "nir_to_dxil.h"
30 #include "dxil_spirv_nir.h"
31 #include "spirv_to_dxil.h"
32 
33 #include "dxil_validator.h"
34 
35 #include "vk_alloc.h"
36 #include "vk_util.h"
37 #include "vk_format.h"
38 #include "vk_pipeline.h"
39 #include "vk_pipeline_cache.h"
40 
41 #include "util/u_debug.h"
42 
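/* Helper for building D3D12 pipeline state streams: align the stream, append a
 * { subobject-type, desc } wrapper tagged with __id, zero-initialize the payload,
 * and expose it through a local pointer named __desc. */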
43 #define d3d12_pipeline_state_stream_new_desc(__stream, __maxstreamsz, __id, __type, __desc) \
44    __type *__desc; \
45    do { \
46       struct { \
47          D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type; \
48          __type desc; \
49       } *__wrapper; \
50       (__stream)->SizeInBytes = ALIGN_POT((__stream)->SizeInBytes, alignof(void *)); \
51       __wrapper = (void *)((uint8_t *)(__stream)->pPipelineStateSubobjectStream + (__stream)->SizeInBytes); \
52       (__stream)->SizeInBytes += sizeof(*__wrapper); \
53       assert((__stream)->SizeInBytes <= __maxstreamsz); \
54       __wrapper->type = __id; \
55       __desc = &__wrapper->desc; \
56       memset(__desc, 0, sizeof(*__desc)); \
57    } while (0)
58 
59 #define d3d12_pipeline_state_stream_new_desc_abbrev(__stream, __maxstreamsz, __id, __type, __desc) \
60    d3d12_pipeline_state_stream_new_desc(__stream, __maxstreamsz, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_ ## __id, __type, __desc)
61 
62 #define d3d12_gfx_pipeline_state_stream_new_desc(__stream, __id, __type, __desc) \
63    d3d12_pipeline_state_stream_new_desc_abbrev(__stream, MAX_GFX_PIPELINE_STATE_STREAM_SIZE, __id, __type, __desc)
64 
65 #define d3d12_compute_pipeline_state_stream_new_desc(__stream, __id, __type, __desc) \
66    d3d12_pipeline_state_stream_new_desc_abbrev(__stream, MAX_COMPUTE_PIPELINE_STATE_STREAM_SIZE, __id, __type, __desc)
67 
68 static bool
69 gfx_pipeline_variant_key_equal(const void *a, const void *b)
70 {
71    return !memcmp(a, b, sizeof(struct dzn_graphics_pipeline_variant_key));
72 }
73 
74 static uint32_t
75 gfx_pipeline_variant_key_hash(const void *key)
76 {
77    return _mesa_hash_data(key, sizeof(struct dzn_graphics_pipeline_variant_key));
78 }
79 
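/* Pipeline-cache object wrapping an opaque byte blob keyed by a SHA-1 hash; used
 * below for both cached DXIL shaders and cached graphics-pipeline descriptions. */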
80 struct dzn_cached_blob {
81    struct vk_pipeline_cache_object base;
82    uint8_t hash[SHA1_DIGEST_LENGTH];
83    const void *data;
84    size_t size;
85 };
86 
87 static bool
88 dzn_cached_blob_serialize(struct vk_pipeline_cache_object *object,
89                           struct blob *blob)
90 {
91    struct dzn_cached_blob *cached_blob =
92       container_of(object, struct dzn_cached_blob, base);
93 
94    blob_write_bytes(blob, cached_blob->data, cached_blob->size);
95    return true;
96 }
97 
98 static void
99 dzn_cached_blob_destroy(struct vk_device *device,
100                         struct vk_pipeline_cache_object *object)
101 {
102    struct dzn_cached_blob *shader =
103       container_of(object, struct dzn_cached_blob, base);
104 
105    vk_free(&device->alloc, shader);
106 }
107 
108 static struct vk_pipeline_cache_object *
109 dzn_cached_blob_create(struct vk_device *device,
110                        const void *hash,
111                        const void *data,
112                        size_t data_size);
113 
114 static struct vk_pipeline_cache_object *
115 dzn_cached_blob_deserialize(struct vk_pipeline_cache *cache,
116                             const void *key_data, size_t key_size,
117                             struct blob_reader *blob)
118 {
119    size_t data_size = blob->end - blob->current;
120    assert(key_size == SHA1_DIGEST_LENGTH);
121 
122    return dzn_cached_blob_create(cache->base.device, key_data,
123                                  blob_read_bytes(blob, data_size), data_size);
124 }
125 
126 const struct vk_pipeline_cache_object_ops dzn_cached_blob_ops = {
127    .serialize = dzn_cached_blob_serialize,
128    .deserialize = dzn_cached_blob_deserialize,
129    .destroy = dzn_cached_blob_destroy,
130 };
131 
132 
133 static struct vk_pipeline_cache_object *
134 dzn_cached_blob_create(struct vk_device *device,
135                        const void *hash,
136                        const void *data,
137                        size_t data_size)
138 {
139    VK_MULTIALLOC(ma);
140    VK_MULTIALLOC_DECL(&ma, struct dzn_cached_blob, blob, 1);
141    VK_MULTIALLOC_DECL(&ma, uint8_t, copy, data_size);
142 
143    if (!vk_multialloc_alloc(&ma, &device->alloc,
144                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
145       return NULL;
146 
147    memcpy(blob->hash, hash, sizeof(blob->hash));
148 
149    vk_pipeline_cache_object_init(device, &blob->base,
150                                  &dzn_cached_blob_ops,
151                                  blob->hash, sizeof(blob->hash));
152 
153    if (data)
154       memcpy(copy, data, data_size);
155    blob->data = copy;
156    blob->size = data_size;
157 
158    return &blob->base;
159 }
160 
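/* Lazily create the hash table that tracks pipeline variants, keyed by
 * struct dzn_graphics_pipeline_variant_key. */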
161 static VkResult
162 dzn_graphics_pipeline_prepare_for_variants(struct dzn_device *device,
163                                            struct dzn_graphics_pipeline *pipeline)
164 {
165    if (pipeline->variants)
166       return VK_SUCCESS;
167 
168    pipeline->variants =
169       _mesa_hash_table_create(NULL,
170                               gfx_pipeline_variant_key_hash,
171                               gfx_pipeline_variant_key_equal);
172    if (!pipeline->variants)
173       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
174 
175    return VK_SUCCESS;
176 }
177 
178 static dxil_spirv_shader_stage
179 to_dxil_shader_stage(VkShaderStageFlagBits in)
180 {
181    switch (in) {
182    case VK_SHADER_STAGE_VERTEX_BIT: return DXIL_SPIRV_SHADER_VERTEX;
183    case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return DXIL_SPIRV_SHADER_TESS_CTRL;
184    case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return DXIL_SPIRV_SHADER_TESS_EVAL;
185    case VK_SHADER_STAGE_GEOMETRY_BIT: return DXIL_SPIRV_SHADER_GEOMETRY;
186    case VK_SHADER_STAGE_FRAGMENT_BIT: return DXIL_SPIRV_SHADER_FRAGMENT;
187    case VK_SHADER_STAGE_COMPUTE_BIT: return DXIL_SPIRV_SHADER_COMPUTE;
188    default: unreachable("Unsupported stage");
189    }
190 }
191 
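/* Per-stage options controlling SPIR-V -> NIR translation in dzn_pipeline_get_nir_shader(). */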
192 struct dzn_nir_options {
193    enum dxil_spirv_yz_flip_mode yz_flip_mode;
194    uint16_t y_flip_mask, z_flip_mask;
195    bool force_sample_rate_shading;
196    bool lower_view_index;
197    bool lower_view_index_to_rt_layer;
198    enum pipe_format *vi_conversions;
199    const nir_shader_compiler_options *nir_opts;
200    enum gl_subgroup_size subgroup_size;
201 };
202 
203 static VkResult
204 dzn_pipeline_get_nir_shader(struct dzn_device *device,
205                             const struct dzn_pipeline_layout *layout,
206                             struct vk_pipeline_cache *cache,
207                             const uint8_t *hash,
208                             const VkPipelineShaderStageCreateInfo *stage_info,
209                             gl_shader_stage stage,
210                             const struct dzn_nir_options *options,
211                             nir_shader **nir)
212 {
213    if (cache) {
214       *nir = vk_pipeline_cache_lookup_nir(cache, hash, SHA1_DIGEST_LENGTH,
215                                           options->nir_opts, NULL, NULL);
216        if (*nir)
217           return VK_SUCCESS;
218    }
219 
220    struct dzn_physical_device *pdev =
221       container_of(device->vk.physical, struct dzn_physical_device, vk);
222    VK_FROM_HANDLE(vk_shader_module, module, stage_info->module);
223    const struct spirv_to_nir_options *spirv_opts = dxil_spirv_nir_get_spirv_options();
224 
225    VkResult result =
226       vk_shader_module_to_nir(&device->vk, module, stage,
227                               stage_info->pName, stage_info->pSpecializationInfo,
228                               spirv_opts, options->nir_opts, NULL, nir);
229    if (result != VK_SUCCESS)
230       return result;
231 
232    struct dxil_spirv_runtime_conf conf = {
233       .runtime_data_cbv = {
234          .register_space = DZN_REGISTER_SPACE_SYSVALS,
235          .base_shader_register = 0,
236       },
237       .push_constant_cbv = {
238          .register_space = DZN_REGISTER_SPACE_PUSH_CONSTANT,
239          .base_shader_register = 0,
240       },
241       .zero_based_vertex_instance_id = false,
242       .zero_based_compute_workgroup_id = false,
243       .yz_flip = {
244          .mode = options->yz_flip_mode,
245          .y_mask = options->y_flip_mask,
246          .z_mask = options->z_flip_mask,
247       },
248       .declared_read_only_images_as_srvs = !device->bindless,
249       .inferred_read_only_images_as_srvs = !device->bindless,
250       .force_sample_rate_shading = options->force_sample_rate_shading,
251       .lower_view_index = options->lower_view_index,
252       .lower_view_index_to_rt_layer = options->lower_view_index_to_rt_layer,
253       .shader_model_max = dzn_get_shader_model(pdev),
254    };
255 
256    bool requires_runtime_data;
257    dxil_spirv_nir_passes(*nir, &conf, &requires_runtime_data);
258 
259    if (stage == MESA_SHADER_VERTEX) {
260       bool needs_conv = false;
261       for (uint32_t i = 0; i < MAX_VERTEX_GENERIC_ATTRIBS; i++) {
262          if (options->vi_conversions[i] != PIPE_FORMAT_NONE)
263             needs_conv = true;
264       }
265 
266       if (needs_conv)
267          NIR_PASS_V(*nir, dxil_nir_lower_vs_vertex_conversion, options->vi_conversions);
268    }
269    (*nir)->info.subgroup_size = options->subgroup_size;
270 
271    if (cache)
272       vk_pipeline_cache_add_nir(cache, hash, SHA1_DIGEST_LENGTH, *nir);
273 
274    return VK_SUCCESS;
275 }
276 
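/* Intrinsic-pass callback (non-bindless path): rewrite vulkan_resource_index bindings
 * to the register indices recorded in the layout's binding_translation tables. */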
277 static bool
278 adjust_resource_index_binding(struct nir_builder *builder,
279                               nir_intrinsic_instr *intrin,
280                               void *cb_data)
281 {
282    if (intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
283       return false;
284 
285    const struct dzn_pipeline_layout *layout = cb_data;
286    unsigned set = nir_intrinsic_desc_set(intrin);
287    unsigned binding = nir_intrinsic_binding(intrin);
288 
289    if (set >= layout->set_count ||
290        binding >= layout->binding_translation[set].binding_count)
291       return false;
292 
293    binding = layout->binding_translation[set].base_reg[binding];
294    nir_intrinsic_set_binding(intrin, binding);
295 
296    return true;
297 }
298 
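/* Bindless remapping callback: dynamic buffers are redirected to an extra descriptor
 * set appended after the app-visible ones, samplers that became static samplers are
 * flagged with descriptor_set = ~0, and remaining bindings get their register index
 * translated through the layout's binding_translation tables. */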
299 static void
300 adjust_to_bindless_cb(struct dxil_spirv_binding_remapping *inout, void *context)
301 {
302    const struct dzn_pipeline_layout *layout = context;
303    assert(inout->descriptor_set < layout->set_count);
304    uint32_t new_binding = layout->binding_translation[inout->descriptor_set].base_reg[inout->binding];
305    switch (layout->binding_translation[inout->descriptor_set].binding_class[inout->binding]) {
306    case DZN_PIPELINE_BINDING_DYNAMIC_BUFFER:
307       inout->descriptor_set = layout->set_count;
308       FALLTHROUGH;
309    case DZN_PIPELINE_BINDING_STATIC_SAMPLER:
310       if (inout->is_sampler) {
311          inout->descriptor_set = ~0;
312          break;
313       }
314       FALLTHROUGH;
315    case DZN_PIPELINE_BINDING_NORMAL:
316       inout->binding = new_binding;
317       break;
318    default:
319       unreachable("Invalid binding type");
320    }
321 }
322 
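/* Remap variable bindings to match the pipeline layout (or lower them to bindless),
 * optionally hashing the remapping so it can contribute to the DXIL cache key. */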
323 static bool
324 adjust_var_bindings(nir_shader *shader,
325                     struct dzn_device *device,
326                     const struct dzn_pipeline_layout *layout,
327                     uint8_t *bindings_hash)
328 {
329    uint32_t modes = nir_var_image | nir_var_uniform | nir_var_mem_ubo | nir_var_mem_ssbo;
330    struct mesa_sha1 bindings_hash_ctx;
331 
332    if (bindings_hash)
333       _mesa_sha1_init(&bindings_hash_ctx);
334 
335    nir_foreach_variable_with_modes(var, shader, modes) {
336       if (var->data.mode == nir_var_uniform) {
337          const struct glsl_type *type = glsl_without_array(var->type);
338 
339          if (!glsl_type_is_sampler(type) && !glsl_type_is_texture(type))
340             continue;
341       }
342 
343       unsigned s = var->data.descriptor_set, b = var->data.binding;
344 
345       if (s >= layout->set_count)
346          continue;
347 
348       assert(b < layout->binding_translation[s].binding_count);
349       if (!device->bindless)
350          var->data.binding = layout->binding_translation[s].base_reg[b];
351 
352       if (bindings_hash) {
353          _mesa_sha1_update(&bindings_hash_ctx, &s, sizeof(s));
354          _mesa_sha1_update(&bindings_hash_ctx, &b, sizeof(b));
355          _mesa_sha1_update(&bindings_hash_ctx, &var->data.binding, sizeof(var->data.binding));
356       }
357    }
358 
359    if (bindings_hash)
360       _mesa_sha1_final(&bindings_hash_ctx, bindings_hash);
361 
362    if (device->bindless) {
363       struct dxil_spirv_nir_lower_bindless_options options = {
364          .dynamic_buffer_binding = layout->dynamic_buffer_count ? layout->set_count : ~0,
365          .num_descriptor_sets = layout->set_count,
366          .callback_context = (void *)layout,
367          .remap_binding = adjust_to_bindless_cb
368       };
369       bool ret = dxil_spirv_nir_lower_bindless(shader, &options);
370       /* We skipped remapping variable bindings in the hashing loop, but if there are
371        * still static samplers declared, we need to remap those now. */
372       nir_foreach_variable_with_modes(var, shader, nir_var_uniform) {
373          assert(glsl_type_is_sampler(glsl_without_array(var->type)));
374          var->data.binding = layout->binding_translation[var->data.descriptor_set].base_reg[var->data.binding];
375       }
376       return ret;
377    } else {
378       return nir_shader_intrinsics_pass(shader, adjust_resource_index_binding,
379                                           nir_metadata_all, (void *)layout);
380    }
381 }
382 
383 enum dxil_shader_model
384 dzn_get_shader_model(const struct dzn_physical_device *pdev)
385 {
386    static_assert(D3D_SHADER_MODEL_6_0 == 0x60 && SHADER_MODEL_6_0 == 0x60000, "Validating math below");
387    static_assert(D3D_SHADER_MODEL_6_8 == 0x68 && SHADER_MODEL_6_8 == 0x60008, "Validating math below");
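   /* D3D_SHADER_MODEL packs 6.x as 0x6x while dxil's SHADER_MODEL packs it as 0x6000x:
    * shift the major nibble up by 12 bits and keep the minor nibble as-is. */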
388    return ((pdev->shader_model & 0xf0) << 12) | (pdev->shader_model & 0xf);
389 }
390 
391 static VkResult
392 dzn_pipeline_compile_shader(struct dzn_device *device,
393                             nir_shader *nir,
394                             uint32_t input_clip_size,
395                             D3D12_SHADER_BYTECODE *slot)
396 {
397    struct dzn_instance *instance =
398       container_of(device->vk.physical->instance, struct dzn_instance, vk);
399    struct dzn_physical_device *pdev =
400       container_of(device->vk.physical, struct dzn_physical_device, vk);
401    struct nir_to_dxil_options opts = {
402       .environment = DXIL_ENVIRONMENT_VULKAN,
403       .lower_int16 = !pdev->options4.Native16BitShaderOpsSupported &&
404       /* Don't lower 16-bit types if they can only come from min-precision */
405          (device->vk.enabled_extensions.KHR_shader_float16_int8 ||
406           device->vk.enabled_features.shaderFloat16 ||
407           device->vk.enabled_features.shaderInt16),
408       .shader_model_max = dzn_get_shader_model(pdev),
409       .input_clip_size = input_clip_size,
410 #ifdef _WIN32
411       .validator_version_max = dxil_get_validator_version(instance->dxil_validator),
412 #endif
413    };
414    struct blob dxil_blob;
415    VkResult result = VK_SUCCESS;
416 
417    if (instance->debug_flags & DZN_DEBUG_NIR)
418       nir_print_shader(nir, stderr);
419 
420    if (nir_to_dxil(nir, &opts, NULL, &dxil_blob)) {
421       blob_finish_get_buffer(&dxil_blob, (void **)&slot->pShaderBytecode,
422                              (size_t *)&slot->BytecodeLength);
423    } else {
424       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
425    }
426 
427    if (dxil_blob.allocated)
428       blob_finish(&dxil_blob);
429 
430    if (result != VK_SUCCESS)
431       return result;
432 
433 #ifdef _WIN32
434    char *err;
435    bool res = dxil_validate_module(instance->dxil_validator,
436                                    (void *)slot->pShaderBytecode,
437                                    slot->BytecodeLength, &err);
438 
439    if (instance->debug_flags & DZN_DEBUG_DXIL) {
440       char *disasm = dxil_disasm_module(instance->dxil_validator,
441                                         (void *)slot->pShaderBytecode,
442                                         slot->BytecodeLength);
443       if (disasm) {
444          fprintf(stderr,
445                  "== BEGIN SHADER ============================================\n"
446                  "%s\n"
447                  "== END SHADER ==============================================\n",
448                   disasm);
449          ralloc_free(disasm);
450       }
451    }
452 
453    if (!res && !(instance->debug_flags & DZN_DEBUG_EXPERIMENTAL)) {
454       if (err) {
455          mesa_loge(
456                "== VALIDATION ERROR =============================================\n"
457                "%s\n"
458                "== END ==========================================================\n",
459                err);
460          ralloc_free(err);
461       }
462       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
463    }
464 #endif
465 
466    return VK_SUCCESS;
467 }
468 
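/* Append the VS/HS/DS/GS/PS bytecode sub-object matching the given stage to the
 * pipeline state stream and return a pointer to its D3D12_SHADER_BYTECODE slot. */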
469 static D3D12_SHADER_BYTECODE *
470 dzn_pipeline_get_gfx_shader_slot(D3D12_PIPELINE_STATE_STREAM_DESC *stream,
471                                  gl_shader_stage in)
472 {
473    switch (in) {
474    case MESA_SHADER_VERTEX: {
475       d3d12_gfx_pipeline_state_stream_new_desc(stream, VS, D3D12_SHADER_BYTECODE, desc);
476       return desc;
477    }
478    case MESA_SHADER_TESS_CTRL: {
479       d3d12_gfx_pipeline_state_stream_new_desc(stream, HS, D3D12_SHADER_BYTECODE, desc);
480       return desc;
481    }
482    case MESA_SHADER_TESS_EVAL: {
483       d3d12_gfx_pipeline_state_stream_new_desc(stream, DS, D3D12_SHADER_BYTECODE, desc);
484       return desc;
485    }
486    case MESA_SHADER_GEOMETRY: {
487       d3d12_gfx_pipeline_state_stream_new_desc(stream, GS, D3D12_SHADER_BYTECODE, desc);
488       return desc;
489    }
490    case MESA_SHADER_FRAGMENT: {
491       d3d12_gfx_pipeline_state_stream_new_desc(stream, PS, D3D12_SHADER_BYTECODE, desc);
492       return desc;
493    }
494    default: unreachable("Unsupported stage");
495    }
496 }
497 
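/* Layout of a cached DXIL shader blob: this header followed by `size` bytes of
 * DXIL bytecode. */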
498 struct dzn_cached_dxil_shader_header {
499    gl_shader_stage stage;
500    size_t size;
501    uint8_t data[0];
502 };
503 
504 static VkResult
505 dzn_pipeline_cache_lookup_dxil_shader(struct vk_pipeline_cache *cache,
506                                       const uint8_t *dxil_hash,
507                                       gl_shader_stage *stage,
508                                       D3D12_SHADER_BYTECODE *bc)
509 {
510    *stage = MESA_SHADER_NONE;
511 
512    if (!cache)
513       return VK_SUCCESS;
514 
515    struct vk_pipeline_cache_object *cache_obj = NULL;
516 
517    cache_obj =
518       vk_pipeline_cache_lookup_object(cache, dxil_hash, SHA1_DIGEST_LENGTH,
519                                       &dzn_cached_blob_ops,
520                                       NULL);
521    if (!cache_obj)
522       return VK_SUCCESS;
523 
524    struct dzn_cached_blob *cached_blob =
525       container_of(cache_obj, struct dzn_cached_blob, base);
526    VkResult ret = VK_SUCCESS;
527 
528    assert(sizeof(struct dzn_cached_dxil_shader_header) <= cached_blob->size);
529 
530    const struct dzn_cached_dxil_shader_header *info =
531       (struct dzn_cached_dxil_shader_header *)(cached_blob->data);
532 
533    assert(sizeof(struct dzn_cached_dxil_shader_header) + info->size <= cached_blob->size);
534    assert(info->stage > MESA_SHADER_NONE && info->stage < MESA_VULKAN_SHADER_STAGES);
535    assert(info->size > 0);
536 
537    void *code = malloc(info->size);
538    if (!code) {
539       ret = vk_error(cache->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
540       goto out;
541    }
542 
543    memcpy(code, info->data, info->size);
544 
545    bc->pShaderBytecode = code;
546    bc->BytecodeLength = info->size;
547    *stage = info->stage;
548 
549 out:
550    vk_pipeline_cache_object_unref(cache->base.device, cache_obj);
551    return ret;
552 }
553 
554 static void
555 dzn_pipeline_cache_add_dxil_shader(struct vk_pipeline_cache *cache,
556                                    const uint8_t *dxil_hash,
557                                    gl_shader_stage stage,
558                                    const D3D12_SHADER_BYTECODE *bc)
559 {
560    size_t size = sizeof(struct dzn_cached_dxil_shader_header) +
561                  bc->BytecodeLength;
562 
563    struct vk_pipeline_cache_object *cache_obj =
564       dzn_cached_blob_create(cache->base.device, dxil_hash, NULL, size);
565    if (!cache_obj)
566       return;
567 
568    struct dzn_cached_blob *cached_blob =
569       container_of(cache_obj, struct dzn_cached_blob, base);
570    struct dzn_cached_dxil_shader_header *info =
571       (struct dzn_cached_dxil_shader_header *)(cached_blob->data);
572    info->stage = stage;
573    info->size = bc->BytecodeLength;
574    memcpy(info->data, bc->pShaderBytecode, bc->BytecodeLength);
575 
576    cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);
577    vk_pipeline_cache_object_unref(cache->base.device, cache_obj);
578 }
579 
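/* Layout of a cached graphics-pipeline blob: this header, then `input_count`
 * D3D12_INPUT_ELEMENT_DESCs (suitably aligned), then one SHA-1 per stage bit set in
 * `stages`, each referencing a separately cached DXIL shader. */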
580 struct dzn_cached_gfx_pipeline_header {
581    uint32_t stages : 31;
582    uint32_t rast_disabled_from_missing_position : 1;
583    uint32_t input_count;
584 };
585 
586 static VkResult
587 dzn_pipeline_cache_lookup_gfx_pipeline(struct dzn_graphics_pipeline *pipeline,
588                                        struct vk_pipeline_cache *cache,
589                                        const uint8_t *pipeline_hash,
590                                        bool *cache_hit)
591 {
592    *cache_hit = false;
593 
594    if (!cache)
595       return VK_SUCCESS;
596 
597    struct vk_pipeline_cache_object *cache_obj = NULL;
598 
599    cache_obj =
600       vk_pipeline_cache_lookup_object(cache, pipeline_hash, SHA1_DIGEST_LENGTH,
601                                       &dzn_cached_blob_ops,
602                                       NULL);
603    if (!cache_obj)
604       return VK_SUCCESS;
605 
606    struct dzn_cached_blob *cached_blob =
607       container_of(cache_obj, struct dzn_cached_blob, base);
608    D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc =
609       &pipeline->templates.stream_desc;
610 
611    const struct dzn_cached_gfx_pipeline_header *info =
612       (const struct dzn_cached_gfx_pipeline_header *)(cached_blob->data);
613    size_t offset = ALIGN_POT(sizeof(*info), alignof(D3D12_INPUT_ELEMENT_DESC));
614 
615    assert(cached_blob->size >= sizeof(*info));
616 
617    if (info->input_count > 0) {
618       const D3D12_INPUT_ELEMENT_DESC *inputs =
619          (const D3D12_INPUT_ELEMENT_DESC *)((uint8_t *)cached_blob->data + offset);
620 
621       assert(cached_blob->size >= offset + sizeof(*inputs) * info->input_count);
622 
623       memcpy(pipeline->templates.inputs, inputs,
624              sizeof(*inputs) * info->input_count);
625       d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, INPUT_LAYOUT, D3D12_INPUT_LAYOUT_DESC, desc);
626       desc->pInputElementDescs = pipeline->templates.inputs;
627       desc->NumElements = info->input_count;
628       offset += sizeof(*inputs) * info->input_count;
629    }
630 
631    assert(cached_blob->size == offset + util_bitcount(info->stages) * SHA1_DIGEST_LENGTH);
632 
633    u_foreach_bit(s, info->stages) {
634       uint8_t *dxil_hash = (uint8_t *)cached_blob->data + offset;
635       gl_shader_stage stage;
636 
637       D3D12_SHADER_BYTECODE *slot =
638          dzn_pipeline_get_gfx_shader_slot(stream_desc, s);
639 
640       VkResult ret =
641          dzn_pipeline_cache_lookup_dxil_shader(cache, dxil_hash, &stage, slot);
642       if (ret != VK_SUCCESS)
643          return ret;
644 
645       assert(stage == s);
646       offset += SHA1_DIGEST_LENGTH;
647    }
648 
649    pipeline->rast_disabled_from_missing_position = info->rast_disabled_from_missing_position;
650 
651    *cache_hit = true;
652 
653    vk_pipeline_cache_object_unref(cache->base.device, cache_obj);
654    return VK_SUCCESS;
655 }
656 
657 static void
658 dzn_pipeline_cache_add_gfx_pipeline(struct dzn_graphics_pipeline *pipeline,
659                                     struct vk_pipeline_cache *cache,
660                                     uint32_t vertex_input_count,
661                                     const uint8_t *pipeline_hash,
662                                     const uint8_t *const *dxil_hashes)
663 {
664    size_t offset =
665       ALIGN_POT(sizeof(struct dzn_cached_gfx_pipeline_header), alignof(D3D12_INPUT_ELEMENT_DESC)) +
666       (sizeof(D3D12_INPUT_ELEMENT_DESC) * vertex_input_count);
667    uint32_t stages = 0;
668 
669    for (uint32_t i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
670       if (pipeline->templates.shaders[i].bc) {
671          stages |= BITFIELD_BIT(i);
672          offset += SHA1_DIGEST_LENGTH;
673       }
674    }
675 
676    struct vk_pipeline_cache_object *cache_obj =
677       dzn_cached_blob_create(cache->base.device, pipeline_hash, NULL, offset);
678    if (!cache_obj)
679       return;
680 
681    struct dzn_cached_blob *cached_blob =
682       container_of(cache_obj, struct dzn_cached_blob, base);
683 
684    offset = 0;
685    struct dzn_cached_gfx_pipeline_header *info =
686       (struct dzn_cached_gfx_pipeline_header *)(cached_blob->data);
687 
688    info->input_count = vertex_input_count;
689    info->stages = stages;
690    info->rast_disabled_from_missing_position = pipeline->rast_disabled_from_missing_position;
691 
692    offset = ALIGN_POT(offset + sizeof(*info), alignof(D3D12_INPUT_ELEMENT_DESC));
693 
694    D3D12_INPUT_ELEMENT_DESC *inputs =
695       (D3D12_INPUT_ELEMENT_DESC *)((uint8_t *)cached_blob->data + offset);
696    memcpy(inputs, pipeline->templates.inputs,
697           sizeof(*inputs) * vertex_input_count);
698    offset += sizeof(*inputs) * vertex_input_count;
699 
700    u_foreach_bit(s, stages) {
701       uint8_t *dxil_hash = (uint8_t *)cached_blob->data + offset;
702 
703       memcpy(dxil_hash, dxil_hashes[s], SHA1_DIGEST_LENGTH);
704       offset += SHA1_DIGEST_LENGTH;
705    }
706 
707    cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);
708    vk_pipeline_cache_object_unref(cache->base.device, cache_obj);
709 }
710 
711 static void
712 dzn_graphics_pipeline_hash_attribs(D3D12_INPUT_ELEMENT_DESC *attribs,
713                                    enum pipe_format *vi_conversions,
714                                    uint8_t *result)
715 {
716    struct mesa_sha1 ctx;
717 
718    _mesa_sha1_init(&ctx);
719    _mesa_sha1_update(&ctx, attribs, sizeof(*attribs) * MAX_VERTEX_GENERIC_ATTRIBS);
720    _mesa_sha1_update(&ctx, vi_conversions, sizeof(*vi_conversions) * MAX_VERTEX_GENERIC_ATTRIBS);
721    _mesa_sha1_final(&ctx, result);
722 }
723 
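/* Build all shader stages of a graphics pipeline: gather stage info, probe the
 * pipeline cache, translate SPIR-V to NIR, link stages in reverse order, remap
 * bindings, and finally compile each NIR shader to DXIL (caching the results). */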
724 static VkResult
725 dzn_graphics_pipeline_compile_shaders(struct dzn_device *device,
726                                       struct dzn_graphics_pipeline *pipeline,
727                                       struct vk_pipeline_cache *cache,
728                                       const struct dzn_pipeline_layout *layout,
729                                       D3D12_PIPELINE_STATE_STREAM_DESC *out,
730                                       D3D12_INPUT_ELEMENT_DESC *attribs,
731                                       enum pipe_format *vi_conversions,
732                                       const VkGraphicsPipelineCreateInfo *info)
733 {
734    struct dzn_physical_device *pdev =
735       container_of(device->vk.physical, struct dzn_physical_device, vk);
736    const VkPipelineViewportStateCreateInfo *vp_info =
737       info->pRasterizationState->rasterizerDiscardEnable ?
738       NULL : info->pViewportState;
739    struct {
740       const VkPipelineShaderStageCreateInfo *info;
741       uint8_t spirv_hash[SHA1_DIGEST_LENGTH];
742       uint8_t dxil_hash[SHA1_DIGEST_LENGTH];
743       uint8_t nir_hash[SHA1_DIGEST_LENGTH];
744       uint8_t link_hashes[SHA1_DIGEST_LENGTH][2];
745    } stages[MESA_VULKAN_SHADER_STAGES] = { 0 };
746    const uint8_t *dxil_hashes[MESA_VULKAN_SHADER_STAGES] = { 0 };
747    uint8_t attribs_hash[SHA1_DIGEST_LENGTH];
748    uint8_t pipeline_hash[SHA1_DIGEST_LENGTH];
749    gl_shader_stage last_raster_stage = MESA_SHADER_NONE;
750    uint32_t active_stage_mask = 0;
751    VkResult ret;
752 
753    /* First step: collect stage info in a table indexed by gl_shader_stage
754     * so we can iterate over stages in pipeline order or reverse pipeline
755     * order.
756     */
757    for (uint32_t i = 0; i < info->stageCount; i++) {
758       gl_shader_stage stage =
759          vk_to_mesa_shader_stage(info->pStages[i].stage);
760 
761       assert(stage <= MESA_SHADER_FRAGMENT);
762 
763       if ((stage == MESA_SHADER_VERTEX ||
764            stage == MESA_SHADER_TESS_EVAL ||
765            stage == MESA_SHADER_GEOMETRY) &&
766           last_raster_stage < stage)
767          last_raster_stage = stage;
768 
769       if (stage == MESA_SHADER_FRAGMENT &&
770           info->pRasterizationState &&
771           (info->pRasterizationState->rasterizerDiscardEnable ||
772            info->pRasterizationState->cullMode == VK_CULL_MODE_FRONT_AND_BACK)) {
773          /* Disable rasterization (AKA leave fragment shader NULL) when
774           * front+back culling or discard is set.
775           */
776          continue;
777       }
778 
779       stages[stage].info = &info->pStages[i];
780       active_stage_mask |= BITFIELD_BIT(stage);
781    }
782 
783    pipeline->use_gs_for_polygon_mode_point =
784       info->pRasterizationState &&
785       info->pRasterizationState->polygonMode == VK_POLYGON_MODE_POINT &&
786       !(active_stage_mask & (1 << MESA_SHADER_GEOMETRY));
787    if (pipeline->use_gs_for_polygon_mode_point)
788       last_raster_stage = MESA_SHADER_GEOMETRY;
789 
790    enum dxil_spirv_yz_flip_mode yz_flip_mode = DXIL_SPIRV_YZ_FLIP_NONE;
791    uint16_t y_flip_mask = 0, z_flip_mask = 0;
792    bool lower_view_index =
793       !pipeline->multiview.native_view_instancing &&
794       pipeline->multiview.view_mask > 1;
795 
796    if (pipeline->vp.dynamic) {
797       yz_flip_mode = DXIL_SPIRV_YZ_FLIP_CONDITIONAL;
798    } else if (vp_info) {
799       for (uint32_t i = 0; vp_info->pViewports && i < vp_info->viewportCount; i++) {
800          if (vp_info->pViewports[i].height > 0)
801             y_flip_mask |= BITFIELD_BIT(i);
802 
803          if (vp_info->pViewports[i].minDepth > vp_info->pViewports[i].maxDepth)
804             z_flip_mask |= BITFIELD_BIT(i);
805       }
806 
807       if (y_flip_mask && z_flip_mask)
808          yz_flip_mode = DXIL_SPIRV_YZ_FLIP_UNCONDITIONAL;
809       else if (z_flip_mask)
810          yz_flip_mode = DXIL_SPIRV_Z_FLIP_UNCONDITIONAL;
811       else if (y_flip_mask)
812          yz_flip_mode = DXIL_SPIRV_Y_FLIP_UNCONDITIONAL;
813    }
814 
815    bool force_sample_rate_shading =
816       !info->pRasterizationState->rasterizerDiscardEnable &&
817       info->pMultisampleState &&
818       info->pMultisampleState->sampleShadingEnable;
819 
820    if (cache) {
821       dzn_graphics_pipeline_hash_attribs(attribs, vi_conversions, attribs_hash);
822 
823       struct mesa_sha1 pipeline_hash_ctx;
824 
825       _mesa_sha1_init(&pipeline_hash_ctx);
826       _mesa_sha1_update(&pipeline_hash_ctx, &device->bindless, sizeof(device->bindless));
827       _mesa_sha1_update(&pipeline_hash_ctx, attribs_hash, sizeof(attribs_hash));
828       _mesa_sha1_update(&pipeline_hash_ctx, &yz_flip_mode, sizeof(yz_flip_mode));
829       _mesa_sha1_update(&pipeline_hash_ctx, &y_flip_mask, sizeof(y_flip_mask));
830       _mesa_sha1_update(&pipeline_hash_ctx, &z_flip_mask, sizeof(z_flip_mask));
831       _mesa_sha1_update(&pipeline_hash_ctx, &force_sample_rate_shading, sizeof(force_sample_rate_shading));
832       _mesa_sha1_update(&pipeline_hash_ctx, &lower_view_index, sizeof(lower_view_index));
833       _mesa_sha1_update(&pipeline_hash_ctx, &pipeline->use_gs_for_polygon_mode_point, sizeof(pipeline->use_gs_for_polygon_mode_point));
834 
835       u_foreach_bit(stage, active_stage_mask) {
836          const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *subgroup_size =
837             (const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *)
838             vk_find_struct_const(stages[stage].info->pNext, PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO);
839          enum gl_subgroup_size subgroup_enum = subgroup_size && subgroup_size->requiredSubgroupSize >= 8 ?
840             subgroup_size->requiredSubgroupSize : SUBGROUP_SIZE_FULL_SUBGROUPS;
841 
842          vk_pipeline_hash_shader_stage(stages[stage].info, NULL, stages[stage].spirv_hash);
843          _mesa_sha1_update(&pipeline_hash_ctx, &subgroup_enum, sizeof(subgroup_enum));
844          _mesa_sha1_update(&pipeline_hash_ctx, stages[stage].spirv_hash, sizeof(stages[stage].spirv_hash));
845          _mesa_sha1_update(&pipeline_hash_ctx, layout->stages[stage].hash, sizeof(layout->stages[stage].hash));
846       }
847       _mesa_sha1_final(&pipeline_hash_ctx, pipeline_hash);
848 
849       bool cache_hit;
850       ret = dzn_pipeline_cache_lookup_gfx_pipeline(pipeline, cache, pipeline_hash,
851                                                    &cache_hit);
852       if (ret != VK_SUCCESS)
853          return ret;
854 
855       if (cache_hit)
856          return VK_SUCCESS;
857    }
858 
859    /* Second step: get NIR shaders for all stages. */
860    nir_shader_compiler_options nir_opts;
861    unsigned supported_bit_sizes = (pdev->options4.Native16BitShaderOpsSupported ? 16 : 0) | 32 | 64;
862    dxil_get_nir_compiler_options(&nir_opts, dzn_get_shader_model(pdev), supported_bit_sizes, supported_bit_sizes);
863    nir_opts.lower_base_vertex = true;
864    u_foreach_bit(stage, active_stage_mask) {
865       struct mesa_sha1 nir_hash_ctx;
866 
867       const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *subgroup_size =
868          (const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *)
869          vk_find_struct_const(stages[stage].info->pNext, PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO);
870       enum gl_subgroup_size subgroup_enum = subgroup_size && subgroup_size->requiredSubgroupSize >= 8 ?
871          subgroup_size->requiredSubgroupSize : SUBGROUP_SIZE_FULL_SUBGROUPS;
872 
873       if (cache) {
874          _mesa_sha1_init(&nir_hash_ctx);
875          _mesa_sha1_update(&nir_hash_ctx, &device->bindless, sizeof(device->bindless));
876          if (stage != MESA_SHADER_FRAGMENT) {
877             _mesa_sha1_update(&nir_hash_ctx, &lower_view_index, sizeof(lower_view_index));
878             _mesa_sha1_update(&nir_hash_ctx, &force_sample_rate_shading, sizeof(force_sample_rate_shading));
879          }
880          if (stage == MESA_SHADER_VERTEX)
881             _mesa_sha1_update(&nir_hash_ctx, attribs_hash, sizeof(attribs_hash));
882          if (stage == last_raster_stage) {
883             _mesa_sha1_update(&nir_hash_ctx, &yz_flip_mode, sizeof(yz_flip_mode));
884             _mesa_sha1_update(&nir_hash_ctx, &y_flip_mask, sizeof(y_flip_mask));
885             _mesa_sha1_update(&nir_hash_ctx, &z_flip_mask, sizeof(z_flip_mask));
886             _mesa_sha1_update(&nir_hash_ctx, &lower_view_index, sizeof(lower_view_index));
887          }
888          _mesa_sha1_update(&nir_hash_ctx, &subgroup_enum, sizeof(subgroup_enum));
889          _mesa_sha1_update(&nir_hash_ctx, stages[stage].spirv_hash, sizeof(stages[stage].spirv_hash));
890          _mesa_sha1_final(&nir_hash_ctx, stages[stage].nir_hash);
891       }
892 
893       struct dzn_nir_options options = {
894          .yz_flip_mode = stage == last_raster_stage ? yz_flip_mode : DXIL_SPIRV_YZ_FLIP_NONE,
895          .y_flip_mask = y_flip_mask,
896          .z_flip_mask = z_flip_mask,
897          .force_sample_rate_shading = stage == MESA_SHADER_FRAGMENT ? force_sample_rate_shading : false,
898          .lower_view_index = lower_view_index,
899          .lower_view_index_to_rt_layer = stage == last_raster_stage ? lower_view_index : false,
900          .vi_conversions = vi_conversions,
901          .nir_opts = &nir_opts,
902          .subgroup_size = subgroup_enum,
903       };
904 
905       ret = dzn_pipeline_get_nir_shader(device, layout,
906                                         cache, stages[stage].nir_hash,
907                                         stages[stage].info, stage,
908                                         &options,
909                                         &pipeline->templates.shaders[stage].nir);
910       if (ret != VK_SUCCESS)
911          return ret;
912    }
913 
914    if (pipeline->use_gs_for_polygon_mode_point) {
915       /* TODO: Cache; handle TES */
916       struct dzn_nir_point_gs_info gs_info = {
917          .cull_mode = info->pRasterizationState->cullMode,
918          .front_ccw = info->pRasterizationState->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE,
919          .depth_bias = info->pRasterizationState->depthBiasEnable,
920          .depth_bias_dynamic = pipeline->zsa.dynamic_depth_bias,
921          .ds_fmt = pipeline->zsa.ds_fmt,
922          .constant_depth_bias = info->pRasterizationState->depthBiasConstantFactor,
923          .slope_scaled_depth_bias = info->pRasterizationState->depthBiasSlopeFactor,
924          .depth_bias_clamp = info->pRasterizationState->depthBiasClamp,
925          .runtime_data_cbv = {
926             .register_space = DZN_REGISTER_SPACE_SYSVALS,
927             .base_shader_register = 0,
928          }
929       };
930       pipeline->templates.shaders[MESA_SHADER_GEOMETRY].nir =
931          dzn_nir_polygon_point_mode_gs(pipeline->templates.shaders[MESA_SHADER_VERTEX].nir,
932                                        &gs_info);
933 
934       struct dxil_spirv_runtime_conf conf = {
935          .runtime_data_cbv = {
936             .register_space = DZN_REGISTER_SPACE_SYSVALS,
937             .base_shader_register = 0,
938          },
939          .yz_flip = {
940             .mode = yz_flip_mode,
941             .y_mask = y_flip_mask,
942             .z_mask = z_flip_mask,
943          },
944       };
945 
946       bool requires_runtime_data;
947       NIR_PASS_V(pipeline->templates.shaders[MESA_SHADER_GEOMETRY].nir, dxil_spirv_nir_lower_yz_flip,
948                  &conf, &requires_runtime_data);
949 
950       active_stage_mask |= (1 << MESA_SHADER_GEOMETRY);
951       memcpy(stages[MESA_SHADER_GEOMETRY].spirv_hash, stages[MESA_SHADER_VERTEX].spirv_hash, SHA1_DIGEST_LENGTH);
952 
953       if ((active_stage_mask & (1 << MESA_SHADER_FRAGMENT)) &&
954           BITSET_TEST(pipeline->templates.shaders[MESA_SHADER_FRAGMENT].nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE))
955          NIR_PASS_V(pipeline->templates.shaders[MESA_SHADER_FRAGMENT].nir, dxil_nir_forward_front_face);
956    }
957 
958    /* Third step: link those NIR shaders. We iterate in reverse order
959     * so we can eliminate outputs that are never read by the next stage.
960     */
961    uint32_t link_mask = active_stage_mask;
962    while (link_mask != 0) {
963       gl_shader_stage stage = util_last_bit(link_mask) - 1;
964       link_mask &= ~BITFIELD_BIT(stage);
965       gl_shader_stage prev_stage = util_last_bit(link_mask) - 1;
966 
967       struct dxil_spirv_runtime_conf conf = {
968          .runtime_data_cbv = {
969             .register_space = DZN_REGISTER_SPACE_SYSVALS,
970             .base_shader_register = 0,
971       }};
972 
973       assert(pipeline->templates.shaders[stage].nir);
974       bool requires_runtime_data;
975       dxil_spirv_nir_link(pipeline->templates.shaders[stage].nir,
976                           prev_stage != MESA_SHADER_NONE ?
977                           pipeline->templates.shaders[prev_stage].nir : NULL,
978                           &conf, &requires_runtime_data);
979 
980       if (prev_stage != MESA_SHADER_NONE) {
981          memcpy(stages[stage].link_hashes[0], stages[prev_stage].spirv_hash, SHA1_DIGEST_LENGTH);
982          memcpy(stages[prev_stage].link_hashes[1], stages[stage].spirv_hash, SHA1_DIGEST_LENGTH);
983       }
984    }
985 
986    u_foreach_bit(stage, active_stage_mask) {
987       uint8_t bindings_hash[SHA1_DIGEST_LENGTH];
988 
989       NIR_PASS_V(pipeline->templates.shaders[stage].nir, adjust_var_bindings, device, layout,
990                  cache ? bindings_hash : NULL);
991 
992       if (cache) {
993          struct mesa_sha1 dxil_hash_ctx;
994 
995          _mesa_sha1_init(&dxil_hash_ctx);
996          _mesa_sha1_update(&dxil_hash_ctx, stages[stage].nir_hash, sizeof(stages[stage].nir_hash));
997          _mesa_sha1_update(&dxil_hash_ctx, stages[stage].spirv_hash, sizeof(stages[stage].spirv_hash));
998          _mesa_sha1_update(&dxil_hash_ctx, stages[stage].link_hashes[0], sizeof(stages[stage].link_hashes[0]));
999          _mesa_sha1_update(&dxil_hash_ctx, stages[stage].link_hashes[1], sizeof(stages[stage].link_hashes[1]));
1000          _mesa_sha1_update(&dxil_hash_ctx, bindings_hash, sizeof(bindings_hash));
1001          _mesa_sha1_final(&dxil_hash_ctx, stages[stage].dxil_hash);
1002          dxil_hashes[stage] = stages[stage].dxil_hash;
1003 
1004          gl_shader_stage cached_stage;
1005          D3D12_SHADER_BYTECODE bc;
1006          ret = dzn_pipeline_cache_lookup_dxil_shader(cache, stages[stage].dxil_hash, &cached_stage, &bc);
1007          if (ret != VK_SUCCESS)
1008             return ret;
1009 
1010          if (cached_stage != MESA_SHADER_NONE) {
1011             assert(cached_stage == stage);
1012             D3D12_SHADER_BYTECODE *slot =
1013                dzn_pipeline_get_gfx_shader_slot(out, stage);
1014             *slot = bc;
1015             pipeline->templates.shaders[stage].bc = slot;
1016          }
1017       }
1018    }
1019 
1020    uint32_t vert_input_count = 0;
1021    if (pipeline->templates.shaders[MESA_SHADER_VERTEX].nir) {
1022       /* Now, declare one D3D12_INPUT_ELEMENT_DESC per VS input variable, so
1023        * we can handle location overlaps properly.
1024        */
1025       nir_foreach_shader_in_variable(var, pipeline->templates.shaders[MESA_SHADER_VERTEX].nir) {
1026          assert(var->data.location >= VERT_ATTRIB_GENERIC0);
1027          unsigned loc = var->data.location - VERT_ATTRIB_GENERIC0;
1028          assert(vert_input_count < D3D12_VS_INPUT_REGISTER_COUNT);
1029          assert(loc < MAX_VERTEX_GENERIC_ATTRIBS);
1030 
1031          pipeline->templates.inputs[vert_input_count] = attribs[loc];
1032          pipeline->templates.inputs[vert_input_count].SemanticIndex = vert_input_count;
1033          var->data.driver_location = vert_input_count++;
1034       }
1035 
1036       if (vert_input_count > 0) {
1037          d3d12_gfx_pipeline_state_stream_new_desc(out, INPUT_LAYOUT, D3D12_INPUT_LAYOUT_DESC, desc);
1038          desc->pInputElementDescs = pipeline->templates.inputs;
1039          desc->NumElements = vert_input_count;
1040       }
1041    }
1042 
1043    /* Last step: translate NIR shaders into DXIL modules */
1044    u_foreach_bit(stage, active_stage_mask) {
1045       gl_shader_stage prev_stage =
1046          util_last_bit(active_stage_mask & BITFIELD_MASK(stage)) - 1;
1047       uint32_t prev_stage_output_clip_size = 0;
1048       if (stage == MESA_SHADER_FRAGMENT) {
1049          /* Disable rasterization if the last geometry stage doesn't
1050           * write the position.
1051           */
1052          if (prev_stage == MESA_SHADER_NONE ||
1053              !(pipeline->templates.shaders[prev_stage].nir->info.outputs_written & VARYING_BIT_POS)) {
1054             pipeline->rast_disabled_from_missing_position = true;
1055             /* Clear a cache hit if there was one. */
1056             pipeline->templates.shaders[stage].bc = NULL;
1057             continue;
1058          }
1059       } else if (prev_stage != MESA_SHADER_NONE) {
1060          prev_stage_output_clip_size = pipeline->templates.shaders[prev_stage].nir->info.clip_distance_array_size;
1061       }
1062 
1063       /* Cache hit, we can skip the compilation. */
1064       if (pipeline->templates.shaders[stage].bc)
1065          continue;
1066 
1067       D3D12_SHADER_BYTECODE *slot =
1068          dzn_pipeline_get_gfx_shader_slot(out, stage);
1069 
1070       ret = dzn_pipeline_compile_shader(device, pipeline->templates.shaders[stage].nir, prev_stage_output_clip_size, slot);
1071       if (ret != VK_SUCCESS)
1072          return ret;
1073 
1074       pipeline->templates.shaders[stage].bc = slot;
1075 
1076       if (cache)
1077          dzn_pipeline_cache_add_dxil_shader(cache, stages[stage].dxil_hash, stage, slot);
1078    }
1079 
1080    if (cache)
1081       dzn_pipeline_cache_add_gfx_pipeline(pipeline, cache, vert_input_count, pipeline_hash,
1082                                           dxil_hashes);
1083 
1084    return VK_SUCCESS;
1085 }
1086 
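/* Replace vertex-input formats that have no direct DXGI equivalent with a same-size
 * format the hardware can fetch; the remaining conversion is then handled in the
 * vertex shader (see vi_conversions and dxil_nir_lower_vs_vertex_conversion()). */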
1087 VkFormat
1088 dzn_graphics_pipeline_patch_vi_format(VkFormat format)
1089 {
1090    switch (format) {
1091    case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
1092    case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
1093    case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
1094    case VK_FORMAT_A2R10G10B10_USCALED_PACK32:
1095    case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
1096    case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
1097    case VK_FORMAT_A2B10G10R10_USCALED_PACK32:
1098       return VK_FORMAT_R32_UINT;
1099    case VK_FORMAT_R8G8B8A8_SSCALED:
1100       return VK_FORMAT_R8G8B8A8_SINT;
1101    case VK_FORMAT_R8G8B8A8_USCALED:
1102       return VK_FORMAT_R8G8B8A8_UINT;
1103    case VK_FORMAT_R16G16B16A16_USCALED:
1104       return VK_FORMAT_R16G16B16A16_UINT;
1105    case VK_FORMAT_R16G16B16A16_SSCALED:
1106       return VK_FORMAT_R16G16B16A16_SINT;
1107    default:
1108       return format;
1109    }
1110 }
1111 
1112 static VkResult
1113 dzn_graphics_pipeline_translate_vi(struct dzn_graphics_pipeline *pipeline,
1114                                    const VkGraphicsPipelineCreateInfo *in,
1115                                    D3D12_INPUT_ELEMENT_DESC *inputs,
1116                                    enum pipe_format *vi_conversions)
1117 {
1118    const VkPipelineVertexInputStateCreateInfo *in_vi =
1119       in->pVertexInputState;
1120    const VkPipelineVertexInputDivisorStateCreateInfoEXT *divisors =
1121       (const VkPipelineVertexInputDivisorStateCreateInfoEXT *)
1122       vk_find_struct_const(in_vi, PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
1123 
1124    if (!in_vi->vertexAttributeDescriptionCount)
1125       return VK_SUCCESS;
1126 
1127    D3D12_INPUT_CLASSIFICATION slot_class[MAX_VBS];
1128 
1129    pipeline->vb.count = 0;
1130    for (uint32_t i = 0; i < in_vi->vertexBindingDescriptionCount; i++) {
1131       const struct VkVertexInputBindingDescription *bdesc =
1132          &in_vi->pVertexBindingDescriptions[i];
1133 
1134       pipeline->vb.count = MAX2(pipeline->vb.count, bdesc->binding + 1);
1135       pipeline->vb.strides[bdesc->binding] = bdesc->stride;
1136       if (bdesc->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) {
1137          slot_class[bdesc->binding] = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
1138       } else {
1139          assert(bdesc->inputRate == VK_VERTEX_INPUT_RATE_VERTEX);
1140          slot_class[bdesc->binding] = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
1141       }
1142    }
1143 
1144    for (uint32_t i = 0; i < in_vi->vertexAttributeDescriptionCount; i++) {
1145       const VkVertexInputAttributeDescription *attr =
1146          &in_vi->pVertexAttributeDescriptions[i];
1147       const VkVertexInputBindingDivisorDescriptionEXT *divisor = NULL;
1148 
1149       if (slot_class[attr->binding] == D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA &&
1150           divisors) {
1151          for (uint32_t d = 0; d < divisors->vertexBindingDivisorCount; d++) {
1152             if (attr->binding == divisors->pVertexBindingDivisors[d].binding) {
1153                divisor = &divisors->pVertexBindingDivisors[d];
1154                break;
1155             }
1156          }
1157       }
1158 
1159       VkFormat patched_format = dzn_graphics_pipeline_patch_vi_format(attr->format);
1160       if (patched_format != attr->format)
1161          vi_conversions[attr->location] = vk_format_to_pipe_format(attr->format);
1162 
1163       /* nir_to_dxil() names all vertex inputs as TEXCOORDx */
1164       inputs[attr->location] = (D3D12_INPUT_ELEMENT_DESC) {
1165          .SemanticName = "TEXCOORD",
1166          .Format = dzn_buffer_get_dxgi_format(patched_format),
1167          .InputSlot = attr->binding,
1168          .InputSlotClass = slot_class[attr->binding],
1169          .InstanceDataStepRate =
1170             divisor ? divisor->divisor :
1171             slot_class[attr->binding] == D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA ? 1 : 0,
1172          .AlignedByteOffset = attr->offset,
1173       };
1174    }
1175 
1176    return VK_SUCCESS;
1177 }
1178 
1179 static D3D12_PRIMITIVE_TOPOLOGY_TYPE
1180 to_prim_topology_type(VkPrimitiveTopology in)
1181 {
1182    switch (in) {
1183    case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
1184       return D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
1185    case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
1186    case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
1187    case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
1188    case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
1189       return D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
1190    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
1191    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
1192    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
1193    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
1194    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
1195       return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
1196    case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
1197       return D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH;
1198    default: unreachable("Invalid primitive topology");
1199    }
1200 }
1201 
1202 static D3D12_PRIMITIVE_TOPOLOGY
1203 to_prim_topology(VkPrimitiveTopology in, unsigned patch_control_points, bool support_triangle_fan)
1204 {
1205    switch (in) {
1206    case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: return D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
1207    case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: return D3D_PRIMITIVE_TOPOLOGY_LINELIST;
1208    case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
1209    case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ;
1210    case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ;
1211    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
1212    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
1213    /* Triangle fans are emulated using an intermediate index buffer. */
1214    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: return support_triangle_fan ?
1215       D3D_PRIMITIVE_TOPOLOGY_TRIANGLEFAN : D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
1216    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ;
1217    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ;
1218    case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
1219       assert(patch_control_points);
1220       return (D3D12_PRIMITIVE_TOPOLOGY)(D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + patch_control_points - 1);
1221    default: unreachable("Invalid primitive topology");
1222    }
1223 }
1224 
1225 static VkResult
1226 dzn_graphics_pipeline_translate_ia(struct dzn_device *device,
1227                                    struct dzn_graphics_pipeline *pipeline,
1228                                    D3D12_PIPELINE_STATE_STREAM_DESC *out,
1229                                    const VkGraphicsPipelineCreateInfo *in)
1230 {
1231    struct dzn_physical_device *pdev =
1232       container_of(device->vk.physical, struct dzn_physical_device, vk);
1233    const VkPipelineInputAssemblyStateCreateInfo *in_ia =
1234       in->pInputAssemblyState;
1235    bool has_tes = false;
1236    for (uint32_t i = 0; i < in->stageCount; i++) {
1237       if (in->pStages[i].stage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT ||
1238           in->pStages[i].stage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) {
1239          has_tes = true;
1240          break;
1241       }
1242    }
1243    const VkPipelineTessellationStateCreateInfo *in_tes =
1244       has_tes ? in->pTessellationState : NULL;
1245    VkResult ret = VK_SUCCESS;
1246 
1247    d3d12_gfx_pipeline_state_stream_new_desc(out, PRIMITIVE_TOPOLOGY, D3D12_PRIMITIVE_TOPOLOGY_TYPE, prim_top_type);
1248    *prim_top_type = to_prim_topology_type(in_ia->topology);
1249    pipeline->ia.triangle_fan = in_ia->topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN && !pdev->options15.TriangleFanSupported;
1250    pipeline->ia.topology =
1251       to_prim_topology(in_ia->topology, in_tes ? in_tes->patchControlPoints : 0,
1252                        pdev->options15.TriangleFanSupported);
1253 
1254    if (in_ia->primitiveRestartEnable) {
1255       d3d12_gfx_pipeline_state_stream_new_desc(out, IB_STRIP_CUT_VALUE, D3D12_INDEX_BUFFER_STRIP_CUT_VALUE, ib_strip_cut);
1256       pipeline->templates.desc_offsets.ib_strip_cut =
1257          (uintptr_t)ib_strip_cut - (uintptr_t)out->pPipelineStateSubobjectStream;
1258       *ib_strip_cut = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED;
1259       ret = dzn_graphics_pipeline_prepare_for_variants(device, pipeline);
1260    }
1261 
1262    return ret;
1263 }
1264 
1265 static D3D12_FILL_MODE
1266 translate_polygon_mode(VkPolygonMode in)
1267 {
1268    switch (in) {
1269    case VK_POLYGON_MODE_FILL: return D3D12_FILL_MODE_SOLID;
1270    case VK_POLYGON_MODE_LINE: return D3D12_FILL_MODE_WIREFRAME;
1271    case VK_POLYGON_MODE_POINT:
1272       /* This is handled elsewhere */
1273       return D3D12_FILL_MODE_SOLID;
1274    default: unreachable("Unsupported polygon mode");
1275    }
1276 }
1277 
1278 static D3D12_CULL_MODE
1279 translate_cull_mode(VkCullModeFlags in)
1280 {
1281    switch (in) {
1282    case VK_CULL_MODE_NONE: return D3D12_CULL_MODE_NONE;
1283    case VK_CULL_MODE_FRONT_BIT: return D3D12_CULL_MODE_FRONT;
1284    case VK_CULL_MODE_BACK_BIT: return D3D12_CULL_MODE_BACK;
1285    /* Front+back face culling is equivalent to 'rasterization disabled' */
1286    case VK_CULL_MODE_FRONT_AND_BACK: return D3D12_CULL_MODE_NONE;
1287    default: unreachable("Unsupported cull mode");
1288    }
1289 }
1290 
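/* Descriptive note (added): clamp a double-precision depth-bias constant to
 * the int32 range expected by D3D12_RASTERIZER_DESC::DepthBias.
 */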
1291 static int32_t
1292 translate_depth_bias(double depth_bias)
1293 {
1294    if (depth_bias > INT32_MAX)
1295       return INT32_MAX;
1296    else if (depth_bias < INT32_MIN)
1297       return INT32_MIN;
1298 
1299    return depth_bias;
1300 }
1301 
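/* Descriptive note (added): capture the static viewport/scissor state into
 * the pipeline templates and emit the rasterizer subobject, picking
 * RASTERIZER2, RASTERIZER1 or RASTERIZER depending on whether the device
 * supports narrow quadrilateral lines and/or dynamic depth bias.
 */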
1302 static void
1303 dzn_graphics_pipeline_translate_rast(struct dzn_device *device,
1304                                      struct dzn_graphics_pipeline *pipeline,
1305                                      D3D12_PIPELINE_STATE_STREAM_DESC *out,
1306                                      const VkGraphicsPipelineCreateInfo *in)
1307 {
1308    struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
1309    const VkPipelineRasterizationStateCreateInfo *in_rast =
1310       in->pRasterizationState;
1311    const VkPipelineViewportStateCreateInfo *in_vp =
1312       in_rast->rasterizerDiscardEnable ? NULL : in->pViewportState;
1313    const VkPipelineMultisampleStateCreateInfo *in_ms =
1314       in_rast->rasterizerDiscardEnable ? NULL : in->pMultisampleState;
1315 
1316    if (in_vp) {
1317       pipeline->vp.count = in_vp->viewportCount;
1318       if (in_vp->pViewports) {
1319          for (uint32_t i = 0; in_vp->pViewports && i < in_vp->viewportCount; i++)
1320             dzn_translate_viewport(&pipeline->vp.desc[i], &in_vp->pViewports[i]);
1321       }
1322 
1323       pipeline->scissor.count = in_vp->scissorCount;
1324       if (in_vp->pScissors) {
1325          for (uint32_t i = 0; i < in_vp->scissorCount; i++)
1326             dzn_translate_rect(&pipeline->scissor.desc[i], &in_vp->pScissors[i]);
1327       }
1328    }
1329 
1330    if (pdev->options19.NarrowQuadrilateralLinesSupported) {
1331       assert(pdev->options16.DynamicDepthBiasSupported);
1332       d3d12_gfx_pipeline_state_stream_new_desc(out, RASTERIZER2, D3D12_RASTERIZER_DESC2, desc);
1333       pipeline->templates.desc_offsets.rast =
1334          (uintptr_t)desc - (uintptr_t)out->pPipelineStateSubobjectStream;
1335       desc->DepthClipEnable = !in_rast->depthClampEnable;
1336       desc->FillMode = translate_polygon_mode(in_rast->polygonMode);
1337       desc->CullMode = translate_cull_mode(in_rast->cullMode);
1338       desc->FrontCounterClockwise =
1339          in_rast->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE;
1340       if (in_rast->depthBiasEnable) {
1341          desc->DepthBias = in_rast->depthBiasConstantFactor;
1342          desc->SlopeScaledDepthBias = in_rast->depthBiasSlopeFactor;
1343          desc->DepthBiasClamp = in_rast->depthBiasClamp;
1344       }
1345       desc->LineRasterizationMode = D3D12_LINE_RASTERIZATION_MODE_QUADRILATERAL_NARROW;
1346    } else {
1347       static_assert(sizeof(D3D12_RASTERIZER_DESC) == sizeof(D3D12_RASTERIZER_DESC1), "Casting between these");
1348       D3D12_PIPELINE_STATE_SUBOBJECT_TYPE rast_type = pdev->options16.DynamicDepthBiasSupported ?
1349          D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RASTERIZER1 :
1350          D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RASTERIZER;
1351       d3d12_pipeline_state_stream_new_desc(out, MAX_GFX_PIPELINE_STATE_STREAM_SIZE, rast_type, D3D12_RASTERIZER_DESC, desc);
1352       pipeline->templates.desc_offsets.rast =
1353          (uintptr_t)desc - (uintptr_t)out->pPipelineStateSubobjectStream;
1354       desc->DepthClipEnable = !in_rast->depthClampEnable;
1355       desc->FillMode = translate_polygon_mode(in_rast->polygonMode);
1356       desc->CullMode = translate_cull_mode(in_rast->cullMode);
1357       desc->FrontCounterClockwise =
1358          in_rast->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE;
1359       if (in_rast->depthBiasEnable) {
1360          if (rast_type == D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RASTERIZER1)
1361             ((D3D12_RASTERIZER_DESC1 *)desc)->DepthBias = in_rast->depthBiasConstantFactor;
1362          else
1363             desc->DepthBias = translate_depth_bias(in_rast->depthBiasConstantFactor);
1364          desc->SlopeScaledDepthBias = in_rast->depthBiasSlopeFactor;
1365          desc->DepthBiasClamp = in_rast->depthBiasClamp;
1366       }
1367 
1368       /* The Vulkan conformance tests use different reference rasterizers for single-sampled
1369        * and multi-sampled lines. The single-sampled lines can be Bresenham lines, but multi-
1370        * sampled need to be quadrilateral lines. This still isn't *quite* sufficient, because
1371        * D3D only supports a line width of 1.4 (per spec), but Vulkan requires us to support
1372        * 1.0 (and without claiming wide lines, that's all we can support).
1373        */
1374       if (in_ms && in_ms->rasterizationSamples > 1)
1375          desc->MultisampleEnable = true;
1376    }
1377 
1378    assert(in_rast->lineWidth == 1.0f);
1379 }
1380 
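/* Descriptive note (added): emit the SAMPLE_DESC subobject (and SAMPLE_MASK
 * when a sample mask is provided) from the multisample state; skipped
 * entirely when rasterization is discarded.
 */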
1381 static void
1382 dzn_graphics_pipeline_translate_ms(struct dzn_graphics_pipeline *pipeline,
1383                                    D3D12_PIPELINE_STATE_STREAM_DESC *out,
1384                                    const VkGraphicsPipelineCreateInfo *in)
1385 {
1386    const VkPipelineRasterizationStateCreateInfo *in_rast =
1387       in->pRasterizationState;
1388    const VkPipelineMultisampleStateCreateInfo *in_ms =
1389       in_rast->rasterizerDiscardEnable ? NULL : in->pMultisampleState;
1390 
1391    if (!in_ms)
1392       return;
1393 
1394    /* TODO: minSampleShading (use VRS), alphaToOneEnable */
1395    d3d12_gfx_pipeline_state_stream_new_desc(out, SAMPLE_DESC, DXGI_SAMPLE_DESC, desc);
1396    desc->Count = in_ms ? in_ms->rasterizationSamples : 1;
1397    desc->Quality = 0;
1398 
1399    if (!in_ms->pSampleMask)
1400       return;
1401 
1402    d3d12_gfx_pipeline_state_stream_new_desc(out, SAMPLE_MASK, UINT, mask);
1403    *mask = *in_ms->pSampleMask;
1404 }
1405 
1406 static D3D12_STENCIL_OP
1407 translate_stencil_op(VkStencilOp in)
1408 {
1409    switch (in) {
1410    case VK_STENCIL_OP_KEEP: return D3D12_STENCIL_OP_KEEP;
1411    case VK_STENCIL_OP_ZERO: return D3D12_STENCIL_OP_ZERO;
1412    case VK_STENCIL_OP_REPLACE: return D3D12_STENCIL_OP_REPLACE;
1413    case VK_STENCIL_OP_INCREMENT_AND_CLAMP: return D3D12_STENCIL_OP_INCR_SAT;
1414    case VK_STENCIL_OP_DECREMENT_AND_CLAMP: return D3D12_STENCIL_OP_DECR_SAT;
1415    case VK_STENCIL_OP_INCREMENT_AND_WRAP: return D3D12_STENCIL_OP_INCR;
1416    case VK_STENCIL_OP_DECREMENT_AND_WRAP: return D3D12_STENCIL_OP_DECR;
1417    case VK_STENCIL_OP_INVERT: return D3D12_STENCIL_OP_INVERT;
1418    default: unreachable("Invalid stencil op");
1419    }
1420 }
1421 
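/* Descriptive note (added): work out which faces actually consume the stencil
 * reference and masks: compare masks are zeroed for culled or unused faces
 * and forced to UINT32_MAX when the compare mask is dynamic, write masks are
 * zeroed for culled faces or when dynamically set, and the results are
 * mirrored into the D3D12 depth-stencil descriptor.
 */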
1422 static void
1423 translate_stencil_test(struct dzn_graphics_pipeline *pipeline,
1424                        D3D12_DEPTH_STENCIL_DESC2 *out,
1425                        const VkGraphicsPipelineCreateInfo *in)
1426 {
1427    const VkPipelineDepthStencilStateCreateInfo *in_zsa =
1428       in->pDepthStencilState;
1429 
1430    bool front_test_uses_ref =
1431       !(in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT) &&
1432       in_zsa->front.compareOp != VK_COMPARE_OP_NEVER &&
1433       in_zsa->front.compareOp != VK_COMPARE_OP_ALWAYS &&
1434       (pipeline->zsa.stencil_test.dynamic_compare_mask ||
1435        in_zsa->front.compareMask != 0);
1436    bool back_test_uses_ref =
1437       !(in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT) &&
1438       in_zsa->back.compareOp != VK_COMPARE_OP_NEVER &&
1439       in_zsa->back.compareOp != VK_COMPARE_OP_ALWAYS &&
1440       (pipeline->zsa.stencil_test.dynamic_compare_mask ||
1441        in_zsa->back.compareMask != 0);
1442 
1443    if (front_test_uses_ref && pipeline->zsa.stencil_test.dynamic_compare_mask)
1444       pipeline->zsa.stencil_test.front.compare_mask = UINT32_MAX;
1445    else if (front_test_uses_ref)
1446       pipeline->zsa.stencil_test.front.compare_mask = in_zsa->front.compareMask;
1447    else
1448       pipeline->zsa.stencil_test.front.compare_mask = 0;
1449 
1450    if (back_test_uses_ref && pipeline->zsa.stencil_test.dynamic_compare_mask)
1451       pipeline->zsa.stencil_test.back.compare_mask = UINT32_MAX;
1452    else if (back_test_uses_ref)
1453       pipeline->zsa.stencil_test.back.compare_mask = in_zsa->back.compareMask;
1454    else
1455       pipeline->zsa.stencil_test.back.compare_mask = 0;
1456 
1457    bool back_wr_uses_ref =
1458       !(in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT) &&
1459       ((in_zsa->back.compareOp != VK_COMPARE_OP_ALWAYS &&
1460         in_zsa->back.failOp == VK_STENCIL_OP_REPLACE) ||
1461        (in_zsa->back.compareOp != VK_COMPARE_OP_NEVER &&
1462         (!in_zsa->depthTestEnable || in_zsa->depthCompareOp != VK_COMPARE_OP_NEVER) &&
1463         in_zsa->back.passOp == VK_STENCIL_OP_REPLACE) ||
1464        (in_zsa->depthTestEnable &&
1465         in_zsa->depthCompareOp != VK_COMPARE_OP_ALWAYS &&
1466         in_zsa->back.depthFailOp == VK_STENCIL_OP_REPLACE));
1467    bool front_wr_uses_ref =
1468       !(in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT) &&
1469       ((in_zsa->front.compareOp != VK_COMPARE_OP_ALWAYS &&
1470         in_zsa->front.failOp == VK_STENCIL_OP_REPLACE) ||
1471        (in_zsa->front.compareOp != VK_COMPARE_OP_NEVER &&
1472         (!in_zsa->depthTestEnable || in_zsa->depthCompareOp != VK_COMPARE_OP_NEVER) &&
1473         in_zsa->front.passOp == VK_STENCIL_OP_REPLACE) ||
1474        (in_zsa->depthTestEnable &&
1475         in_zsa->depthCompareOp != VK_COMPARE_OP_ALWAYS &&
1476         in_zsa->front.depthFailOp == VK_STENCIL_OP_REPLACE));
1477 
1478    pipeline->zsa.stencil_test.front.write_mask =
1479       (pipeline->zsa.stencil_test.dynamic_write_mask ||
1480        (in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT)) ?
1481       0 : in_zsa->front.writeMask;
1482    pipeline->zsa.stencil_test.back.write_mask =
1483       (pipeline->zsa.stencil_test.dynamic_write_mask ||
1484        (in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT)) ?
1485       0 : in_zsa->back.writeMask;
1486 
1487    pipeline->zsa.stencil_test.front.uses_ref = front_test_uses_ref || front_wr_uses_ref;
1488    pipeline->zsa.stencil_test.back.uses_ref = back_test_uses_ref || back_wr_uses_ref;
1489 
1490    pipeline->zsa.stencil_test.front.ref =
1491       pipeline->zsa.stencil_test.dynamic_ref ? 0 : in_zsa->front.reference;
1492    pipeline->zsa.stencil_test.back.ref =
1493       pipeline->zsa.stencil_test.dynamic_ref ? 0 : in_zsa->back.reference;
1494 
1495    out->FrontFace.StencilReadMask = pipeline->zsa.stencil_test.front.compare_mask;
1496    out->BackFace.StencilReadMask = pipeline->zsa.stencil_test.back.compare_mask;
1497    out->FrontFace.StencilWriteMask = pipeline->zsa.stencil_test.front.write_mask;
1498    out->BackFace.StencilWriteMask = pipeline->zsa.stencil_test.back.write_mask;
1499 }
1500 
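/* Descriptive note (added): build the depth-stencil subobject. A zeroed
 * DEPTH_STENCIL1/DEPTH_STENCIL2 descriptor is emitted when there is no
 * depth-stencil state or everything is culled; otherwise the Vulkan state is
 * translated, using DEPTH_STENCIL2 when independent front/back stencil
 * ref/masks are supported and falling back to DEPTH_STENCIL1 (front masks
 * preferred over back) otherwise.
 */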
1501 static void
1502 dzn_graphics_pipeline_translate_zsa(struct dzn_device *device,
1503                                     struct dzn_graphics_pipeline *pipeline,
1504                                     D3D12_PIPELINE_STATE_STREAM_DESC *out,
1505                                     const VkGraphicsPipelineCreateInfo *in)
1506 {
1507    struct dzn_physical_device *pdev =
1508       container_of(device->vk.physical, struct dzn_physical_device, vk);
1509 
1510    const VkPipelineRasterizationStateCreateInfo *in_rast =
1511       in->pRasterizationState;
1512    const VkPipelineDepthStencilStateCreateInfo *in_zsa =
1513       in_rast->rasterizerDiscardEnable ? NULL : in->pDepthStencilState;
1514    const VkPipelineRenderingCreateInfo *ri = vk_find_struct_const(in, PIPELINE_RENDERING_CREATE_INFO);
1515 
1516    if (!in_zsa ||
1517        in_rast->cullMode == VK_CULL_MODE_FRONT_AND_BACK) {
1518       /* Ensure depth is disabled if the rasterizer should be disabled / everything culled */
1519       if (pdev->options14.IndependentFrontAndBackStencilRefMaskSupported) {
1520          d3d12_gfx_pipeline_state_stream_new_desc(out, DEPTH_STENCIL2, D3D12_DEPTH_STENCIL_DESC2, stream_desc);
1521          pipeline->templates.desc_offsets.ds = (uintptr_t)stream_desc - (uintptr_t)out->pPipelineStateSubobjectStream;
1522          memset(stream_desc, 0, sizeof(*stream_desc));
1523       } else {
1524          d3d12_gfx_pipeline_state_stream_new_desc(out, DEPTH_STENCIL1, D3D12_DEPTH_STENCIL_DESC1, stream_desc);
1525          pipeline->templates.desc_offsets.ds = (uintptr_t)stream_desc - (uintptr_t)out->pPipelineStateSubobjectStream;
1526          memset(stream_desc, 0, sizeof(*stream_desc));
1527       }
1528       return;
1529    }
1530 
1531    D3D12_DEPTH_STENCIL_DESC2 desc;
1532    memset(&desc, 0, sizeof(desc));
1533 
1534    bool has_no_depth = ri && ri->depthAttachmentFormat == VK_FORMAT_UNDEFINED;
1535    bool has_no_stencil = ri && ri->stencilAttachmentFormat == VK_FORMAT_UNDEFINED;
1536 
1537    desc.DepthEnable = !has_no_depth &&
1538       (in_zsa->depthTestEnable || in_zsa->depthBoundsTestEnable);
1539    if (desc.DepthEnable) {
1540       desc.DepthWriteMask =
1541          in_zsa->depthWriteEnable ?
1542          D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;
1543       desc.DepthFunc =
1544          in_zsa->depthTestEnable ?
1545          dzn_translate_compare_op(in_zsa->depthCompareOp) :
1546          D3D12_COMPARISON_FUNC_ALWAYS;
1547    }
1548    pipeline->zsa.depth_bounds.enable = in_zsa->depthBoundsTestEnable;
1549    pipeline->zsa.depth_bounds.min = in_zsa->minDepthBounds;
1550    pipeline->zsa.depth_bounds.max = in_zsa->maxDepthBounds;
1551    desc.DepthBoundsTestEnable = in_zsa->depthBoundsTestEnable;
1552    desc.StencilEnable = in_zsa->stencilTestEnable && !has_no_stencil;
1553    if (desc.StencilEnable) {
1554       desc.FrontFace.StencilFailOp = translate_stencil_op(in_zsa->front.failOp);
1555       desc.FrontFace.StencilDepthFailOp = translate_stencil_op(in_zsa->front.depthFailOp);
1556       desc.FrontFace.StencilPassOp = translate_stencil_op(in_zsa->front.passOp);
1557       desc.FrontFace.StencilFunc = dzn_translate_compare_op(in_zsa->front.compareOp);
1558       desc.BackFace.StencilFailOp = translate_stencil_op(in_zsa->back.failOp);
1559       desc.BackFace.StencilDepthFailOp = translate_stencil_op(in_zsa->back.depthFailOp);
1560       desc.BackFace.StencilPassOp = translate_stencil_op(in_zsa->back.passOp);
1561       desc.BackFace.StencilFunc = dzn_translate_compare_op(in_zsa->back.compareOp);
1562 
1563       pipeline->zsa.stencil_test.enable = true;
1564 
1565       translate_stencil_test(pipeline, &desc, in);
1566    }
1567 
1568    if (pdev->options14.IndependentFrontAndBackStencilRefMaskSupported) {
1569       d3d12_gfx_pipeline_state_stream_new_desc(out, DEPTH_STENCIL2, D3D12_DEPTH_STENCIL_DESC2, stream_desc);
1570       pipeline->templates.desc_offsets.ds =
1571          (uintptr_t)stream_desc - (uintptr_t)out->pPipelineStateSubobjectStream;
1572       *stream_desc = desc;
1573    } else {
1574       d3d12_gfx_pipeline_state_stream_new_desc(out, DEPTH_STENCIL1, D3D12_DEPTH_STENCIL_DESC1, stream_desc);
1575       pipeline->templates.desc_offsets.ds =
1576          (uintptr_t)stream_desc - (uintptr_t)out->pPipelineStateSubobjectStream;
1577 
1578       stream_desc->DepthEnable = desc.DepthEnable;
1579       stream_desc->DepthWriteMask = desc.DepthWriteMask;
1580       stream_desc->DepthFunc = desc.DepthFunc;
1581       stream_desc->DepthBoundsTestEnable = desc.DepthBoundsTestEnable;
1582       stream_desc->StencilEnable = desc.StencilEnable;
1583       stream_desc->FrontFace.StencilFailOp = desc.FrontFace.StencilFailOp;
1584       stream_desc->FrontFace.StencilDepthFailOp = desc.FrontFace.StencilDepthFailOp;
1585       stream_desc->FrontFace.StencilPassOp = desc.FrontFace.StencilPassOp;
1586       stream_desc->FrontFace.StencilFunc = desc.FrontFace.StencilFunc;
1587       stream_desc->BackFace.StencilFailOp = desc.BackFace.StencilFailOp;
1588       stream_desc->BackFace.StencilDepthFailOp = desc.BackFace.StencilDepthFailOp;
1589       stream_desc->BackFace.StencilPassOp = desc.BackFace.StencilPassOp;
1590       stream_desc->BackFace.StencilFunc = desc.BackFace.StencilFunc;
1591 
1592       /* No support for independent front/back, just pick front (if set, else back) */
1593       stream_desc->StencilReadMask = desc.FrontFace.StencilReadMask ? desc.FrontFace.StencilReadMask : desc.BackFace.StencilReadMask;
1594       stream_desc->StencilWriteMask = desc.FrontFace.StencilWriteMask ? desc.FrontFace.StencilWriteMask : desc.BackFace.StencilWriteMask;
1595    }
1596 }
1597 
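/* Descriptive note (added): translate a Vulkan blend factor. Color factors
 * used in the alpha slot are demoted to their alpha counterparts, and
 * constant-alpha factors fall back to the regular blend-factor enums when
 * ALPHA_FACTOR isn't supported.
 */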
1598 static D3D12_BLEND
1599 translate_blend_factor(VkBlendFactor in, bool is_alpha, bool support_alpha_blend_factor)
1600 {
1601    switch (in) {
1602    case VK_BLEND_FACTOR_ZERO: return D3D12_BLEND_ZERO;
1603    case VK_BLEND_FACTOR_ONE: return D3D12_BLEND_ONE;
1604    case VK_BLEND_FACTOR_SRC_COLOR:
1605       return is_alpha ? D3D12_BLEND_SRC_ALPHA : D3D12_BLEND_SRC_COLOR;
1606    case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
1607       return is_alpha ? D3D12_BLEND_INV_SRC_ALPHA : D3D12_BLEND_INV_SRC_COLOR;
1608    case VK_BLEND_FACTOR_DST_COLOR:
1609       return is_alpha ? D3D12_BLEND_DEST_ALPHA : D3D12_BLEND_DEST_COLOR;
1610    case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
1611       return is_alpha ? D3D12_BLEND_INV_DEST_ALPHA : D3D12_BLEND_INV_DEST_COLOR;
1612    case VK_BLEND_FACTOR_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA;
1613    case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA;
1614    case VK_BLEND_FACTOR_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA;
1615    case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA;
1616    case VK_BLEND_FACTOR_CONSTANT_COLOR:
1617       return is_alpha && support_alpha_blend_factor ? D3D12_BLEND_ALPHA_FACTOR : D3D12_BLEND_BLEND_FACTOR;
1618    case VK_BLEND_FACTOR_CONSTANT_ALPHA:
1619       return support_alpha_blend_factor ? D3D12_BLEND_ALPHA_FACTOR : D3D12_BLEND_BLEND_FACTOR;
1620    case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
1621       return is_alpha && support_alpha_blend_factor ? D3D12_BLEND_INV_ALPHA_FACTOR : D3D12_BLEND_INV_BLEND_FACTOR;
1622    case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
1623       return support_alpha_blend_factor ? D3D12_BLEND_INV_ALPHA_FACTOR : D3D12_BLEND_INV_BLEND_FACTOR;
1624    case VK_BLEND_FACTOR_SRC1_COLOR:
1625       return is_alpha ? D3D12_BLEND_SRC1_ALPHA : D3D12_BLEND_SRC1_COLOR;
1626    case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
1627       return is_alpha ? D3D12_BLEND_INV_SRC1_ALPHA : D3D12_BLEND_INV_SRC1_COLOR;
1628    case VK_BLEND_FACTOR_SRC1_ALPHA: return D3D12_BLEND_SRC1_ALPHA;
1629    case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: return D3D12_BLEND_INV_SRC1_ALPHA;
1630    case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT;
1631    default: unreachable("Invalid blend factor");
1632    }
1633 }
1634 
1635 static D3D12_BLEND_OP
1636 translate_blend_op(VkBlendOp in)
1637 {
1638    switch (in) {
1639    case VK_BLEND_OP_ADD: return D3D12_BLEND_OP_ADD;
1640    case VK_BLEND_OP_SUBTRACT: return D3D12_BLEND_OP_SUBTRACT;
1641    case VK_BLEND_OP_REVERSE_SUBTRACT: return D3D12_BLEND_OP_REV_SUBTRACT;
1642    case VK_BLEND_OP_MIN: return D3D12_BLEND_OP_MIN;
1643    case VK_BLEND_OP_MAX: return D3D12_BLEND_OP_MAX;
1644    default: unreachable("Invalid blend op");
1645    }
1646 }
1647 
1648 static D3D12_LOGIC_OP
1649 translate_logic_op(VkLogicOp in)
1650 {
1651    switch (in) {
1652    case VK_LOGIC_OP_CLEAR: return D3D12_LOGIC_OP_CLEAR;
1653    case VK_LOGIC_OP_AND: return D3D12_LOGIC_OP_AND;
1654    case VK_LOGIC_OP_AND_REVERSE: return D3D12_LOGIC_OP_AND_REVERSE;
1655    case VK_LOGIC_OP_COPY: return D3D12_LOGIC_OP_COPY;
1656    case VK_LOGIC_OP_AND_INVERTED: return D3D12_LOGIC_OP_AND_INVERTED;
1657    case VK_LOGIC_OP_NO_OP: return D3D12_LOGIC_OP_NOOP;
1658    case VK_LOGIC_OP_XOR: return D3D12_LOGIC_OP_XOR;
1659    case VK_LOGIC_OP_OR: return D3D12_LOGIC_OP_OR;
1660    case VK_LOGIC_OP_NOR: return D3D12_LOGIC_OP_NOR;
1661    case VK_LOGIC_OP_EQUIVALENT: return D3D12_LOGIC_OP_EQUIV;
1662    case VK_LOGIC_OP_INVERT: return D3D12_LOGIC_OP_INVERT;
1663    case VK_LOGIC_OP_OR_REVERSE: return D3D12_LOGIC_OP_OR_REVERSE;
1664    case VK_LOGIC_OP_COPY_INVERTED: return D3D12_LOGIC_OP_COPY_INVERTED;
1665    case VK_LOGIC_OP_OR_INVERTED: return D3D12_LOGIC_OP_OR_INVERTED;
1666    case VK_LOGIC_OP_NAND: return D3D12_LOGIC_OP_NAND;
1667    case VK_LOGIC_OP_SET: return D3D12_LOGIC_OP_SET;
1668    default: unreachable("Invalid logic op");
1669    }
1670 }
1671 
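/* Descriptive note (added): emit the BLEND subobject. Per-attachment blend or
 * logic-op state is copied over, IndependentBlendEnable is raised as soon as
 * two attachments differ, and the blend constants are stashed on the pipeline
 * for later binding.
 */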
1672 static void
1673 dzn_graphics_pipeline_translate_blend(struct dzn_graphics_pipeline *pipeline,
1674                                       D3D12_PIPELINE_STATE_STREAM_DESC *out,
1675                                       const VkGraphicsPipelineCreateInfo *in)
1676 {
1677    const VkPipelineRasterizationStateCreateInfo *in_rast =
1678       in->pRasterizationState;
1679    const VkPipelineColorBlendStateCreateInfo *in_blend =
1680       in_rast->rasterizerDiscardEnable ? NULL : in->pColorBlendState;
1681    const VkPipelineMultisampleStateCreateInfo *in_ms =
1682       in_rast->rasterizerDiscardEnable ? NULL : in->pMultisampleState;
1683 
1684    if (!in_blend || !in_ms)
1685       return;
1686 
1687    struct dzn_device *device =
1688       container_of(pipeline->base.base.device, struct dzn_device, vk);
1689    struct dzn_physical_device *pdev =
1690       container_of(device->vk.physical, struct dzn_physical_device, vk);
1691    bool support_alpha_blend_factor = pdev->options13.AlphaBlendFactorSupported;
1692 
1693    d3d12_gfx_pipeline_state_stream_new_desc(out, BLEND, D3D12_BLEND_DESC, desc);
1694    D3D12_LOGIC_OP logicop =
1695       in_blend->logicOpEnable ?
1696       translate_logic_op(in_blend->logicOp) : D3D12_LOGIC_OP_NOOP;
1697    desc->AlphaToCoverageEnable = in_ms->alphaToCoverageEnable;
1698    memcpy(pipeline->blend.constants, in_blend->blendConstants,
1699           sizeof(pipeline->blend.constants));
1700 
1701    for (uint32_t i = 0; i < in_blend->attachmentCount; i++) {
1702       if (i > 0 &&
1703           memcmp(&in_blend->pAttachments[i - 1], &in_blend->pAttachments[i],
1704                  sizeof(*in_blend->pAttachments)) != 0)
1705          desc->IndependentBlendEnable = true;
1706 
1707       desc->RenderTarget[i].BlendEnable =
1708          in_blend->pAttachments[i].blendEnable;
1709       desc->RenderTarget[i].RenderTargetWriteMask =
1710          in_blend->pAttachments[i].colorWriteMask;
1711 
1712       if (in_blend->logicOpEnable) {
1713          desc->RenderTarget[i].LogicOpEnable = true;
1714          desc->RenderTarget[i].LogicOp = logicop;
1715       } else {
1716          desc->RenderTarget[i].SrcBlend =
1717             translate_blend_factor(in_blend->pAttachments[i].srcColorBlendFactor, false, support_alpha_blend_factor);
1718          desc->RenderTarget[i].DestBlend =
1719             translate_blend_factor(in_blend->pAttachments[i].dstColorBlendFactor, false, support_alpha_blend_factor);
1720          desc->RenderTarget[i].BlendOp =
1721             translate_blend_op(in_blend->pAttachments[i].colorBlendOp);
1722          desc->RenderTarget[i].SrcBlendAlpha =
1723             translate_blend_factor(in_blend->pAttachments[i].srcAlphaBlendFactor, true, support_alpha_blend_factor);
1724          desc->RenderTarget[i].DestBlendAlpha =
1725             translate_blend_factor(in_blend->pAttachments[i].dstAlphaBlendFactor, true, support_alpha_blend_factor);
1726          desc->RenderTarget[i].BlendOpAlpha =
1727             translate_blend_op(in_blend->pAttachments[i].alphaBlendOp);
1728       }
1729    }
1730 }
1731 
1732 
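/* Descriptive note (added): common pipeline setup. Copies the root-signature
 * and descriptor-set layout information from the pipeline layout (taking a
 * reference on the root signature) and starts the pipeline-state stream with
 * the ROOT_SIGNATURE subobject.
 */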
1733 static void
1734 dzn_pipeline_init(struct dzn_pipeline *pipeline,
1735                   struct dzn_device *device,
1736                   VkPipelineBindPoint type,
1737                   struct dzn_pipeline_layout *layout,
1738                   D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc)
1739 {
1740    pipeline->type = type;
1741    pipeline->root.sets_param_count = layout->root.sets_param_count;
1742    pipeline->root.sysval_cbv_param_idx = layout->root.sysval_cbv_param_idx;
1743    pipeline->root.push_constant_cbv_param_idx = layout->root.push_constant_cbv_param_idx;
1744    pipeline->root.dynamic_buffer_bindless_param_idx = layout->root.dynamic_buffer_bindless_param_idx;
1745    STATIC_ASSERT(sizeof(pipeline->root.type) == sizeof(layout->root.type));
1746    memcpy(pipeline->root.type, layout->root.type, sizeof(pipeline->root.type));
1747    pipeline->root.sig = layout->root.sig;
1748    ID3D12RootSignature_AddRef(pipeline->root.sig);
1749 
1750    STATIC_ASSERT(sizeof(layout->desc_count) == sizeof(pipeline->desc_count));
1751    memcpy(pipeline->desc_count, layout->desc_count, sizeof(pipeline->desc_count));
1752 
1753    STATIC_ASSERT(sizeof(layout->sets) == sizeof(pipeline->sets));
1754    memcpy(pipeline->sets, layout->sets, sizeof(pipeline->sets));
1755    pipeline->set_count = layout->set_count;
1756    pipeline->dynamic_buffer_count = layout->dynamic_buffer_count;
1757    vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);
1758 
1759    ASSERTED uint32_t max_streamsz =
1760       type == VK_PIPELINE_BIND_POINT_GRAPHICS ?
1761       MAX_GFX_PIPELINE_STATE_STREAM_SIZE :
1762       MAX_COMPUTE_PIPELINE_STATE_STREAM_SIZE;
1763 
1764    d3d12_pipeline_state_stream_new_desc_abbrev(stream_desc, max_streamsz, ROOT_SIGNATURE,
1765                                                ID3D12RootSignature *, root_sig);
1766    *root_sig = pipeline->root.sig;
1767 }
1768 
1769 static void
1770 dzn_pipeline_finish(struct dzn_pipeline *pipeline)
1771 {
1772    if (pipeline->state)
1773       ID3D12PipelineState_Release(pipeline->state);
1774    if (pipeline->root.sig)
1775       ID3D12RootSignature_Release(pipeline->root.sig);
1776 
1777    vk_object_base_finish(&pipeline->base);
1778 }
1779 
1780 static void dzn_graphics_pipeline_delete_variant(struct hash_entry *he)
1781 {
1782    struct dzn_graphics_pipeline_variant *variant = he->data;
1783 
1784    if (variant->state)
1785       ID3D12PipelineState_Release(variant->state);
1786 }
1787 
1788 static void
1789 dzn_graphics_pipeline_cleanup_nir_shaders(struct dzn_graphics_pipeline *pipeline)
1790 {
1791    for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->templates.shaders); i++) {
1792       ralloc_free(pipeline->templates.shaders[i].nir);
1793       pipeline->templates.shaders[i].nir = NULL;
1794    }
1795 }
1796 
1797 static void
1798 dzn_graphics_pipeline_cleanup_dxil_shaders(struct dzn_graphics_pipeline *pipeline)
1799 {
1800    for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->templates.shaders); i++) {
1801       if (pipeline->templates.shaders[i].bc) {
1802          free((void *)pipeline->templates.shaders[i].bc->pShaderBytecode);
1803          pipeline->templates.shaders[i].bc = NULL;
1804       }
1805    }
1806 }
1807 
1808 static void
1809 dzn_graphics_pipeline_destroy(struct dzn_graphics_pipeline *pipeline,
1810                               const VkAllocationCallbacks *alloc)
1811 {
1812    if (!pipeline)
1813       return;
1814 
1815    _mesa_hash_table_destroy(pipeline->variants,
1816                             dzn_graphics_pipeline_delete_variant);
1817 
1818    dzn_graphics_pipeline_cleanup_nir_shaders(pipeline);
1819    dzn_graphics_pipeline_cleanup_dxil_shaders(pipeline);
1820 
1821    for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->indirect_cmd_sigs); i++) {
1822       if (pipeline->indirect_cmd_sigs[i])
1823          ID3D12CommandSignature_Release(pipeline->indirect_cmd_sigs[i]);
1824    }
1825 
1826    dzn_pipeline_finish(&pipeline->base);
1827    vk_free2(&pipeline->base.base.device->alloc, alloc, pipeline);
1828 }
1829 
1830 static VkResult
1831 dzn_graphics_pipeline_create(struct dzn_device *device,
1832                              VkPipelineCache cache,
1833                              const VkGraphicsPipelineCreateInfo *pCreateInfo,
1834                              const VkAllocationCallbacks *pAllocator,
1835                              VkPipeline *out)
1836 {
1837    struct dzn_physical_device *pdev =
1838       container_of(device->vk.physical, struct dzn_physical_device, vk);
1839    const VkPipelineRenderingCreateInfo *ri = (const VkPipelineRenderingCreateInfo *)
1840       vk_find_struct_const(pCreateInfo, PIPELINE_RENDERING_CREATE_INFO);
1841    VK_FROM_HANDLE(vk_pipeline_cache, pcache, cache);
1842    VK_FROM_HANDLE(vk_render_pass, pass, pCreateInfo->renderPass);
1843    VK_FROM_HANDLE(dzn_pipeline_layout, layout, pCreateInfo->layout);
1844    uint32_t color_count = 0;
1845    VkFormat color_fmts[MAX_RTS] = { 0 };
1846    VkFormat zs_fmt = VK_FORMAT_UNDEFINED;
1847    VkResult ret;
1848    HRESULT hres = 0;
1849    D3D12_VIEW_INSTANCE_LOCATION vi_locs[D3D12_MAX_VIEW_INSTANCE_COUNT];
1850 
1851    struct dzn_graphics_pipeline *pipeline =
1852       vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
1853                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1854    if (!pipeline)
1855       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1856 
1857    D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc = &pipeline->templates.stream_desc;
1858    stream_desc->pPipelineStateSubobjectStream = pipeline->templates.stream_buf;
1859 
1860    dzn_pipeline_init(&pipeline->base, device,
1861                      VK_PIPELINE_BIND_POINT_GRAPHICS,
1862                      layout, stream_desc);
1863    D3D12_INPUT_ELEMENT_DESC attribs[MAX_VERTEX_GENERIC_ATTRIBS] = { 0 };
1864    enum pipe_format vi_conversions[MAX_VERTEX_GENERIC_ATTRIBS] = { 0 };
1865 
1866    ret = dzn_graphics_pipeline_translate_vi(pipeline, pCreateInfo,
1867                                             attribs, vi_conversions);
1868    if (ret != VK_SUCCESS)
1869       goto out;
1870 
1871    d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, FLAGS, D3D12_PIPELINE_STATE_FLAGS, flags);
1872    *flags = D3D12_PIPELINE_STATE_FLAG_NONE;
1873 
1874    if (pCreateInfo->pDynamicState) {
1875       for (uint32_t i = 0; i < pCreateInfo->pDynamicState->dynamicStateCount; i++) {
1876          switch (pCreateInfo->pDynamicState->pDynamicStates[i]) {
1877          case VK_DYNAMIC_STATE_VIEWPORT:
1878             pipeline->vp.dynamic = true;
1879             break;
1880          case VK_DYNAMIC_STATE_SCISSOR:
1881             pipeline->scissor.dynamic = true;
1882             break;
1883          case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
1884             pipeline->zsa.stencil_test.dynamic_ref = true;
1885             break;
1886          case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
1887             pipeline->zsa.stencil_test.dynamic_compare_mask = true;
1888             ret = dzn_graphics_pipeline_prepare_for_variants(device, pipeline);
1889             if (ret)
1890                goto out;
1891             break;
1892          case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
1893             pipeline->zsa.stencil_test.dynamic_write_mask = true;
1894             ret = dzn_graphics_pipeline_prepare_for_variants(device, pipeline);
1895             if (ret)
1896                goto out;
1897             break;
1898          case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
1899             pipeline->blend.dynamic_constants = true;
1900             break;
1901          case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
1902             pipeline->zsa.depth_bounds.dynamic = true;
1903             break;
1904          case VK_DYNAMIC_STATE_DEPTH_BIAS:
1905             pipeline->zsa.dynamic_depth_bias = true;
1906             if (pdev->options16.DynamicDepthBiasSupported) {
1907                *flags |= D3D12_PIPELINE_STATE_FLAG_DYNAMIC_DEPTH_BIAS;
1908             } else {
1909                ret = dzn_graphics_pipeline_prepare_for_variants(device, pipeline);
1910                if (ret)
1911                   goto out;
1912             }
1913             break;
1914          case VK_DYNAMIC_STATE_LINE_WIDTH:
1915             /* Nothing to do since we only support lineWidth = 1. */
1916             break;
1917          default: unreachable("Unsupported dynamic state");
1918          }
1919       }
1920    }
1921 
1922    ret = dzn_graphics_pipeline_translate_ia(device, pipeline, stream_desc, pCreateInfo);
1923    if (ret)
1924       goto out;
1925 
1926    dzn_graphics_pipeline_translate_rast(device, pipeline, stream_desc, pCreateInfo);
1927    dzn_graphics_pipeline_translate_ms(pipeline, stream_desc, pCreateInfo);
1928    dzn_graphics_pipeline_translate_zsa(device, pipeline, stream_desc, pCreateInfo);
1929    dzn_graphics_pipeline_translate_blend(pipeline, stream_desc, pCreateInfo);
1930 
1931    unsigned view_mask = 0;
1932    if (pass) {
1933       const struct vk_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
1934       color_count = subpass->color_count;
1935       for (uint32_t i = 0; i < subpass->color_count; i++) {
1936          uint32_t idx = subpass->color_attachments[i].attachment;
1937 
1938          if (idx == VK_ATTACHMENT_UNUSED) continue;
1939 
1940          const struct vk_render_pass_attachment *attachment =
1941             &pass->attachments[idx];
1942 
1943          color_fmts[i] = attachment->format;
1944       }
1945 
1946       if (subpass->depth_stencil_attachment &&
1947           subpass->depth_stencil_attachment->attachment != VK_ATTACHMENT_UNUSED) {
1948          const struct vk_render_pass_attachment *attachment =
1949             &pass->attachments[subpass->depth_stencil_attachment->attachment];
1950 
1951          zs_fmt = attachment->format;
1952       }
1953 
1954       view_mask = subpass->view_mask;
1955    } else if (ri) {
1956       color_count = ri->colorAttachmentCount;
1957       memcpy(color_fmts, ri->pColorAttachmentFormats,
1958              sizeof(color_fmts[0]) * color_count);
1959       if (ri->depthAttachmentFormat != VK_FORMAT_UNDEFINED)
1960          zs_fmt = ri->depthAttachmentFormat;
1961       else if (ri->stencilAttachmentFormat != VK_FORMAT_UNDEFINED)
1962          zs_fmt = ri->stencilAttachmentFormat;
1963 
1964       view_mask = ri->viewMask;
1965    }
1966 
1967    if (color_count > 0) {
1968       d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, RENDER_TARGET_FORMATS, struct D3D12_RT_FORMAT_ARRAY, rts);
1969       rts->NumRenderTargets = color_count;
1970       for (uint32_t i = 0; i < color_count; i++) {
1971          rts->RTFormats[i] =
1972             dzn_image_get_dxgi_format(pdev, color_fmts[i],
1973                                       VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
1974                                       VK_IMAGE_ASPECT_COLOR_BIT);
1975       }
1976    }
1977 
1978    if (zs_fmt != VK_FORMAT_UNDEFINED) {
1979       d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, DEPTH_STENCIL_FORMAT, DXGI_FORMAT, ds_fmt);
1980       *ds_fmt =
1981          dzn_image_get_dxgi_format(pdev, zs_fmt,
1982                                    VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
1983                                    VK_IMAGE_ASPECT_DEPTH_BIT |
1984                                    VK_IMAGE_ASPECT_STENCIL_BIT);
1985       pipeline->zsa.ds_fmt = *ds_fmt;
1986    }
1987 
1988    pipeline->multiview.view_mask = MAX2(view_mask, 1);
1989    if (view_mask != 0 && /* Is multiview */
1990        view_mask != 1 && /* Is non-trivially multiview */
1991        (view_mask & ~((1 << D3D12_MAX_VIEW_INSTANCE_COUNT) - 1)) == 0 && /* Uses only views 0 thru 3 */
1992        pdev->options3.ViewInstancingTier > D3D12_VIEW_INSTANCING_TIER_NOT_SUPPORTED /* Actually supported */) {
1993       d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, VIEW_INSTANCING, D3D12_VIEW_INSTANCING_DESC, vi);
1994       vi->pViewInstanceLocations = vi_locs;
1995       for (uint32_t i = 0; i < D3D12_MAX_VIEW_INSTANCE_COUNT; ++i) {
1996          vi_locs[i].RenderTargetArrayIndex = i;
1997          vi_locs[i].ViewportArrayIndex = 0;
1998          if (view_mask & (1 << i))
1999             vi->ViewInstanceCount = i + 1;
2000       }
2001       vi->Flags = D3D12_VIEW_INSTANCING_FLAG_ENABLE_VIEW_INSTANCE_MASKING;
2002       pipeline->multiview.native_view_instancing = true;
2003    }
2004 
2005    ret = dzn_graphics_pipeline_compile_shaders(device, pipeline, pcache,
2006                                                layout, stream_desc,
2007                                                attribs, vi_conversions,
2008                                                pCreateInfo);
2009    if (ret != VK_SUCCESS)
2010       goto out;
2011 
2012    /* If we have no position output from a pre-rasterizer stage, we need to make sure that
2013     * depth is disabled, to fully disable the rasterizer. We can only know this after compiling
2014     * or loading the shaders.
2015     */
2016    if (pipeline->rast_disabled_from_missing_position) {
2017       if (pdev->options14.IndependentFrontAndBackStencilRefMaskSupported) {
2018          D3D12_DEPTH_STENCIL_DESC2 *ds = dzn_graphics_pipeline_get_desc(pipeline, pipeline->templates.stream_buf, ds);
2019          if (ds)
2020             ds->DepthEnable = ds->StencilEnable = false;
2021       } else {
2022          D3D12_DEPTH_STENCIL_DESC1 *ds = dzn_graphics_pipeline_get_desc(pipeline, pipeline->templates.stream_buf, ds);
2023          if (ds)
2024             ds->DepthEnable = ds->StencilEnable = false;
2025       }
2026    }
2027 
2028    if (!pipeline->variants) {
2029       hres = ID3D12Device4_CreatePipelineState(device->dev, stream_desc,
2030                                                &IID_ID3D12PipelineState,
2031                                                (void **)&pipeline->base.state);
2032       if (FAILED(hres)) {
2033          ret = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2034          goto out;
2035       }
2036 
2037       dzn_graphics_pipeline_cleanup_dxil_shaders(pipeline);
2038    }
2039 
2040    dzn_graphics_pipeline_cleanup_nir_shaders(pipeline);
2041    ret = VK_SUCCESS;
2042 
2043 out:
2044    if (ret != VK_SUCCESS)
2045       dzn_graphics_pipeline_destroy(pipeline, pAllocator);
2046    else
2047       *out = dzn_graphics_pipeline_to_handle(pipeline);
2048 
2049    return ret;
2050 }
2051 
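/* Descriptive note (added): copy into masked_key only the stencil state that
 * can actually affect this pipeline: compare masks for faces with a
 * non-trivial compare func, and write masks when they are dynamic. This keeps
 * equivalent keys hashing to the same pipeline variant.
 */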
2052 static void
2053 mask_key_for_stencil_state(struct dzn_physical_device *pdev,
2054                            struct dzn_graphics_pipeline *pipeline,
2055                            const struct dzn_graphics_pipeline_variant_key *key,
2056                            struct dzn_graphics_pipeline_variant_key *masked_key)
2057 {
2058    if (pdev->options14.IndependentFrontAndBackStencilRefMaskSupported) {
2059       const D3D12_DEPTH_STENCIL_DESC2 *ds_templ =
2060          dzn_graphics_pipeline_get_desc_template(pipeline, ds);
2061       if (ds_templ && ds_templ->StencilEnable) {
2062          if (ds_templ->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2063              ds_templ->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS)
2064             masked_key->stencil_test.front.compare_mask = key->stencil_test.front.compare_mask;
2065          if (ds_templ->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2066              ds_templ->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS)
2067             masked_key->stencil_test.back.compare_mask = key->stencil_test.back.compare_mask;
2068          if (pipeline->zsa.stencil_test.dynamic_write_mask) {
2069             masked_key->stencil_test.front.write_mask = key->stencil_test.front.write_mask;
2070             masked_key->stencil_test.back.write_mask = key->stencil_test.back.write_mask;
2071          }
2072       }
2073    } else {
2074       const D3D12_DEPTH_STENCIL_DESC1 *ds_templ =
2075          dzn_graphics_pipeline_get_desc_template(pipeline, ds);
2076       if (ds_templ && ds_templ->StencilEnable) {
2077          if (ds_templ->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2078              ds_templ->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS)
2079             masked_key->stencil_test.front.compare_mask = key->stencil_test.front.compare_mask;
2080          if (ds_templ->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2081              ds_templ->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS)
2082             masked_key->stencil_test.back.compare_mask = key->stencil_test.back.compare_mask;
2083          if (pipeline->zsa.stencil_test.dynamic_write_mask) {
2084             masked_key->stencil_test.front.write_mask = key->stencil_test.front.write_mask;
2085             masked_key->stencil_test.back.write_mask = key->stencil_test.back.write_mask;
2086          }
2087       }
2088    }
2089 }
2090 
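/* Descriptive note (added): patch the copied pipeline-state stream with the
 * per-variant stencil read/write masks. Without independent front/back
 * support, the single shared masks are patched instead and the front/back
 * values are asserted to match whenever both faces are in use.
 */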
2091 static void
2092 update_stencil_state(struct dzn_physical_device *pdev,
2093                      struct dzn_graphics_pipeline *pipeline,
2094                      uintptr_t *stream_buf,
2095                      const struct dzn_graphics_pipeline_variant_key *masked_key)
2096 {
2097    if (pdev->options14.IndependentFrontAndBackStencilRefMaskSupported) {
2098       D3D12_DEPTH_STENCIL_DESC2 *ds =
2099          dzn_graphics_pipeline_get_desc(pipeline, stream_buf, ds);
2100       if (ds && ds->StencilEnable) {
2101          if (pipeline->zsa.stencil_test.dynamic_compare_mask) {
2102             if (ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2103                   ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS) {
2104                ds->FrontFace.StencilReadMask = masked_key->stencil_test.front.compare_mask;
2105             }
2106 
2107             if (ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2108                   ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS) {
2109                ds->BackFace.StencilReadMask = masked_key->stencil_test.back.compare_mask;
2110             }
2111          }
2112 
2113          if (pipeline->zsa.stencil_test.dynamic_write_mask) {
2114             ds->FrontFace.StencilWriteMask = masked_key->stencil_test.front.write_mask;
2115             ds->BackFace.StencilWriteMask = masked_key->stencil_test.back.write_mask;
2116          }
2117       }
2118    } else {
2119       D3D12_DEPTH_STENCIL_DESC1 *ds =
2120          dzn_graphics_pipeline_get_desc(pipeline, stream_buf, ds);
2121       if (ds && ds->StencilEnable) {
2122          if (pipeline->zsa.stencil_test.dynamic_compare_mask) {
2123             if (ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2124                   ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS) {
2125                ds->StencilReadMask = masked_key->stencil_test.front.compare_mask;
2126             }
2127 
2128             if (ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2129                   ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS) {
2130                ds->StencilReadMask = masked_key->stencil_test.back.compare_mask;
2131             }
2132 
2133             if (ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2134                   ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS &&
2135                   ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
2136                   ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS)
2137                assert(masked_key->stencil_test.front.compare_mask == masked_key->stencil_test.back.compare_mask);
2138          }
2139 
2140          if (pipeline->zsa.stencil_test.dynamic_write_mask) {
2141             assert(!masked_key->stencil_test.front.write_mask ||
2142                      !masked_key->stencil_test.back.write_mask ||
2143                      masked_key->stencil_test.front.write_mask == masked_key->stencil_test.back.write_mask);
2144             ds->StencilWriteMask =
2145                masked_key->stencil_test.front.write_mask |
2146                masked_key->stencil_test.back.write_mask;
2147          }
2148       }
2149    }
2150 }
2151 
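/* Descriptive note (added): return the ID3D12PipelineState matching the given
 * variant key. Pipelines without variants just return the pre-built state;
 * otherwise the key is masked down to the bits relevant for this pipeline,
 * looked up in the variant hash table, and on a miss a new state object is
 * created by patching a copy of the template stream.
 */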
2152 ID3D12PipelineState *
2153 dzn_graphics_pipeline_get_state(struct dzn_graphics_pipeline *pipeline,
2154                                 const struct dzn_graphics_pipeline_variant_key *key)
2155 {
2156    if (!pipeline->variants)
2157       return pipeline->base.state;
2158 
2159    struct dzn_device *device =
2160       container_of(pipeline->base.base.device, struct dzn_device, vk);
2161    struct dzn_physical_device *pdev =
2162       container_of(device->vk.physical, struct dzn_physical_device, vk);
2163 
2164    struct dzn_graphics_pipeline_variant_key masked_key = { 0 };
2165 
2166    if (dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut))
2167       masked_key.ib_strip_cut = key->ib_strip_cut;
2168 
2169    if (!pdev->options16.DynamicDepthBiasSupported &&
2170        dzn_graphics_pipeline_get_desc_template(pipeline, rast) &&
2171        pipeline->zsa.dynamic_depth_bias)
2172       masked_key.depth_bias = key->depth_bias;
2173 
2174    mask_key_for_stencil_state(pdev, pipeline, key, &masked_key);
2175 
2176    struct hash_entry *he =
2177       _mesa_hash_table_search(pipeline->variants, &masked_key);
2178 
2179    struct dzn_graphics_pipeline_variant *variant;
2180 
2181    if (!he) {
2182       variant = rzalloc(pipeline->variants, struct dzn_graphics_pipeline_variant);
2183       variant->key = masked_key;
2184 
2185       uintptr_t stream_buf[MAX_GFX_PIPELINE_STATE_STREAM_SIZE / sizeof(uintptr_t)];
2186       D3D12_PIPELINE_STATE_STREAM_DESC stream_desc = {
2187          .SizeInBytes = pipeline->templates.stream_desc.SizeInBytes,
2188          .pPipelineStateSubobjectStream = stream_buf,
2189       };
2190 
2191       memcpy(stream_buf, pipeline->templates.stream_buf, stream_desc.SizeInBytes);
2192 
2193       D3D12_INDEX_BUFFER_STRIP_CUT_VALUE *ib_strip_cut =
2194          dzn_graphics_pipeline_get_desc(pipeline, stream_buf, ib_strip_cut);
2195       if (ib_strip_cut)
2196          *ib_strip_cut = masked_key.ib_strip_cut;
2197 
2198       D3D12_RASTERIZER_DESC *rast =
2199          dzn_graphics_pipeline_get_desc(pipeline, stream_buf, rast);
2200       if (!pdev->options16.DynamicDepthBiasSupported && rast && pipeline->zsa.dynamic_depth_bias) {
2201          rast->DepthBias = translate_depth_bias(masked_key.depth_bias.constant_factor);
2202          rast->DepthBiasClamp = masked_key.depth_bias.clamp;
2203          rast->SlopeScaledDepthBias = masked_key.depth_bias.slope_factor;
2204       }
2205 
2206       update_stencil_state(pdev, pipeline, stream_buf, &masked_key);
2207 
2208       ASSERTED HRESULT hres = ID3D12Device4_CreatePipelineState(device->dev, &stream_desc,
2209                                                                 &IID_ID3D12PipelineState,
2210                                                                 (void**)(&variant->state));
2211       assert(!FAILED(hres));
2212       he = _mesa_hash_table_insert(pipeline->variants, &variant->key, variant);
2213       assert(he);
2214    } else {
2215       variant = he->data;
2216    }
2217 
2218    if (variant->state)
2219       ID3D12PipelineState_AddRef(variant->state);
2220 
2221    if (pipeline->base.state)
2222       ID3D12PipelineState_Release(pipeline->base.state);
2223 
2224    pipeline->base.state = variant->state;
2225    return variant->state;
2226 }
2227 
2228 #define DZN_INDIRECT_CMD_SIG_MAX_ARGS 4
2229 
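/* Descriptive note (added): lazily create, and cache on the pipeline, the
 * command signature used for indirect draws: an optional index-buffer-view
 * argument for emulated triangle fans, two root-constant arguments updating
 * the vertex runtime data (two 32-bit values starting at first_vertex, plus
 * draw_id), then the actual draw or draw-indexed argument.
 */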
2230 ID3D12CommandSignature *
2231 dzn_graphics_pipeline_get_indirect_cmd_sig(struct dzn_graphics_pipeline *pipeline,
2232                                            enum dzn_indirect_draw_cmd_sig_type type)
2233 {
2234    assert(type < DZN_NUM_INDIRECT_DRAW_CMD_SIGS);
2235 
2236    struct dzn_device *device =
2237       container_of(pipeline->base.base.device, struct dzn_device, vk);
2238    ID3D12CommandSignature *cmdsig = pipeline->indirect_cmd_sigs[type];
2239 
2240    if (cmdsig)
2241       return cmdsig;
2242 
2243    bool triangle_fan = type == DZN_INDIRECT_DRAW_TRIANGLE_FAN_CMD_SIG;
2244    bool indexed = type == DZN_INDIRECT_INDEXED_DRAW_CMD_SIG || triangle_fan;
2245 
2246    uint32_t cmd_arg_count = 0;
2247    D3D12_INDIRECT_ARGUMENT_DESC cmd_args[DZN_INDIRECT_CMD_SIG_MAX_ARGS];
2248 
2249    if (triangle_fan) {
2250       cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
2251          .Type = D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW,
2252       };
2253    }
2254 
2255    cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
2256       .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
2257       .Constant = {
2258          .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
2259          .DestOffsetIn32BitValues = offsetof(struct dxil_spirv_vertex_runtime_data, first_vertex) / 4,
2260          .Num32BitValuesToSet = 2,
2261       },
2262    };
2263 
2264    cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
2265       .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
2266       .Constant = {
2267          .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
2268          .DestOffsetIn32BitValues = offsetof(struct dxil_spirv_vertex_runtime_data, draw_id) / 4,
2269          .Num32BitValuesToSet = 1,
2270       },
2271    };
2272 
2273    cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
2274       .Type = indexed ?
2275               D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED :
2276               D3D12_INDIRECT_ARGUMENT_TYPE_DRAW,
2277    };
2278 
2279    assert(cmd_arg_count <= ARRAY_SIZE(cmd_args));
2280    assert(offsetof(struct dxil_spirv_vertex_runtime_data, first_vertex) == 0);
2281 
2282    D3D12_COMMAND_SIGNATURE_DESC cmd_sig_desc = {
2283       .ByteStride =
2284          triangle_fan ?
2285          sizeof(struct dzn_indirect_triangle_fan_draw_exec_params) :
2286          sizeof(struct dzn_indirect_draw_exec_params),
2287       .NumArgumentDescs = cmd_arg_count,
2288       .pArgumentDescs = cmd_args,
2289    };
2290    HRESULT hres =
2291       ID3D12Device1_CreateCommandSignature(device->dev, &cmd_sig_desc,
2292                                            pipeline->base.root.sig,
2293                                            &IID_ID3D12CommandSignature,
2294                                            (void **)&cmdsig);
2295    if (FAILED(hres))
2296       return NULL;
2297 
2298    pipeline->indirect_cmd_sigs[type] = cmdsig;
2299    return cmdsig;
2300 }
2301 
2302 VKAPI_ATTR VkResult VKAPI_CALL
2303 dzn_CreateGraphicsPipelines(VkDevice dev,
2304                             VkPipelineCache pipelineCache,
2305                             uint32_t count,
2306                             const VkGraphicsPipelineCreateInfo *pCreateInfos,
2307                             const VkAllocationCallbacks *pAllocator,
2308                             VkPipeline *pPipelines)
2309 {
2310    VK_FROM_HANDLE(dzn_device, device, dev);
2311    VkResult result = VK_SUCCESS;
2312 
2313    unsigned i;
2314    for (i = 0; i < count; i++) {
2315       result = dzn_graphics_pipeline_create(device,
2316                                             pipelineCache,
2317                                             &pCreateInfos[i],
2318                                             pAllocator,
2319                                             &pPipelines[i]);
2320       if (result != VK_SUCCESS) {
2321          pPipelines[i] = VK_NULL_HANDLE;
2322 
2323          /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED as it
2324           * is not obvious what error should be reported upon 2 different failures.
2325           */
2326          if (result != VK_PIPELINE_COMPILE_REQUIRED)
2327             break;
2328 
2329          if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
2330             break;
2331       }
2332    }
2333 
2334    for (; i < count; i++)
2335       pPipelines[i] = VK_NULL_HANDLE;
2336 
2337    return result;
2338 }
2339 
2340 static void
2341 dzn_compute_pipeline_destroy(struct dzn_compute_pipeline *pipeline,
2342                              const VkAllocationCallbacks *alloc)
2343 {
2344    if (!pipeline)
2345       return;
2346 
2347    if (pipeline->indirect_cmd_sig)
2348       ID3D12CommandSignature_Release(pipeline->indirect_cmd_sig);
2349 
2350    dzn_pipeline_finish(&pipeline->base);
2351    vk_free2(&pipeline->base.base.device->alloc, alloc, pipeline);
2352 }
2353 
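/* Compute pipelines are cached in two steps: the pipeline hash resolves to a
 * cached blob holding the DXIL shader hash, which in turn resolves to the
 * serialized DXIL. On a hit, the CS sub-object of the pipeline state stream
 * is filled directly from the cache.
 */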
2354 static VkResult
2355 dzn_pipeline_cache_lookup_compute_pipeline(struct vk_pipeline_cache *cache,
2356                                            uint8_t *pipeline_hash,
2357                                            D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc,
2358                                            D3D12_SHADER_BYTECODE *dxil,
2359                                            bool *cache_hit)
2360 {
2361    *cache_hit = false;
2362 
2363    if (!cache)
2364       return VK_SUCCESS;
2365 
2366    struct vk_pipeline_cache_object *cache_obj = NULL;
2367 
2368    cache_obj =
2369       vk_pipeline_cache_lookup_object(cache, pipeline_hash, SHA1_DIGEST_LENGTH,
2370                                       &dzn_cached_blob_ops,
2371                                       NULL);
2372    if (!cache_obj)
2373       return VK_SUCCESS;
2374 
2375    struct dzn_cached_blob *cached_blob =
2376       container_of(cache_obj, struct dzn_cached_blob, base);
2377 
2378    assert(cached_blob->size == SHA1_DIGEST_LENGTH);
2379 
2380    const uint8_t *dxil_hash = cached_blob->data;
2381    gl_shader_stage stage;
2382 
2383    VkResult ret =
2384       dzn_pipeline_cache_lookup_dxil_shader(cache, dxil_hash, &stage, dxil);
2385 
2386    if (ret != VK_SUCCESS || stage == MESA_SHADER_NONE)
2387       goto out;
2388 
2389    assert(stage == MESA_SHADER_COMPUTE);
2390 
2391    d3d12_compute_pipeline_state_stream_new_desc(stream_desc, CS, D3D12_SHADER_BYTECODE, slot);
2392    *slot = *dxil;
2393    *cache_hit = true;
2394 
2395 out:
2396    vk_pipeline_cache_object_unref(cache->base.device, cache_obj);
2397    return ret;
2398 }
2399 
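/* Record the pipeline-hash -> DXIL-shader-hash mapping so future compilations
 * of the same pipeline can skip SPIR-V -> NIR -> DXIL translation entirely.
 */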
2400 static void
2401 dzn_pipeline_cache_add_compute_pipeline(struct vk_pipeline_cache *cache,
2402                                         uint8_t *pipeline_hash,
2403                                         uint8_t *dxil_hash)
2404 {
2405    struct vk_pipeline_cache_object *cache_obj =
2406       dzn_cached_blob_create(cache->base.device, pipeline_hash, NULL, SHA1_DIGEST_LENGTH);
2407    if (!cache_obj)
2408       return;
2409 
2410    struct dzn_cached_blob *cached_blob =
2411       container_of(cache_obj, struct dzn_cached_blob, base);
2412 
2413    memcpy((void *)cached_blob->data, dxil_hash, SHA1_DIGEST_LENGTH);
2414 
2415    cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);
2416    vk_pipeline_cache_object_unref(cache->base.device, cache_obj);
2417 }
2418 
2419 static VkResult
2420 dzn_compute_pipeline_compile_shader(struct dzn_device *device,
2421                                     struct dzn_compute_pipeline *pipeline,
2422                                     struct vk_pipeline_cache *cache,
2423                                     const struct dzn_pipeline_layout *layout,
2424                                     D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc,
2425                                     D3D12_SHADER_BYTECODE *shader,
2426                                     const VkComputePipelineCreateInfo *info)
2427 {
2428    struct dzn_physical_device *pdev =
2429       container_of(device->vk.physical, struct dzn_physical_device, vk);
2430    uint8_t spirv_hash[SHA1_DIGEST_LENGTH], pipeline_hash[SHA1_DIGEST_LENGTH], nir_hash[SHA1_DIGEST_LENGTH];
2431    VkResult ret = VK_SUCCESS;
2432    nir_shader *nir = NULL;
2433 
2434    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *subgroup_size =
2435       (const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *)
2436       vk_find_struct_const(info->stage.pNext, PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO);
2437    enum gl_subgroup_size subgroup_enum = subgroup_size && subgroup_size->requiredSubgroupSize >= 8 ?
2438       subgroup_size->requiredSubgroupSize : SUBGROUP_SIZE_FULL_SUBGROUPS;
2439 
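   /* The pipeline cache key is built from the bindless mode, the required
    * subgroup size, the SPIR-V source and the compute-stage layout hash.
    */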
2440    if (cache) {
2441       struct mesa_sha1 pipeline_hash_ctx;
2442 
2443       _mesa_sha1_init(&pipeline_hash_ctx);
2444       vk_pipeline_hash_shader_stage(&info->stage, NULL, spirv_hash);
2445       _mesa_sha1_update(&pipeline_hash_ctx, &device->bindless, sizeof(device->bindless));
2446       _mesa_sha1_update(&pipeline_hash_ctx, &subgroup_enum, sizeof(subgroup_enum));
2447       _mesa_sha1_update(&pipeline_hash_ctx, spirv_hash, sizeof(spirv_hash));
2448       _mesa_sha1_update(&pipeline_hash_ctx, layout->stages[MESA_SHADER_COMPUTE].hash,
2449                         sizeof(layout->stages[MESA_SHADER_COMPUTE].hash));
2450       _mesa_sha1_final(&pipeline_hash_ctx, pipeline_hash);
2451 
2452       bool cache_hit = false;
2453       ret = dzn_pipeline_cache_lookup_compute_pipeline(cache, pipeline_hash,
2454                                                        stream_desc, shader,
2455                                                        &cache_hit);
2456       if (ret != VK_SUCCESS || cache_hit)
2457          goto out;
2458    }
2459 
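   /* The NIR cache key only covers the bindless mode, the subgroup size and
    * the SPIR-V hash; the pipeline layout is only applied later, when the
    * bindings are adjusted and the shader is lowered to DXIL.
    */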
2460    if (cache) {
2461       struct mesa_sha1 nir_hash_ctx;
2462       _mesa_sha1_init(&nir_hash_ctx);
2463       _mesa_sha1_update(&nir_hash_ctx, &device->bindless, sizeof(device->bindless));
2464       _mesa_sha1_update(&nir_hash_ctx, &subgroup_enum, sizeof(subgroup_enum));
2465       _mesa_sha1_update(&nir_hash_ctx, spirv_hash, sizeof(spirv_hash));
2466       _mesa_sha1_final(&nir_hash_ctx, nir_hash);
2467    }
2468    nir_shader_compiler_options nir_opts;
2469    const unsigned supported_bit_sizes = 16 | 32 | 64;
2470    dxil_get_nir_compiler_options(&nir_opts, dzn_get_shader_model(pdev), supported_bit_sizes, supported_bit_sizes);
2471    struct dzn_nir_options options = {
2472       .nir_opts = &nir_opts,
2473       .subgroup_size = subgroup_enum,
2474    };
2475    ret = dzn_pipeline_get_nir_shader(device, layout, cache, nir_hash,
2476                                      &info->stage, MESA_SHADER_COMPUTE,
2477                                      &options, &nir);
2478    if (ret != VK_SUCCESS)
2479       return ret;
2480 
2481    uint8_t bindings_hash[SHA1_DIGEST_LENGTH], dxil_hash[SHA1_DIGEST_LENGTH];
2482 
2483    NIR_PASS_V(nir, adjust_var_bindings, device, layout, cache ? bindings_hash : NULL);
2484 
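   /* The DXIL cache key extends the NIR key with the bindings hash produced
    * by adjust_var_bindings(), which captures the layout-dependent binding
    * remapping.
    */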
2485    if (cache) {
2486       struct mesa_sha1 dxil_hash_ctx;
2487 
2488       _mesa_sha1_init(&dxil_hash_ctx);
2489       _mesa_sha1_update(&dxil_hash_ctx, nir_hash, sizeof(nir_hash));
2490       _mesa_sha1_update(&dxil_hash_ctx, spirv_hash, sizeof(spirv_hash));
2491       _mesa_sha1_update(&dxil_hash_ctx, bindings_hash, sizeof(bindings_hash));
2492       _mesa_sha1_final(&dxil_hash_ctx, dxil_hash);
2493 
2494       gl_shader_stage stage;
2495 
2496       ret = dzn_pipeline_cache_lookup_dxil_shader(cache, dxil_hash, &stage, shader);
2497       if (ret != VK_SUCCESS)
2498          goto out;
2499 
2500       if (stage != MESA_SHADER_NONE) {
2501          assert(stage == MESA_SHADER_COMPUTE);
2502          d3d12_compute_pipeline_state_stream_new_desc(stream_desc, CS, D3D12_SHADER_BYTECODE, cs);
2503          *cs = *shader;
2504          dzn_pipeline_cache_add_compute_pipeline(cache, pipeline_hash, dxil_hash);
2505          goto out;
2506       }
2507    }
2508 
2509    ret = dzn_pipeline_compile_shader(device, nir, 0, shader);
2510    if (ret != VK_SUCCESS)
2511       goto out;
2512 
2513    d3d12_compute_pipeline_state_stream_new_desc(stream_desc, CS, D3D12_SHADER_BYTECODE, cs);
2514    *cs = *shader;
2515 
2516    if (cache) {
2517       dzn_pipeline_cache_add_dxil_shader(cache, dxil_hash, MESA_SHADER_COMPUTE, shader);
2518       dzn_pipeline_cache_add_compute_pipeline(cache, pipeline_hash, dxil_hash);
2519    }
2520 
2521 out:
2522    ralloc_free(nir);
2523    return ret;
2524 }
2525 
2526 static VkResult
2527 dzn_compute_pipeline_create(struct dzn_device *device,
2528                             VkPipelineCache cache,
2529                             const VkComputePipelineCreateInfo *pCreateInfo,
2530                             const VkAllocationCallbacks *pAllocator,
2531                             VkPipeline *out)
2532 {
2533    VK_FROM_HANDLE(dzn_pipeline_layout, layout, pCreateInfo->layout);
2534    VK_FROM_HANDLE(vk_pipeline_cache, pcache, cache);
2535 
2536    struct dzn_compute_pipeline *pipeline =
2537       vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
2538                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2539    if (!pipeline)
2540       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2541 
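   /* The D3D12 pipeline state is described through a state stream: sub-object
    * descriptors are appended to this stack buffer by the
    * d3d12_compute_pipeline_state_stream_new_desc() helpers before the whole
    * stream is handed to CreatePipelineState().
    */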
2542    uintptr_t state_buf[MAX_COMPUTE_PIPELINE_STATE_STREAM_SIZE / sizeof(uintptr_t)];
2543    D3D12_PIPELINE_STATE_STREAM_DESC stream_desc = {
2544       .pPipelineStateSubobjectStream = state_buf,
2545    };
2546 
2547    dzn_pipeline_init(&pipeline->base, device,
2548                      VK_PIPELINE_BIND_POINT_COMPUTE,
2549                      layout, &stream_desc);
2550 
2551    D3D12_SHADER_BYTECODE shader = { 0 };
2552    VkResult ret =
2553       dzn_compute_pipeline_compile_shader(device, pipeline, pcache, layout,
2554                                           &stream_desc, &shader, pCreateInfo);
2555    if (ret != VK_SUCCESS)
2556       goto out;
2557 
2558    if (FAILED(ID3D12Device4_CreatePipelineState(device->dev, &stream_desc,
2559                                                 &IID_ID3D12PipelineState,
2560                                                 (void **)&pipeline->base.state)))
2561       ret = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2562 
2563 out:
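   /* The compiled (or cache-provided) DXIL blob is owned by this function;
    * the D3D12 runtime keeps its own copy when the pipeline state is created,
    * so it can safely be released here.
    */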
2564    free((void *)shader.pShaderBytecode);
2565    if (ret != VK_SUCCESS)
2566       dzn_compute_pipeline_destroy(pipeline, pAllocator);
2567    else
2568       *out = dzn_compute_pipeline_to_handle(pipeline);
2569 
2570    return ret;
2571 }
2572 
2573 ID3D12CommandSignature *
2574 dzn_compute_pipeline_get_indirect_cmd_sig(struct dzn_compute_pipeline *pipeline)
2575 {
2576    if (pipeline->indirect_cmd_sig)
2577       return pipeline->indirect_cmd_sig;
2578 
2579    struct dzn_device *device =
2580       container_of(pipeline->base.base.device, struct dzn_device, vk);
2581 
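   /* Each record in the indirect buffer is assumed to hold two back-to-back
    * D3D12_DISPATCH_ARGUMENTS entries: the first is pushed as root constants
    * through the sysval CBV slot (presumably to expose the workgroup count to
    * the shader), the second feeds the actual dispatch.
    */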
2582    D3D12_INDIRECT_ARGUMENT_DESC indirect_dispatch_args[] = {
2583       {
2584          .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
2585          .Constant = {
2586             .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
2587             .DestOffsetIn32BitValues = 0,
2588             .Num32BitValuesToSet = 3,
2589          },
2590       },
2591       {
2592          .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH,
2593       },
2594    };
2595 
2596    D3D12_COMMAND_SIGNATURE_DESC indirect_dispatch_desc = {
2597       .ByteStride = sizeof(D3D12_DISPATCH_ARGUMENTS) * 2,
2598       .NumArgumentDescs = ARRAY_SIZE(indirect_dispatch_args),
2599       .pArgumentDescs = indirect_dispatch_args,
2600    };
2601 
2602    HRESULT hres =
2603       ID3D12Device1_CreateCommandSignature(device->dev, &indirect_dispatch_desc,
2604                                            pipeline->base.root.sig,
2605                                            &IID_ID3D12CommandSignature,
2606                                            (void **)&pipeline->indirect_cmd_sig);
2607    if (FAILED(hres))
2608       return NULL;
2609 
2610    return pipeline->indirect_cmd_sig;
2611 }
2612 
2613 VKAPI_ATTR VkResult VKAPI_CALL
2614 dzn_CreateComputePipelines(VkDevice dev,
2615                            VkPipelineCache pipelineCache,
2616                            uint32_t count,
2617                            const VkComputePipelineCreateInfo *pCreateInfos,
2618                            const VkAllocationCallbacks *pAllocator,
2619                            VkPipeline *pPipelines)
2620 {
2621    VK_FROM_HANDLE(dzn_device, device, dev);
2622    VkResult result = VK_SUCCESS;
2623 
2624    unsigned i;
2625    for (i = 0; i < count; i++) {
2626       result = dzn_compute_pipeline_create(device,
2627                                            pipelineCache,
2628                                            &pCreateInfos[i],
2629                                            pAllocator,
2630                                            &pPipelines[i]);
2631       if (result != VK_SUCCESS) {
2632          pPipelines[i] = VK_NULL_HANDLE;
2633 
2634          /* Bail out on the first error other than VK_PIPELINE_COMPILE_REQUIRED,
2635           * as it is not obvious which error should be reported when two
2636           * different failures occur.
2637           */
2637          if (result != VK_PIPELINE_COMPILE_REQUIRED)
2638             break;
2639 
2640          if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
2641             break;
2642       }
2643    }
2644 
2645    for (; i < count; i++)
2646       pPipelines[i] = VK_NULL_HANDLE;
2647 
2648    return result;
2649 }
2650 
2651 VKAPI_ATTR void VKAPI_CALL
2652 dzn_DestroyPipeline(VkDevice device,
2653                     VkPipeline pipeline,
2654                     const VkAllocationCallbacks *pAllocator)
2655 {
2656    VK_FROM_HANDLE(dzn_pipeline, pipe, pipeline);
2657 
2658    if (!pipe)
2659       return;
2660 
2661    if (pipe->type == VK_PIPELINE_BIND_POINT_GRAPHICS) {
2662       struct dzn_graphics_pipeline *gfx = container_of(pipe, struct dzn_graphics_pipeline, base);
2663       dzn_graphics_pipeline_destroy(gfx, pAllocator);
2664    } else {
2665       assert(pipe->type == VK_PIPELINE_BIND_POINT_COMPUTE);
2666       struct dzn_compute_pipeline *compute = container_of(pipe, struct dzn_compute_pipeline, base);
2667       dzn_compute_pipeline_destroy(compute, pAllocator);
2668    }
2669 }
2670