/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "dzn_private.h"

#include "spirv/nir_spirv.h"

#include "dxil_nir.h"
#include "nir_to_dxil.h"
#include "dxil_spirv_nir.h"
#include "spirv_to_dxil.h"

#include "dxil_validator.h"

#include "vk_alloc.h"
#include "vk_util.h"
#include "vk_format.h"
#include "vk_pipeline.h"
#include "vk_pipeline_cache.h"

#include "util/u_debug.h"

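/* D3D12 pipeline state is described as a "stream": a packed sequence of
 * (subobject type, subobject desc) pairs living in a single buffer. The
 * macro below carves the next pair out of the stream, tags it with the
 * requested D3D12_PIPELINE_STATE_SUBOBJECT_TYPE, and hands back a
 * zero-initialized, pointer-aligned __desc for the caller to fill, e.g.:
 *
 *    d3d12_gfx_pipeline_state_stream_new_desc(out, BLEND, D3D12_BLEND_DESC, desc);
 *    desc->AlphaToCoverageEnable = true;
 */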
#define d3d12_pipeline_state_stream_new_desc(__stream, __maxstreamsz, __id, __type, __desc) \
   __type *__desc; \
   do { \
      struct { \
         D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type; \
         __type desc; \
      } *__wrapper; \
      (__stream)->SizeInBytes = ALIGN_POT((__stream)->SizeInBytes, alignof(void *)); \
      __wrapper = (void *)((uint8_t *)(__stream)->pPipelineStateSubobjectStream + (__stream)->SizeInBytes); \
      (__stream)->SizeInBytes += sizeof(*__wrapper); \
      assert((__stream)->SizeInBytes <= __maxstreamsz); \
      __wrapper->type = D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_ ## __id; \
      __desc = &__wrapper->desc; \
      memset(__desc, 0, sizeof(*__desc)); \
   } while (0)

#define d3d12_gfx_pipeline_state_stream_new_desc(__stream, __id, __type, __desc) \
   d3d12_pipeline_state_stream_new_desc(__stream, MAX_GFX_PIPELINE_STATE_STREAM_SIZE, __id, __type, __desc)

#define d3d12_compute_pipeline_state_stream_new_desc(__stream, __id, __type, __desc) \
   d3d12_pipeline_state_stream_new_desc(__stream, MAX_COMPUTE_PIPELINE_STATE_STREAM_SIZE, __id, __type, __desc)

static bool
gfx_pipeline_variant_key_equal(const void *a, const void *b)
{
   return !memcmp(a, b, sizeof(struct dzn_graphics_pipeline_variant_key));
}

static uint32_t
gfx_pipeline_variant_key_hash(const void *key)
{
   return _mesa_hash_data(key, sizeof(struct dzn_graphics_pipeline_variant_key));
}

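/* Generic pipeline-cache object wrapping an opaque byte blob keyed by a
 * SHA1 hash. It backs both cached DXIL shaders and cached graphics
 * pipelines.
 */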
struct dzn_cached_blob {
   struct vk_pipeline_cache_object base;
   uint8_t hash[SHA1_DIGEST_LENGTH];
   const void *data;
   size_t size;
};

static bool
dzn_cached_blob_serialize(struct vk_pipeline_cache_object *object,
                          struct blob *blob)
{
   struct dzn_cached_blob *cached_blob =
      container_of(object, struct dzn_cached_blob, base);

   blob_write_bytes(blob, cached_blob->data, cached_blob->size);
   return true;
}

static void
dzn_cached_blob_destroy(struct vk_pipeline_cache_object *object)
{
   struct dzn_cached_blob *shader =
      container_of(object, struct dzn_cached_blob, base);

   vk_free(&shader->base.device->alloc, shader);
}

static struct vk_pipeline_cache_object *
dzn_cached_blob_create(struct vk_device *device,
                       const void *hash,
                       const void *data,
                       size_t data_size);

static struct vk_pipeline_cache_object *
dzn_cached_blob_deserialize(struct vk_device *device,
                            const void *key_data,
                            size_t key_size,
                            struct blob_reader *blob)
{
   size_t data_size = blob->end - blob->current;
   assert(key_size == SHA1_DIGEST_LENGTH);

   return dzn_cached_blob_create(device, key_data,
                                 blob_read_bytes(blob, data_size),
                                 data_size);
}

const struct vk_pipeline_cache_object_ops dzn_cached_blob_ops = {
   .serialize = dzn_cached_blob_serialize,
   .deserialize = dzn_cached_blob_deserialize,
   .destroy = dzn_cached_blob_destroy,
};


static struct vk_pipeline_cache_object *
dzn_cached_blob_create(struct vk_device *device,
                       const void *hash,
                       const void *data,
                       size_t data_size)
{
   VK_MULTIALLOC(ma);
   VK_MULTIALLOC_DECL(&ma, struct dzn_cached_blob, blob, 1);
   VK_MULTIALLOC_DECL(&ma, uint8_t, copy, data_size);

   if (!vk_multialloc_alloc(&ma, &device->alloc,
                            VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
      return NULL;

   memcpy(blob->hash, hash, sizeof(blob->hash));

   vk_pipeline_cache_object_init(device, &blob->base,
                                 &dzn_cached_blob_ops,
                                 blob->hash, sizeof(blob->hash));

   if (data)
      memcpy(copy, data, data_size);
   blob->data = copy;
   blob->size = data_size;

   return &blob->base;
}

static VkResult
dzn_graphics_pipeline_prepare_for_variants(struct dzn_device *device,
                                           struct dzn_graphics_pipeline *pipeline)
{
   if (pipeline->variants)
      return VK_SUCCESS;

   pipeline->variants =
      _mesa_hash_table_create(NULL,
                              gfx_pipeline_variant_key_hash,
                              gfx_pipeline_variant_key_equal);
   if (!pipeline->variants)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   return VK_SUCCESS;
}

static dxil_spirv_shader_stage
to_dxil_shader_stage(VkShaderStageFlagBits in)
{
   switch (in) {
   case VK_SHADER_STAGE_VERTEX_BIT: return DXIL_SPIRV_SHADER_VERTEX;
   case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return DXIL_SPIRV_SHADER_TESS_CTRL;
   case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return DXIL_SPIRV_SHADER_TESS_EVAL;
   case VK_SHADER_STAGE_GEOMETRY_BIT: return DXIL_SPIRV_SHADER_GEOMETRY;
   case VK_SHADER_STAGE_FRAGMENT_BIT: return DXIL_SPIRV_SHADER_FRAGMENT;
   case VK_SHADER_STAGE_COMPUTE_BIT: return DXIL_SPIRV_SHADER_COMPUTE;
   default: unreachable("Unsupported stage");
   }
}

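/* Get the NIR shader for one stage: try the NIR cache first, otherwise run
 * spirv_to_nir() through vk_shader_module_to_nir() and apply the
 * DXIL-specific lowering passes (sysval/push-constant CBVs, yz-flip,
 * forced sample-rate shading, vertex-input format conversions).
 */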
static VkResult
dzn_pipeline_get_nir_shader(struct dzn_device *device,
                            const struct dzn_pipeline_layout *layout,
                            struct vk_pipeline_cache *cache,
                            const uint8_t *hash,
                            const VkPipelineShaderStageCreateInfo *stage_info,
                            gl_shader_stage stage,
                            enum dxil_spirv_yz_flip_mode yz_flip_mode,
                            uint16_t y_flip_mask, uint16_t z_flip_mask,
                            bool force_sample_rate_shading,
                            enum pipe_format *vi_conversions,
                            const nir_shader_compiler_options *nir_opts,
                            nir_shader **nir)
{
   if (cache) {
      *nir = vk_pipeline_cache_lookup_nir(cache, hash, SHA1_DIGEST_LENGTH,
                                          nir_opts, NULL, NULL);
      if (*nir)
         return VK_SUCCESS;
   }

   VK_FROM_HANDLE(vk_shader_module, module, stage_info->module);
   struct spirv_to_nir_options spirv_opts = {
      .caps = {
         .draw_parameters = true,
      },
      .ubo_addr_format = nir_address_format_32bit_index_offset,
      .ssbo_addr_format = nir_address_format_32bit_index_offset,
      .shared_addr_format = nir_address_format_32bit_offset_as_64bit,

      /* use_deref_buffer_array_length + nir_lower_explicit_io force
       * get_ssbo_size to consume the value returned by
       * load_vulkan_descriptor instead of vulkan_resource_index. This
       * makes it much easier to get the DXIL handle for the SSBO.
       */
      .use_deref_buffer_array_length = true
   };

   VkResult result =
      vk_shader_module_to_nir(&device->vk, module, stage,
                              stage_info->pName, stage_info->pSpecializationInfo,
                              &spirv_opts, nir_opts, NULL, nir);
   if (result != VK_SUCCESS)
      return result;

   struct dxil_spirv_runtime_conf conf = {
      .runtime_data_cbv = {
         .register_space = DZN_REGISTER_SPACE_SYSVALS,
         .base_shader_register = 0,
      },
      .push_constant_cbv = {
         .register_space = DZN_REGISTER_SPACE_PUSH_CONSTANT,
         .base_shader_register = 0,
      },
      .zero_based_vertex_instance_id = false,
      .yz_flip = {
         .mode = yz_flip_mode,
         .y_mask = y_flip_mask,
         .z_mask = z_flip_mask,
      },
      .read_only_images_as_srvs = true,
      .force_sample_rate_shading = force_sample_rate_shading,
   };

   bool requires_runtime_data;
   dxil_spirv_nir_passes(*nir, &conf, &requires_runtime_data);

   if (stage == MESA_SHADER_VERTEX) {
      bool needs_conv = false;
      for (uint32_t i = 0; i < MAX_VERTEX_GENERIC_ATTRIBS; i++) {
         if (vi_conversions[i] != PIPE_FORMAT_NONE)
            needs_conv = true;
      }

      if (needs_conv)
         NIR_PASS_V(*nir, dxil_nir_lower_vs_vertex_conversion, vi_conversions);
   }

   if (cache)
      vk_pipeline_cache_add_nir(cache, hash, SHA1_DIGEST_LENGTH, *nir);

   return VK_SUCCESS;
}

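/* Rewrite the binding carried by vulkan_resource_index intrinsics so it
 * matches the D3D12 register picked by the pipeline layout's
 * binding-translation table.
 */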
static bool
adjust_resource_index_binding(struct nir_builder *builder, nir_instr *instr,
                              void *cb_data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   if (intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
      return false;

   const struct dzn_pipeline_layout *layout = cb_data;
   unsigned set = nir_intrinsic_desc_set(intrin);
   unsigned binding = nir_intrinsic_binding(intrin);

   if (set >= layout->set_count ||
       binding >= layout->binding_translation[set].binding_count)
      return false;

   binding = layout->binding_translation[set].base_reg[binding];
   nir_intrinsic_set_binding(intrin, binding);

   return true;
}

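/* Apply the same set/binding -> register remapping to shader variables
 * (images, samplers/textures, UBOs, SSBOs). When bindings_hash is non-NULL,
 * the remapping is hashed so it can contribute to the DXIL cache key.
 */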
static bool
adjust_var_bindings(nir_shader *shader,
                    const struct dzn_pipeline_layout *layout,
                    uint8_t *bindings_hash)
{
   uint32_t modes = nir_var_image | nir_var_uniform | nir_var_mem_ubo | nir_var_mem_ssbo;
   struct mesa_sha1 bindings_hash_ctx;

   if (bindings_hash)
      _mesa_sha1_init(&bindings_hash_ctx);

   nir_foreach_variable_with_modes(var, shader, modes) {
      if (var->data.mode == nir_var_uniform) {
         const struct glsl_type *type = glsl_without_array(var->type);

         if (!glsl_type_is_sampler(type) && !glsl_type_is_texture(type))
            continue;
      }

      unsigned s = var->data.descriptor_set, b = var->data.binding;

      if (s >= layout->set_count)
         continue;

      assert(b < layout->binding_translation[s].binding_count);
      var->data.binding = layout->binding_translation[s].base_reg[b];

      if (bindings_hash) {
         _mesa_sha1_update(&bindings_hash_ctx, &s, sizeof(s));
         _mesa_sha1_update(&bindings_hash_ctx, &b, sizeof(b));
         _mesa_sha1_update(&bindings_hash_ctx, &var->data.binding, sizeof(var->data.binding));
      }
   }

   if (bindings_hash)
      _mesa_sha1_final(&bindings_hash_ctx, bindings_hash);

   return nir_shader_instructions_pass(shader, adjust_resource_index_binding,
                                       nir_metadata_all, (void *)layout);
}

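/* Lower a NIR shader to DXIL and store the bytecode in the given
 * D3D12_SHADER_BYTECODE slot. On Windows the module is also validated
 * (and disassembled to stderr when DZN_DEBUG_DXIL is set).
 */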
static VkResult
dzn_pipeline_compile_shader(struct dzn_device *device,
                            nir_shader *nir,
                            D3D12_SHADER_BYTECODE *slot)
{
   struct dzn_instance *instance =
      container_of(device->vk.physical->instance, struct dzn_instance, vk);
   struct nir_to_dxil_options opts = {
      .environment = DXIL_ENVIRONMENT_VULKAN,
      .shader_model_max = SHADER_MODEL_6_2,
#ifdef _WIN32
      .validator_version_max = dxil_get_validator_version(instance->dxil_validator),
#endif
   };
   struct blob dxil_blob;
   VkResult result = VK_SUCCESS;

   if (instance->debug_flags & DZN_DEBUG_NIR)
      nir_print_shader(nir, stderr);

   if (nir_to_dxil(nir, &opts, &dxil_blob)) {
      blob_finish_get_buffer(&dxil_blob, (void **)&slot->pShaderBytecode,
                             (size_t *)&slot->BytecodeLength);
   } else {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   if (dxil_blob.allocated)
      blob_finish(&dxil_blob);

   if (result != VK_SUCCESS)
      return result;

#ifdef _WIN32
   char *err;
   bool res = dxil_validate_module(instance->dxil_validator,
                                   (void *)slot->pShaderBytecode,
                                   slot->BytecodeLength, &err);

   if (instance->debug_flags & DZN_DEBUG_DXIL) {
      char *disasm = dxil_disasm_module(instance->dxil_validator,
                                        (void *)slot->pShaderBytecode,
                                        slot->BytecodeLength);
      if (disasm) {
         fprintf(stderr,
                 "== BEGIN SHADER ============================================\n"
                 "%s\n"
                 "== END SHADER ==============================================\n",
                 disasm);
         ralloc_free(disasm);
      }
   }

   if (!res) {
      if (err) {
         fprintf(stderr,
                 "== VALIDATION ERROR =============================================\n"
                 "%s\n"
                 "== END ==========================================================\n",
                 err);
         ralloc_free(err);
      }
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }
#endif

   return VK_SUCCESS;
}

static D3D12_SHADER_BYTECODE *
dzn_pipeline_get_gfx_shader_slot(D3D12_PIPELINE_STATE_STREAM_DESC *stream,
                                 gl_shader_stage in)
{
   switch (in) {
   case MESA_SHADER_VERTEX: {
      d3d12_gfx_pipeline_state_stream_new_desc(stream, VS, D3D12_SHADER_BYTECODE, desc);
      return desc;
   }
   case MESA_SHADER_TESS_CTRL: {
      /* The D3D12 hull shader is the Vulkan tessellation control stage. */
      d3d12_gfx_pipeline_state_stream_new_desc(stream, HS, D3D12_SHADER_BYTECODE, desc);
      return desc;
   }
   case MESA_SHADER_TESS_EVAL: {
      /* The D3D12 domain shader is the Vulkan tessellation evaluation stage. */
      d3d12_gfx_pipeline_state_stream_new_desc(stream, DS, D3D12_SHADER_BYTECODE, desc);
      return desc;
   }
   case MESA_SHADER_GEOMETRY: {
      d3d12_gfx_pipeline_state_stream_new_desc(stream, GS, D3D12_SHADER_BYTECODE, desc);
      return desc;
   }
   case MESA_SHADER_FRAGMENT: {
      d3d12_gfx_pipeline_state_stream_new_desc(stream, PS, D3D12_SHADER_BYTECODE, desc);
      return desc;
   }
   default: unreachable("Unsupported stage");
   }
}

struct dzn_cached_dxil_shader_header {
   gl_shader_stage stage;
   size_t size;
   uint8_t data[0];
};

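/* Look up a compiled DXIL shader. On a hit, the bytecode is copied into a
 * malloc()ed buffer owned by the caller and *stage is set; on a miss,
 * *stage is left as MESA_SHADER_NONE and VK_SUCCESS is still returned.
 */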
static VkResult
dzn_pipeline_cache_lookup_dxil_shader(struct vk_pipeline_cache *cache,
                                      const uint8_t *dxil_hash,
                                      gl_shader_stage *stage,
                                      D3D12_SHADER_BYTECODE *bc)
{
   *stage = MESA_SHADER_NONE;

   if (!cache)
      return VK_SUCCESS;

   struct vk_pipeline_cache_object *cache_obj = NULL;

   cache_obj =
      vk_pipeline_cache_lookup_object(cache, dxil_hash, SHA1_DIGEST_LENGTH,
                                      &dzn_cached_blob_ops,
                                      NULL);
   if (!cache_obj)
      return VK_SUCCESS;

   struct dzn_cached_blob *cached_blob =
      container_of(cache_obj, struct dzn_cached_blob, base);
   VkResult ret = VK_SUCCESS;

   assert(sizeof(struct dzn_cached_dxil_shader_header) <= cached_blob->size);

   const struct dzn_cached_dxil_shader_header *info =
      (struct dzn_cached_dxil_shader_header *)(cached_blob->data);

   assert(sizeof(struct dzn_cached_dxil_shader_header) + info->size <= cached_blob->size);
   assert(info->stage > MESA_SHADER_NONE && info->stage < MESA_VULKAN_SHADER_STAGES);
   assert(info->size > 0);

   void *code = malloc(info->size);
   if (!code) {
      ret = vk_error(cache->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto out;
   }

   memcpy(code, info->data, info->size);

   bc->pShaderBytecode = code;
   bc->BytecodeLength = info->size;
   *stage = info->stage;

out:
   vk_pipeline_cache_object_unref(cache_obj);
   return ret;
}

static void
dzn_pipeline_cache_add_dxil_shader(struct vk_pipeline_cache *cache,
                                   const uint8_t *dxil_hash,
                                   gl_shader_stage stage,
                                   const D3D12_SHADER_BYTECODE *bc)
{
   size_t size = sizeof(struct dzn_cached_dxil_shader_header) +
                 bc->BytecodeLength;

   struct vk_pipeline_cache_object *cache_obj =
      dzn_cached_blob_create(cache->base.device, dxil_hash, NULL, size);
   if (!cache_obj)
      return;

   struct dzn_cached_blob *cached_blob =
      container_of(cache_obj, struct dzn_cached_blob, base);
   struct dzn_cached_dxil_shader_header *info =
      (struct dzn_cached_dxil_shader_header *)(cached_blob->data);
   info->stage = stage;
   info->size = bc->BytecodeLength;
   memcpy(info->data, bc->pShaderBytecode, bc->BytecodeLength);

   cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);
   vk_pipeline_cache_object_unref(cache_obj);
}

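/* Cached graphics pipelines are serialized as:
 *
 *    struct dzn_cached_gfx_pipeline_header
 *    D3D12_INPUT_ELEMENT_DESC x input_count   (aligned, if input_count > 0)
 *    one SHA1 hash per active stage, keying a cached DXIL shader
 *
 * The lookup below re-inflates that layout into the pipeline's state-stream
 * template and resolves each per-stage hash through the DXIL shader cache.
 */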
struct dzn_cached_gfx_pipeline_header {
   uint32_t stages;
   uint32_t input_count;
};

static VkResult
dzn_pipeline_cache_lookup_gfx_pipeline(struct dzn_graphics_pipeline *pipeline,
                                       struct vk_pipeline_cache *cache,
                                       const uint8_t *pipeline_hash,
                                       bool *cache_hit)
{
   *cache_hit = false;

   if (!cache)
      return VK_SUCCESS;

   struct vk_pipeline_cache_object *cache_obj = NULL;

   cache_obj =
      vk_pipeline_cache_lookup_object(cache, pipeline_hash, SHA1_DIGEST_LENGTH,
                                      &dzn_cached_blob_ops,
                                      NULL);
   if (!cache_obj)
      return VK_SUCCESS;

   struct dzn_cached_blob *cached_blob =
      container_of(cache_obj, struct dzn_cached_blob, base);
   D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc =
      &pipeline->templates.stream_desc;

   const struct dzn_cached_gfx_pipeline_header *info =
      (const struct dzn_cached_gfx_pipeline_header *)(cached_blob->data);
   size_t offset = sizeof(*info);

   assert(cached_blob->size >= sizeof(*info));

   if (info->input_count > 0) {
      offset = ALIGN_POT(offset, alignof(D3D12_INPUT_LAYOUT_DESC));
      const D3D12_INPUT_ELEMENT_DESC *inputs =
         (const D3D12_INPUT_ELEMENT_DESC *)((uint8_t *)cached_blob->data + offset);

      assert(cached_blob->size >= offset + sizeof(*inputs) * info->input_count);

      memcpy(pipeline->templates.inputs, inputs,
             sizeof(*inputs) * info->input_count);
      d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, INPUT_LAYOUT, D3D12_INPUT_LAYOUT_DESC, desc);
      desc->pInputElementDescs = pipeline->templates.inputs;
      desc->NumElements = info->input_count;
      offset += sizeof(*inputs) * info->input_count;
   }

   assert(cached_blob->size == offset + util_bitcount(info->stages) * SHA1_DIGEST_LENGTH);

   u_foreach_bit(s, info->stages) {
      uint8_t *dxil_hash = (uint8_t *)cached_blob->data + offset;
      gl_shader_stage stage;

      D3D12_SHADER_BYTECODE *slot =
         dzn_pipeline_get_gfx_shader_slot(stream_desc, s);

      VkResult ret =
         dzn_pipeline_cache_lookup_dxil_shader(cache, dxil_hash, &stage, slot);
      if (ret != VK_SUCCESS)
         return ret;

      assert(stage == s);
      offset += SHA1_DIGEST_LENGTH;
   }

   *cache_hit = true;

   vk_pipeline_cache_object_unref(cache_obj);
   return VK_SUCCESS;
}

static void
dzn_pipeline_cache_add_gfx_pipeline(struct dzn_graphics_pipeline *pipeline,
                                    struct vk_pipeline_cache *cache,
                                    uint32_t vertex_input_count,
                                    const uint8_t *pipeline_hash,
                                    const uint8_t *const *dxil_hashes)
{
   size_t offset =
      ALIGN_POT(sizeof(struct dzn_cached_gfx_pipeline_header), alignof(D3D12_INPUT_ELEMENT_DESC)) +
      (sizeof(D3D12_INPUT_ELEMENT_DESC) * vertex_input_count);
   uint32_t stages = 0;

   for (uint32_t i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
      if (pipeline->templates.shaders[i].bc) {
         stages |= BITFIELD_BIT(i);
         offset += SHA1_DIGEST_LENGTH;
      }
   }

   struct vk_pipeline_cache_object *cache_obj =
      dzn_cached_blob_create(cache->base.device, pipeline_hash, NULL, offset);
   if (!cache_obj)
      return;

   struct dzn_cached_blob *cached_blob =
      container_of(cache_obj, struct dzn_cached_blob, base);

   offset = 0;
   struct dzn_cached_gfx_pipeline_header *info =
      (struct dzn_cached_gfx_pipeline_header *)(cached_blob->data);

   info->input_count = vertex_input_count;
   info->stages = stages;

   offset = ALIGN_POT(offset + sizeof(*info), alignof(D3D12_INPUT_ELEMENT_DESC));

   D3D12_INPUT_ELEMENT_DESC *inputs =
      (D3D12_INPUT_ELEMENT_DESC *)((uint8_t *)cached_blob->data + offset);
   memcpy(inputs, pipeline->templates.inputs,
          sizeof(*inputs) * vertex_input_count);
   offset += sizeof(*inputs) * vertex_input_count;

   u_foreach_bit(s, stages) {
      uint8_t *dxil_hash = (uint8_t *)cached_blob->data + offset;

      memcpy(dxil_hash, dxil_hashes[s], SHA1_DIGEST_LENGTH);
      offset += SHA1_DIGEST_LENGTH;
   }

   cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);
   vk_pipeline_cache_object_unref(cache_obj);
}

static void
dzn_graphics_pipeline_hash_attribs(D3D12_INPUT_ELEMENT_DESC *attribs,
                                   enum pipe_format *vi_conversions,
                                   uint8_t *result)
{
   struct mesa_sha1 ctx;

   _mesa_sha1_init(&ctx);
   _mesa_sha1_update(&ctx, attribs, sizeof(*attribs) * MAX_VERTEX_GENERIC_ATTRIBS);
   _mesa_sha1_update(&ctx, vi_conversions, sizeof(*vi_conversions) * MAX_VERTEX_GENERIC_ATTRIBS);
   _mesa_sha1_final(&ctx, result);
}

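/* Compile (or fetch from the cache) all shader stages of a graphics
 * pipeline. The work is staged: collect stage info and compute the
 * pipeline hash, get NIR for each active stage, link the stages in reverse
 * pipeline order, remap bindings, and finally translate whatever wasn't
 * found in the cache into DXIL modules.
 */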
static VkResult
dzn_graphics_pipeline_compile_shaders(struct dzn_device *device,
                                      struct dzn_graphics_pipeline *pipeline,
                                      struct vk_pipeline_cache *cache,
                                      const struct dzn_pipeline_layout *layout,
                                      D3D12_PIPELINE_STATE_STREAM_DESC *out,
                                      D3D12_INPUT_ELEMENT_DESC *attribs,
                                      enum pipe_format *vi_conversions,
                                      const VkGraphicsPipelineCreateInfo *info)
{
   const VkPipelineViewportStateCreateInfo *vp_info =
      info->pRasterizationState->rasterizerDiscardEnable ?
      NULL : info->pViewportState;
   struct {
      const VkPipelineShaderStageCreateInfo *info;
      uint8_t spirv_hash[SHA1_DIGEST_LENGTH];
      uint8_t dxil_hash[SHA1_DIGEST_LENGTH];
   } stages[MESA_VULKAN_SHADER_STAGES] = { 0 };
   const uint8_t *dxil_hashes[MESA_VULKAN_SHADER_STAGES] = { 0 };
   uint8_t attribs_hash[SHA1_DIGEST_LENGTH];
   uint8_t pipeline_hash[SHA1_DIGEST_LENGTH];
   gl_shader_stage yz_flip_stage = MESA_SHADER_NONE;
   uint32_t active_stage_mask = 0;
   VkResult ret;

   /* First step: collect stage info in a table indexed by gl_shader_stage
    * so we can iterate over stages in pipeline order or reverse pipeline
    * order.
    */
   for (uint32_t i = 0; i < info->stageCount; i++) {
      gl_shader_stage stage =
         vk_to_mesa_shader_stage(info->pStages[i].stage);

      assert(stage <= MESA_SHADER_FRAGMENT);

      if ((stage == MESA_SHADER_VERTEX ||
           stage == MESA_SHADER_TESS_EVAL ||
           stage == MESA_SHADER_GEOMETRY) &&
          yz_flip_stage < stage)
         yz_flip_stage = stage;

      if (stage == MESA_SHADER_FRAGMENT &&
          info->pRasterizationState &&
          (info->pRasterizationState->rasterizerDiscardEnable ||
           info->pRasterizationState->cullMode == VK_CULL_MODE_FRONT_AND_BACK)) {
         /* Disable rasterization (AKA leave fragment shader NULL) when
          * front+back culling or discard is set.
          */
         continue;
      }

      stages[stage].info = &info->pStages[i];
      active_stage_mask |= BITFIELD_BIT(stage);
   }

   enum dxil_spirv_yz_flip_mode yz_flip_mode = DXIL_SPIRV_YZ_FLIP_NONE;
   uint16_t y_flip_mask = 0, z_flip_mask = 0;

   if (pipeline->vp.dynamic) {
      yz_flip_mode = DXIL_SPIRV_YZ_FLIP_CONDITIONAL;
   } else if (vp_info) {
      for (uint32_t i = 0; vp_info->pViewports && i < vp_info->viewportCount; i++) {
         if (vp_info->pViewports[i].height > 0)
            y_flip_mask |= BITFIELD_BIT(i);

         if (vp_info->pViewports[i].minDepth > vp_info->pViewports[i].maxDepth)
            z_flip_mask |= BITFIELD_BIT(i);
      }

      if (y_flip_mask && z_flip_mask)
         yz_flip_mode = DXIL_SPIRV_YZ_FLIP_UNCONDITIONAL;
      else if (z_flip_mask)
         yz_flip_mode = DXIL_SPIRV_Z_FLIP_UNCONDITIONAL;
      else if (y_flip_mask)
         yz_flip_mode = DXIL_SPIRV_Y_FLIP_UNCONDITIONAL;
   }

   bool force_sample_rate_shading =
      info->pMultisampleState &&
      info->pMultisampleState->sampleShadingEnable;

   if (cache) {
      dzn_graphics_pipeline_hash_attribs(attribs, vi_conversions, attribs_hash);

      struct mesa_sha1 pipeline_hash_ctx;

      _mesa_sha1_init(&pipeline_hash_ctx);
      _mesa_sha1_update(&pipeline_hash_ctx, attribs_hash, sizeof(attribs_hash));
      _mesa_sha1_update(&pipeline_hash_ctx, &yz_flip_mode, sizeof(yz_flip_mode));
      _mesa_sha1_update(&pipeline_hash_ctx, &y_flip_mask, sizeof(y_flip_mask));
      _mesa_sha1_update(&pipeline_hash_ctx, &z_flip_mask, sizeof(z_flip_mask));
      _mesa_sha1_update(&pipeline_hash_ctx, &force_sample_rate_shading, sizeof(force_sample_rate_shading));

      u_foreach_bit(stage, active_stage_mask) {
         vk_pipeline_hash_shader_stage(stages[stage].info, stages[stage].spirv_hash);
         _mesa_sha1_update(&pipeline_hash_ctx, stages[stage].spirv_hash, sizeof(stages[stage].spirv_hash));
         _mesa_sha1_update(&pipeline_hash_ctx, layout->stages[stage].hash, sizeof(layout->stages[stage].hash));
      }
      _mesa_sha1_final(&pipeline_hash_ctx, pipeline_hash);

      bool cache_hit;
      ret = dzn_pipeline_cache_lookup_gfx_pipeline(pipeline, cache, pipeline_hash,
                                                   &cache_hit);
      if (ret != VK_SUCCESS)
         return ret;

      if (cache_hit)
         return VK_SUCCESS;
   }

   /* Second step: get NIR shaders for all stages. */
   nir_shader_compiler_options nir_opts = *dxil_get_nir_compiler_options();
   nir_opts.lower_base_vertex = true;
   u_foreach_bit(stage, active_stage_mask) {
      struct mesa_sha1 nir_hash_ctx;
      uint8_t nir_hash[SHA1_DIGEST_LENGTH];

      if (cache) {
         _mesa_sha1_init(&nir_hash_ctx);
         if (stage == MESA_SHADER_VERTEX)
            _mesa_sha1_update(&nir_hash_ctx, attribs_hash, sizeof(attribs_hash));
         if (stage == yz_flip_stage) {
            _mesa_sha1_update(&nir_hash_ctx, &yz_flip_mode, sizeof(yz_flip_mode));
            _mesa_sha1_update(&nir_hash_ctx, &y_flip_mask, sizeof(y_flip_mask));
            _mesa_sha1_update(&nir_hash_ctx, &z_flip_mask, sizeof(z_flip_mask));
         }
         _mesa_sha1_update(&nir_hash_ctx, stages[stage].spirv_hash, sizeof(stages[stage].spirv_hash));
         _mesa_sha1_final(&nir_hash_ctx, nir_hash);
      }

      ret = dzn_pipeline_get_nir_shader(device, layout,
                                        cache, nir_hash,
                                        stages[stage].info, stage,
                                        stage == yz_flip_stage ? yz_flip_mode : DXIL_SPIRV_YZ_FLIP_NONE,
                                        y_flip_mask, z_flip_mask,
                                        stage == MESA_SHADER_FRAGMENT ? force_sample_rate_shading : false,
                                        vi_conversions,
                                        &nir_opts, &pipeline->templates.shaders[stage].nir);
      if (ret != VK_SUCCESS)
         return ret;
   }

   /* Third step: link those NIR shaders. We iterate in reverse order
    * so we can eliminate outputs that are never read by the next stage.
    */
   uint32_t link_mask = active_stage_mask;
   while (link_mask != 0) {
      gl_shader_stage stage = util_last_bit(link_mask) - 1;
      link_mask &= ~BITFIELD_BIT(stage);
      gl_shader_stage prev_stage = util_last_bit(link_mask) - 1;

      assert(pipeline->templates.shaders[stage].nir);
      dxil_spirv_nir_link(pipeline->templates.shaders[stage].nir,
                          prev_stage != MESA_SHADER_NONE ?
                          pipeline->templates.shaders[prev_stage].nir : NULL);
   }

   u_foreach_bit(stage, active_stage_mask) {
      uint8_t bindings_hash[SHA1_DIGEST_LENGTH];

      NIR_PASS_V(pipeline->templates.shaders[stage].nir, adjust_var_bindings, layout,
                 cache ? bindings_hash : NULL);

      if (cache) {
         struct mesa_sha1 dxil_hash_ctx;

         _mesa_sha1_init(&dxil_hash_ctx);

         if (stage == MESA_SHADER_VERTEX)
            _mesa_sha1_update(&dxil_hash_ctx, attribs_hash, sizeof(attribs_hash));

         if (stage == yz_flip_stage) {
            _mesa_sha1_update(&dxil_hash_ctx, &yz_flip_mode, sizeof(yz_flip_mode));
            _mesa_sha1_update(&dxil_hash_ctx, &y_flip_mask, sizeof(y_flip_mask));
            _mesa_sha1_update(&dxil_hash_ctx, &z_flip_mask, sizeof(z_flip_mask));
         }

         if (stage == MESA_SHADER_FRAGMENT)
            _mesa_sha1_update(&dxil_hash_ctx, &force_sample_rate_shading, sizeof(force_sample_rate_shading));

         _mesa_sha1_update(&dxil_hash_ctx, stages[stage].spirv_hash, sizeof(stages[stage].spirv_hash));
         _mesa_sha1_update(&dxil_hash_ctx, bindings_hash, sizeof(bindings_hash));
         _mesa_sha1_final(&dxil_hash_ctx, stages[stage].dxil_hash);
         dxil_hashes[stage] = stages[stage].dxil_hash;

         gl_shader_stage cached_stage;
         D3D12_SHADER_BYTECODE bc;
         ret = dzn_pipeline_cache_lookup_dxil_shader(cache, stages[stage].dxil_hash, &cached_stage, &bc);
         if (ret != VK_SUCCESS)
            return ret;

         if (cached_stage != MESA_SHADER_NONE) {
            assert(cached_stage == stage);
            D3D12_SHADER_BYTECODE *slot =
               dzn_pipeline_get_gfx_shader_slot(out, stage);
            *slot = bc;
            pipeline->templates.shaders[stage].bc = slot;
         }
      }
   }

   uint32_t vert_input_count = 0;
   if (pipeline->templates.shaders[MESA_SHADER_VERTEX].nir) {
      /* Now, declare one D3D12_INPUT_ELEMENT_DESC per VS input variable, so
       * we can handle location overlaps properly.
       */
      nir_foreach_shader_in_variable(var, pipeline->templates.shaders[MESA_SHADER_VERTEX].nir) {
         assert(var->data.location >= VERT_ATTRIB_GENERIC0);
         unsigned loc = var->data.location - VERT_ATTRIB_GENERIC0;
         assert(vert_input_count < D3D12_VS_INPUT_REGISTER_COUNT);
         assert(loc < MAX_VERTEX_GENERIC_ATTRIBS);

         pipeline->templates.inputs[vert_input_count] = attribs[loc];
         pipeline->templates.inputs[vert_input_count].SemanticIndex = vert_input_count;
         var->data.driver_location = vert_input_count++;
      }

      if (vert_input_count > 0) {
         d3d12_gfx_pipeline_state_stream_new_desc(out, INPUT_LAYOUT, D3D12_INPUT_LAYOUT_DESC, desc);
         desc->pInputElementDescs = pipeline->templates.inputs;
         desc->NumElements = vert_input_count;
      }
   }

   /* Last step: translate NIR shaders into DXIL modules. */
   u_foreach_bit(stage, active_stage_mask) {
      /* Cache hit, we can skip the compilation. */
      if (pipeline->templates.shaders[stage].bc)
         continue;

      if (stage == MESA_SHADER_FRAGMENT) {
         gl_shader_stage prev_stage =
            util_last_bit(active_stage_mask & BITFIELD_MASK(MESA_SHADER_FRAGMENT)) - 1;
         /* Disable rasterization if the last geometry stage doesn't
          * write the position.
          */
         if (prev_stage == MESA_SHADER_NONE ||
             !(pipeline->templates.shaders[prev_stage].nir->info.outputs_written & VARYING_BIT_POS))
            continue;
      }

      D3D12_SHADER_BYTECODE *slot =
         dzn_pipeline_get_gfx_shader_slot(out, stage);

      ret = dzn_pipeline_compile_shader(device, pipeline->templates.shaders[stage].nir, slot);
      if (ret != VK_SUCCESS)
         return ret;

      pipeline->templates.shaders[stage].bc = slot;

      if (cache)
         dzn_pipeline_cache_add_dxil_shader(cache, stages[stage].dxil_hash, stage, slot);
   }

   if (cache)
      dzn_pipeline_cache_add_gfx_pipeline(pipeline, cache, vert_input_count, pipeline_hash,
                                          dxil_hashes);

   return VK_SUCCESS;
}

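/* Patch vertex-input formats that DXGI cannot express into formats the
 * input assembler can load: SCALED formats are fetched as plain integers,
 * and the 10:10:10:2 variants with no DXGI equivalent are fetched as a raw
 * R32_UINT. dzn_graphics_pipeline_translate_vi() records the original
 * format in vi_conversions, and dxil_nir_lower_vs_vertex_conversion()
 * reconstructs the expected values in the vertex shader.
 */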
VkFormat
dzn_graphics_pipeline_patch_vi_format(VkFormat format)
{
   switch (format) {
   case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
   case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
   case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
   case VK_FORMAT_A2R10G10B10_USCALED_PACK32:
   case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
   case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
   case VK_FORMAT_A2B10G10R10_USCALED_PACK32:
      return VK_FORMAT_R32_UINT;
   case VK_FORMAT_R8G8B8A8_SSCALED:
      return VK_FORMAT_R8G8B8A8_SINT;
   case VK_FORMAT_R8G8B8A8_USCALED:
      return VK_FORMAT_R8G8B8A8_UINT;
   case VK_FORMAT_R16G16B16A16_USCALED:
      return VK_FORMAT_R16G16B16A16_UINT;
   case VK_FORMAT_R16G16B16A16_SSCALED:
      return VK_FORMAT_R16G16B16A16_SINT;
   default:
      return format;
   }
}

static VkResult
dzn_graphics_pipeline_translate_vi(struct dzn_graphics_pipeline *pipeline,
                                   const VkGraphicsPipelineCreateInfo *in,
                                   D3D12_INPUT_ELEMENT_DESC *inputs,
                                   enum pipe_format *vi_conversions)
{
   const VkPipelineVertexInputStateCreateInfo *in_vi =
      in->pVertexInputState;
   const VkPipelineVertexInputDivisorStateCreateInfoEXT *divisors =
      (const VkPipelineVertexInputDivisorStateCreateInfoEXT *)
      vk_find_struct_const(in_vi, PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);

   if (!in_vi->vertexAttributeDescriptionCount)
      return VK_SUCCESS;

   D3D12_INPUT_CLASSIFICATION slot_class[MAX_VBS];

   pipeline->vb.count = 0;
   for (uint32_t i = 0; i < in_vi->vertexBindingDescriptionCount; i++) {
      const struct VkVertexInputBindingDescription *bdesc =
         &in_vi->pVertexBindingDescriptions[i];

      pipeline->vb.count = MAX2(pipeline->vb.count, bdesc->binding + 1);
      pipeline->vb.strides[bdesc->binding] = bdesc->stride;
      if (bdesc->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) {
         slot_class[bdesc->binding] = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
      } else {
         assert(bdesc->inputRate == VK_VERTEX_INPUT_RATE_VERTEX);
         slot_class[bdesc->binding] = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
      }
   }

   for (uint32_t i = 0; i < in_vi->vertexAttributeDescriptionCount; i++) {
      const VkVertexInputAttributeDescription *attr =
         &in_vi->pVertexAttributeDescriptions[i];
      const VkVertexInputBindingDivisorDescriptionEXT *divisor = NULL;

      if (slot_class[attr->binding] == D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA &&
          divisors) {
         for (uint32_t d = 0; d < divisors->vertexBindingDivisorCount; d++) {
            if (attr->binding == divisors->pVertexBindingDivisors[d].binding) {
               divisor = &divisors->pVertexBindingDivisors[d];
               break;
            }
         }
      }

      VkFormat patched_format = dzn_graphics_pipeline_patch_vi_format(attr->format);
      if (patched_format != attr->format)
         vi_conversions[attr->location] = vk_format_to_pipe_format(attr->format);

      /* nir_to_dxil() names all vertex inputs TEXCOORDx */
      inputs[attr->location] = (D3D12_INPUT_ELEMENT_DESC) {
         .SemanticName = "TEXCOORD",
         .Format = dzn_buffer_get_dxgi_format(patched_format),
         .InputSlot = attr->binding,
         .InputSlotClass = slot_class[attr->binding],
         .InstanceDataStepRate =
            divisor ? divisor->divisor :
            slot_class[attr->binding] == D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA ? 1 : 0,
         .AlignedByteOffset = attr->offset,
      };
   }

   return VK_SUCCESS;
}

static D3D12_PRIMITIVE_TOPOLOGY_TYPE
to_prim_topology_type(VkPrimitiveTopology in)
{
   switch (in) {
   case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
      return D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
      return D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
      return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
   case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
      return D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH;
   default: unreachable("Invalid primitive topology");
   }
}

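/* D3D encodes patch-list topologies as a contiguous enum range indexed by
 * the control-point count, which is why PATCH_LIST is translated
 * arithmetically below.
 */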
static D3D12_PRIMITIVE_TOPOLOGY
to_prim_topology(VkPrimitiveTopology in, unsigned patch_control_points)
{
   switch (in) {
   case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: return D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: return D3D_PRIMITIVE_TOPOLOGY_LINELIST;
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ;
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
   /* Triangle fans are emulated using an intermediate index buffer. */
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ;
   case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
      assert(patch_control_points);
      return (D3D12_PRIMITIVE_TOPOLOGY)(D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + patch_control_points - 1);
   default: unreachable("Invalid primitive topology");
   }
}

static VkResult
dzn_graphics_pipeline_translate_ia(struct dzn_device *device,
                                   struct dzn_graphics_pipeline *pipeline,
                                   D3D12_PIPELINE_STATE_STREAM_DESC *out,
                                   const VkGraphicsPipelineCreateInfo *in)
{
   const VkPipelineInputAssemblyStateCreateInfo *in_ia =
      in->pInputAssemblyState;
   bool has_tes = false;
   for (uint32_t i = 0; i < in->stageCount; i++) {
      if (in->pStages[i].stage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT ||
          in->pStages[i].stage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) {
         has_tes = true;
         break;
      }
   }
   const VkPipelineTessellationStateCreateInfo *in_tes =
      has_tes ? in->pTessellationState : NULL;
   VkResult ret = VK_SUCCESS;

   d3d12_gfx_pipeline_state_stream_new_desc(out, PRIMITIVE_TOPOLOGY, D3D12_PRIMITIVE_TOPOLOGY_TYPE, prim_top_type);
   *prim_top_type = to_prim_topology_type(in_ia->topology);
   pipeline->ia.triangle_fan = in_ia->topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
   pipeline->ia.topology =
      to_prim_topology(in_ia->topology, in_tes ? in_tes->patchControlPoints : 0);

   if (in_ia->primitiveRestartEnable) {
      d3d12_gfx_pipeline_state_stream_new_desc(out, IB_STRIP_CUT_VALUE, D3D12_INDEX_BUFFER_STRIP_CUT_VALUE, ib_strip_cut);
      pipeline->templates.desc_offsets.ib_strip_cut =
         (uintptr_t)ib_strip_cut - (uintptr_t)out->pPipelineStateSubobjectStream;
      *ib_strip_cut = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED;
      ret = dzn_graphics_pipeline_prepare_for_variants(device, pipeline);
   }

   return ret;
}

static D3D12_FILL_MODE
translate_polygon_mode(VkPolygonMode in)
{
   switch (in) {
   case VK_POLYGON_MODE_FILL: return D3D12_FILL_MODE_SOLID;
   case VK_POLYGON_MODE_LINE: return D3D12_FILL_MODE_WIREFRAME;
   default: unreachable("Unsupported polygon mode");
   }
}

static D3D12_CULL_MODE
translate_cull_mode(VkCullModeFlags in)
{
   switch (in) {
   case VK_CULL_MODE_NONE: return D3D12_CULL_MODE_NONE;
   case VK_CULL_MODE_FRONT_BIT: return D3D12_CULL_MODE_FRONT;
   case VK_CULL_MODE_BACK_BIT: return D3D12_CULL_MODE_BACK;
   /* Front+back face culling is equivalent to 'rasterization disabled'. */
   case VK_CULL_MODE_FRONT_AND_BACK: return D3D12_CULL_MODE_NONE;
   default: unreachable("Unsupported cull mode");
   }
}

static int32_t
translate_depth_bias(double depth_bias)
{
   if (depth_bias > INT32_MAX)
      return INT32_MAX;
   else if (depth_bias < INT32_MIN)
      return INT32_MIN;

   return depth_bias;
}

static void
dzn_graphics_pipeline_translate_rast(struct dzn_graphics_pipeline *pipeline,
                                     D3D12_PIPELINE_STATE_STREAM_DESC *out,
                                     const VkGraphicsPipelineCreateInfo *in)
{
   const VkPipelineRasterizationStateCreateInfo *in_rast =
      in->pRasterizationState;
   const VkPipelineViewportStateCreateInfo *in_vp =
      in_rast->rasterizerDiscardEnable ? NULL : in->pViewportState;

   if (in_vp) {
      pipeline->vp.count = in_vp->viewportCount;
      if (in_vp->pViewports) {
         for (uint32_t i = 0; in_vp->pViewports && i < in_vp->viewportCount; i++)
            dzn_translate_viewport(&pipeline->vp.desc[i], &in_vp->pViewports[i]);
      }

      pipeline->scissor.count = in_vp->scissorCount;
      if (in_vp->pScissors) {
         for (uint32_t i = 0; i < in_vp->scissorCount; i++)
            dzn_translate_rect(&pipeline->scissor.desc[i], &in_vp->pScissors[i]);
      }
   }

   d3d12_gfx_pipeline_state_stream_new_desc(out, RASTERIZER, D3D12_RASTERIZER_DESC, desc);
   pipeline->templates.desc_offsets.rast =
      (uintptr_t)desc - (uintptr_t)out->pPipelineStateSubobjectStream;
   desc->DepthClipEnable = !in_rast->depthClampEnable;
   desc->FillMode = translate_polygon_mode(in_rast->polygonMode);
   desc->CullMode = translate_cull_mode(in_rast->cullMode);
   desc->FrontCounterClockwise =
      in_rast->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE;
   if (in_rast->depthBiasEnable) {
      desc->DepthBias = translate_depth_bias(in_rast->depthBiasConstantFactor);
      desc->SlopeScaledDepthBias = in_rast->depthBiasSlopeFactor;
      desc->DepthBiasClamp = in_rast->depthBiasClamp;
   }

   assert(in_rast->lineWidth == 1.0f);
}

static void
dzn_graphics_pipeline_translate_ms(struct dzn_graphics_pipeline *pipeline,
                                   D3D12_PIPELINE_STATE_STREAM_DESC *out,
                                   const VkGraphicsPipelineCreateInfo *in)
{
   const VkPipelineRasterizationStateCreateInfo *in_rast =
      in->pRasterizationState;
   const VkPipelineMultisampleStateCreateInfo *in_ms =
      in_rast->rasterizerDiscardEnable ? NULL : in->pMultisampleState;

   if (!in_ms)
      return;

   /* TODO: minSampleShading (use VRS), alphaToOneEnable */
   d3d12_gfx_pipeline_state_stream_new_desc(out, SAMPLE_DESC, DXGI_SAMPLE_DESC, desc);
   desc->Count = in_ms ? in_ms->rasterizationSamples : 1;
   desc->Quality = 0;

   if (!in_ms->pSampleMask)
      return;

   d3d12_gfx_pipeline_state_stream_new_desc(out, SAMPLE_MASK, UINT, mask);
   *mask = *in_ms->pSampleMask;
}

static D3D12_STENCIL_OP
translate_stencil_op(VkStencilOp in)
{
   switch (in) {
   case VK_STENCIL_OP_KEEP: return D3D12_STENCIL_OP_KEEP;
   case VK_STENCIL_OP_ZERO: return D3D12_STENCIL_OP_ZERO;
   case VK_STENCIL_OP_REPLACE: return D3D12_STENCIL_OP_REPLACE;
   case VK_STENCIL_OP_INCREMENT_AND_CLAMP: return D3D12_STENCIL_OP_INCR_SAT;
   case VK_STENCIL_OP_DECREMENT_AND_CLAMP: return D3D12_STENCIL_OP_DECR_SAT;
   case VK_STENCIL_OP_INCREMENT_AND_WRAP: return D3D12_STENCIL_OP_INCR;
   case VK_STENCIL_OP_DECREMENT_AND_WRAP: return D3D12_STENCIL_OP_DECR;
   case VK_STENCIL_OP_INVERT: return D3D12_STENCIL_OP_INVERT;
   default: unreachable("Invalid stencil op");
   }
}

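/* D3D12_DEPTH_STENCIL_DESC1 exposes a single stencil read/write mask shared
 * by both faces, while Vulkan allows independent front/back masks and
 * references. Work out which faces actually consume the reference and
 * masks (accounting for culling, compare ops and dynamic state), record
 * the result on the pipeline so variants can patch things up later, and
 * flag the combinations that can't be expressed.
 */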
1213 static void
translate_stencil_test(struct dzn_graphics_pipeline * pipeline,D3D12_DEPTH_STENCIL_DESC1 * out,const VkGraphicsPipelineCreateInfo * in)1214 translate_stencil_test(struct dzn_graphics_pipeline *pipeline,
1215 D3D12_DEPTH_STENCIL_DESC1 *out,
1216 const VkGraphicsPipelineCreateInfo *in)
1217 {
1218 const VkPipelineDepthStencilStateCreateInfo *in_zsa =
1219 in->pDepthStencilState;
1220
1221 bool front_test_uses_ref =
1222 !(in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT) &&
1223 in_zsa->front.compareOp != VK_COMPARE_OP_NEVER &&
1224 in_zsa->front.compareOp != VK_COMPARE_OP_ALWAYS &&
1225 (pipeline->zsa.stencil_test.dynamic_compare_mask ||
1226 in_zsa->front.compareMask != 0);
1227 bool back_test_uses_ref =
1228 !(in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT) &&
1229 in_zsa->back.compareOp != VK_COMPARE_OP_NEVER &&
1230 in_zsa->back.compareOp != VK_COMPARE_OP_ALWAYS &&
1231 (pipeline->zsa.stencil_test.dynamic_compare_mask ||
1232 in_zsa->back.compareMask != 0);
1233
1234 if (front_test_uses_ref && pipeline->zsa.stencil_test.dynamic_compare_mask)
1235 pipeline->zsa.stencil_test.front.compare_mask = UINT32_MAX;
1236 else if (front_test_uses_ref)
1237 pipeline->zsa.stencil_test.front.compare_mask = in_zsa->front.compareMask;
1238 else
1239 pipeline->zsa.stencil_test.front.compare_mask = 0;
1240
1241 if (back_test_uses_ref && pipeline->zsa.stencil_test.dynamic_compare_mask)
1242 pipeline->zsa.stencil_test.back.compare_mask = UINT32_MAX;
1243 else if (back_test_uses_ref)
1244 pipeline->zsa.stencil_test.back.compare_mask = in_zsa->back.compareMask;
1245 else
1246 pipeline->zsa.stencil_test.back.compare_mask = 0;
1247
1248 bool diff_wr_mask =
1249 in->pRasterizationState->cullMode == VK_CULL_MODE_NONE &&
1250 (pipeline->zsa.stencil_test.dynamic_write_mask ||
1251 in_zsa->back.writeMask != in_zsa->front.writeMask);
1252 bool diff_ref =
1253 in->pRasterizationState->cullMode == VK_CULL_MODE_NONE &&
1254 (pipeline->zsa.stencil_test.dynamic_ref ||
1255 in_zsa->back.reference != in_zsa->front.reference);
1256 bool diff_cmp_mask =
1257 back_test_uses_ref && front_test_uses_ref &&
1258 (pipeline->zsa.stencil_test.dynamic_compare_mask ||
1259 pipeline->zsa.stencil_test.front.compare_mask != pipeline->zsa.stencil_test.back.compare_mask);
1260
1261 if (diff_cmp_mask || diff_wr_mask)
1262 pipeline->zsa.stencil_test.independent_front_back = true;
1263
1264 bool back_wr_uses_ref =
1265 !(in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT) &&
1266 ((in_zsa->back.compareOp != VK_COMPARE_OP_ALWAYS &&
1267 in_zsa->back.failOp == VK_STENCIL_OP_REPLACE) ||
1268 (in_zsa->back.compareOp != VK_COMPARE_OP_NEVER &&
1269 (!in_zsa->depthTestEnable || in_zsa->depthCompareOp != VK_COMPARE_OP_NEVER) &&
1270 in_zsa->back.passOp == VK_STENCIL_OP_REPLACE) ||
1271 (in_zsa->depthTestEnable &&
1272 in_zsa->depthCompareOp != VK_COMPARE_OP_ALWAYS &&
1273 in_zsa->back.depthFailOp == VK_STENCIL_OP_REPLACE));
1274 bool front_wr_uses_ref =
1275 !(in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT) &&
1276 ((in_zsa->front.compareOp != VK_COMPARE_OP_ALWAYS &&
1277 in_zsa->front.failOp == VK_STENCIL_OP_REPLACE) ||
1278 (in_zsa->front.compareOp != VK_COMPARE_OP_NEVER &&
1279 (!in_zsa->depthTestEnable || in_zsa->depthCompareOp != VK_COMPARE_OP_NEVER) &&
1280 in_zsa->front.passOp == VK_STENCIL_OP_REPLACE) ||
1281 (in_zsa->depthTestEnable &&
1282 in_zsa->depthCompareOp != VK_COMPARE_OP_ALWAYS &&
1283 in_zsa->front.depthFailOp == VK_STENCIL_OP_REPLACE));
1284
1285 pipeline->zsa.stencil_test.front.write_mask =
1286 (pipeline->zsa.stencil_test.dynamic_write_mask ||
1287 (in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT)) ?
1288 0 : in_zsa->front.writeMask;
1289 pipeline->zsa.stencil_test.back.write_mask =
1290 (pipeline->zsa.stencil_test.dynamic_write_mask ||
1291 (in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT)) ?
1292 0 : in_zsa->back.writeMask;
1293
1294 pipeline->zsa.stencil_test.front.uses_ref = front_test_uses_ref || front_wr_uses_ref;
1295 pipeline->zsa.stencil_test.back.uses_ref = back_test_uses_ref || back_wr_uses_ref;
1296
1297 if (diff_ref &&
1298 pipeline->zsa.stencil_test.front.uses_ref &&
1299 pipeline->zsa.stencil_test.back.uses_ref)
1300 pipeline->zsa.stencil_test.independent_front_back = true;
1301
1302 pipeline->zsa.stencil_test.front.ref =
1303 pipeline->zsa.stencil_test.dynamic_ref ? 0 : in_zsa->front.reference;
1304 pipeline->zsa.stencil_test.back.ref =
1305 pipeline->zsa.stencil_test.dynamic_ref ? 0 : in_zsa->back.reference;
1306
1307 /* FIXME: We don't support independent {compare,write}_mask and stencil
1308 * reference. Until we have proper support for independent front/back
1309 * stencil test, let's prioritize the front setup when both are active.
1310 */
1311 out->StencilReadMask =
1312 front_test_uses_ref ?
1313 pipeline->zsa.stencil_test.front.compare_mask :
1314 back_test_uses_ref ?
1315 pipeline->zsa.stencil_test.back.compare_mask : 0;
1316 out->StencilWriteMask =
1317 pipeline->zsa.stencil_test.front.write_mask ?
1318 pipeline->zsa.stencil_test.front.write_mask :
1319 pipeline->zsa.stencil_test.back.write_mask;
1320
1321 assert(!pipeline->zsa.stencil_test.independent_front_back);
1322 }
1323
1324 static void
dzn_graphics_pipeline_translate_zsa(struct dzn_graphics_pipeline * pipeline,D3D12_PIPELINE_STATE_STREAM_DESC * out,const VkGraphicsPipelineCreateInfo * in)1325 dzn_graphics_pipeline_translate_zsa(struct dzn_graphics_pipeline *pipeline,
1326 D3D12_PIPELINE_STATE_STREAM_DESC *out,
1327 const VkGraphicsPipelineCreateInfo *in)
1328 {
1329 const VkPipelineRasterizationStateCreateInfo *in_rast =
1330 in->pRasterizationState;
1331 const VkPipelineDepthStencilStateCreateInfo *in_zsa =
1332 in_rast->rasterizerDiscardEnable ? NULL : in->pDepthStencilState;
1333
1334 if (!in_zsa)
1335 return;
1336
1337 d3d12_gfx_pipeline_state_stream_new_desc(out, DEPTH_STENCIL1, D3D12_DEPTH_STENCIL_DESC1, desc);
1338 pipeline->templates.desc_offsets.ds =
1339 (uintptr_t)desc - (uintptr_t)out->pPipelineStateSubobjectStream;
1340
1341 desc->DepthEnable =
1342 in_zsa->depthTestEnable || in_zsa->depthBoundsTestEnable;
1343 desc->DepthWriteMask =
1344 in_zsa->depthWriteEnable ?
1345 D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;
1346 desc->DepthFunc =
1347 in_zsa->depthTestEnable ?
1348 dzn_translate_compare_op(in_zsa->depthCompareOp) :
1349 D3D12_COMPARISON_FUNC_ALWAYS;
1350 pipeline->zsa.depth_bounds.enable = in_zsa->depthBoundsTestEnable;
1351 pipeline->zsa.depth_bounds.min = in_zsa->minDepthBounds;
1352 pipeline->zsa.depth_bounds.max = in_zsa->maxDepthBounds;
1353 desc->DepthBoundsTestEnable = in_zsa->depthBoundsTestEnable;
1354 desc->StencilEnable = in_zsa->stencilTestEnable;
1355 if (in_zsa->stencilTestEnable) {
1356 desc->FrontFace.StencilFailOp =
1357 translate_stencil_op(in_zsa->front.failOp);
1358 desc->FrontFace.StencilDepthFailOp =
1359 translate_stencil_op(in_zsa->front.depthFailOp);
1360 desc->FrontFace.StencilPassOp =
1361 translate_stencil_op(in_zsa->front.passOp);
1362 desc->FrontFace.StencilFunc =
1363 dzn_translate_compare_op(in_zsa->front.compareOp);
1364 desc->BackFace.StencilFailOp =
1365 translate_stencil_op(in_zsa->back.failOp);
1366 desc->BackFace.StencilDepthFailOp =
1367 translate_stencil_op(in_zsa->back.depthFailOp);
1368 desc->BackFace.StencilPassOp =
1369 translate_stencil_op(in_zsa->back.passOp);
1370 desc->BackFace.StencilFunc =
1371 dzn_translate_compare_op(in_zsa->back.compareOp);
1372
1373 pipeline->zsa.stencil_test.enable = true;
1374
1375 translate_stencil_test(pipeline, desc, in);
1376 }
1377 }
1378
1379 static D3D12_BLEND
translate_blend_factor(VkBlendFactor in,bool is_alpha)1380 translate_blend_factor(VkBlendFactor in, bool is_alpha)
1381 {
1382 switch (in) {
1383 case VK_BLEND_FACTOR_ZERO: return D3D12_BLEND_ZERO;
1384 case VK_BLEND_FACTOR_ONE: return D3D12_BLEND_ONE;
1385 case VK_BLEND_FACTOR_SRC_COLOR:
1386 return is_alpha ? D3D12_BLEND_SRC_ALPHA : D3D12_BLEND_SRC_COLOR;
1387 case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
1388 return is_alpha ? D3D12_BLEND_INV_SRC_ALPHA : D3D12_BLEND_INV_SRC_COLOR;
1389 case VK_BLEND_FACTOR_DST_COLOR:
1390 return is_alpha ? D3D12_BLEND_DEST_ALPHA : D3D12_BLEND_DEST_COLOR;
1391 case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
1392 return is_alpha ? D3D12_BLEND_INV_DEST_ALPHA : D3D12_BLEND_INV_DEST_COLOR;
1393 case VK_BLEND_FACTOR_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA;
1394 case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA;
1395 case VK_BLEND_FACTOR_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA;
1396 case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA;
1397 /* FIXME: no way to isolate the alpla and color constants */
   case VK_BLEND_FACTOR_CONSTANT_COLOR:
   case VK_BLEND_FACTOR_CONSTANT_ALPHA:
      return D3D12_BLEND_BLEND_FACTOR;
   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
      return D3D12_BLEND_INV_BLEND_FACTOR;
   case VK_BLEND_FACTOR_SRC1_COLOR:
      return is_alpha ? D3D12_BLEND_SRC1_ALPHA : D3D12_BLEND_SRC1_COLOR;
   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
      return is_alpha ? D3D12_BLEND_INV_SRC1_ALPHA : D3D12_BLEND_INV_SRC1_COLOR;
   case VK_BLEND_FACTOR_SRC1_ALPHA: return D3D12_BLEND_SRC1_ALPHA;
   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: return D3D12_BLEND_INV_SRC1_ALPHA;
   case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT;
   default: unreachable("Invalid blend factor");
   }
}

static D3D12_BLEND_OP
translate_blend_op(VkBlendOp in)
{
   switch (in) {
   case VK_BLEND_OP_ADD: return D3D12_BLEND_OP_ADD;
   case VK_BLEND_OP_SUBTRACT: return D3D12_BLEND_OP_SUBTRACT;
   case VK_BLEND_OP_REVERSE_SUBTRACT: return D3D12_BLEND_OP_REV_SUBTRACT;
   case VK_BLEND_OP_MIN: return D3D12_BLEND_OP_MIN;
   case VK_BLEND_OP_MAX: return D3D12_BLEND_OP_MAX;
   default: unreachable("Invalid blend op");
   }
}

static D3D12_LOGIC_OP
translate_logic_op(VkLogicOp in)
{
   switch (in) {
   case VK_LOGIC_OP_CLEAR: return D3D12_LOGIC_OP_CLEAR;
   case VK_LOGIC_OP_AND: return D3D12_LOGIC_OP_AND;
   case VK_LOGIC_OP_AND_REVERSE: return D3D12_LOGIC_OP_AND_REVERSE;
   case VK_LOGIC_OP_COPY: return D3D12_LOGIC_OP_COPY;
   case VK_LOGIC_OP_AND_INVERTED: return D3D12_LOGIC_OP_AND_INVERTED;
   case VK_LOGIC_OP_NO_OP: return D3D12_LOGIC_OP_NOOP;
   case VK_LOGIC_OP_XOR: return D3D12_LOGIC_OP_XOR;
   case VK_LOGIC_OP_OR: return D3D12_LOGIC_OP_OR;
   case VK_LOGIC_OP_NOR: return D3D12_LOGIC_OP_NOR;
   case VK_LOGIC_OP_EQUIVALENT: return D3D12_LOGIC_OP_EQUIV;
   case VK_LOGIC_OP_INVERT: return D3D12_LOGIC_OP_INVERT;
   case VK_LOGIC_OP_OR_REVERSE: return D3D12_LOGIC_OP_OR_REVERSE;
   case VK_LOGIC_OP_COPY_INVERTED: return D3D12_LOGIC_OP_COPY_INVERTED;
   case VK_LOGIC_OP_OR_INVERTED: return D3D12_LOGIC_OP_OR_INVERTED;
   case VK_LOGIC_OP_NAND: return D3D12_LOGIC_OP_NAND;
   case VK_LOGIC_OP_SET: return D3D12_LOGIC_OP_SET;
   default: unreachable("Invalid logic op");
   }
}

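/*
 * Translate VkPipelineColorBlendStateCreateInfo (plus the alpha-to-coverage
 * bit of the multisample state) into a D3D12_BLEND_DESC sub-object of the
 * pipeline state stream. Blend state is skipped entirely when rasterization
 * is discarded. Blend constants are stashed on the pipeline so they can be
 * re-applied at bind time.
 */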
static void
dzn_graphics_pipeline_translate_blend(struct dzn_graphics_pipeline *pipeline,
                                      D3D12_PIPELINE_STATE_STREAM_DESC *out,
                                      const VkGraphicsPipelineCreateInfo *in)
{
   const VkPipelineRasterizationStateCreateInfo *in_rast =
      in->pRasterizationState;
   const VkPipelineColorBlendStateCreateInfo *in_blend =
      in_rast->rasterizerDiscardEnable ? NULL : in->pColorBlendState;
   const VkPipelineMultisampleStateCreateInfo *in_ms =
      in_rast->rasterizerDiscardEnable ? NULL : in->pMultisampleState;

   if (!in_blend || !in_ms)
      return;

   d3d12_gfx_pipeline_state_stream_new_desc(out, BLEND, D3D12_BLEND_DESC, desc);
   D3D12_LOGIC_OP logicop =
      in_blend->logicOpEnable ?
      translate_logic_op(in_blend->logicOp) : D3D12_LOGIC_OP_NOOP;
   desc->AlphaToCoverageEnable = in_ms->alphaToCoverageEnable;
   memcpy(pipeline->blend.constants, in_blend->blendConstants,
          sizeof(pipeline->blend.constants));
   for (uint32_t i = 0; i < in_blend->attachmentCount; i++) {
      /* Independent blend is only needed when an attachment's blend state
       * differs from the previous one.
       */
      if (i > 0 &&
          memcmp(&in_blend->pAttachments[i - 1], &in_blend->pAttachments[i],
                 sizeof(*in_blend->pAttachments)) != 0)
         desc->IndependentBlendEnable = true;

      desc->RenderTarget[i].BlendEnable =
         in_blend->pAttachments[i].blendEnable;
      desc->RenderTarget[i].RenderTargetWriteMask =
         in_blend->pAttachments[i].colorWriteMask;

      if (in_blend->logicOpEnable) {
         desc->RenderTarget[i].LogicOpEnable = true;
         desc->RenderTarget[i].LogicOp = logicop;
      } else {
         desc->RenderTarget[i].SrcBlend =
            translate_blend_factor(in_blend->pAttachments[i].srcColorBlendFactor, false);
         desc->RenderTarget[i].DestBlend =
            translate_blend_factor(in_blend->pAttachments[i].dstColorBlendFactor, false);
         desc->RenderTarget[i].BlendOp =
            translate_blend_op(in_blend->pAttachments[i].colorBlendOp);
         desc->RenderTarget[i].SrcBlendAlpha =
            translate_blend_factor(in_blend->pAttachments[i].srcAlphaBlendFactor, true);
         desc->RenderTarget[i].DestBlendAlpha =
            translate_blend_factor(in_blend->pAttachments[i].dstAlphaBlendFactor, true);
         desc->RenderTarget[i].BlendOpAlpha =
            translate_blend_op(in_blend->pAttachments[i].alphaBlendOp);
      }
   }
}

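/*
 * Common dzn_pipeline initialization: snapshot the root-signature layout
 * information from the pipeline layout (taking a reference on the
 * ID3D12RootSignature) and emit the ROOT_SIGNATURE sub-object as the first
 * entry of the pipeline state stream.
 */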
static void
dzn_pipeline_init(struct dzn_pipeline *pipeline,
                  struct dzn_device *device,
                  VkPipelineBindPoint type,
                  struct dzn_pipeline_layout *layout,
                  D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc)
{
   pipeline->type = type;
   pipeline->root.sets_param_count = layout->root.sets_param_count;
   pipeline->root.sysval_cbv_param_idx = layout->root.sysval_cbv_param_idx;
   pipeline->root.push_constant_cbv_param_idx = layout->root.push_constant_cbv_param_idx;
   STATIC_ASSERT(sizeof(pipeline->root.type) == sizeof(layout->root.type));
   memcpy(pipeline->root.type, layout->root.type, sizeof(pipeline->root.type));
   pipeline->root.sig = layout->root.sig;
   ID3D12RootSignature_AddRef(pipeline->root.sig);

   STATIC_ASSERT(sizeof(layout->desc_count) == sizeof(pipeline->desc_count));
   memcpy(pipeline->desc_count, layout->desc_count, sizeof(pipeline->desc_count));

   STATIC_ASSERT(sizeof(layout->sets) == sizeof(pipeline->sets));
   memcpy(pipeline->sets, layout->sets, sizeof(pipeline->sets));
   vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);

   ASSERTED uint32_t max_streamsz =
      type == VK_PIPELINE_BIND_POINT_GRAPHICS ?
      MAX_GFX_PIPELINE_STATE_STREAM_SIZE :
      MAX_COMPUTE_PIPELINE_STATE_STREAM_SIZE;

   d3d12_pipeline_state_stream_new_desc(stream_desc, max_streamsz, ROOT_SIGNATURE,
                                        ID3D12RootSignature *, root_sig);
   *root_sig = pipeline->root.sig;
}

static void
dzn_pipeline_finish(struct dzn_pipeline *pipeline)
{
   if (pipeline->state)
      ID3D12PipelineState_Release(pipeline->state);
   if (pipeline->root.sig)
      ID3D12RootSignature_Release(pipeline->root.sig);

   vk_object_base_finish(&pipeline->base);
}

static void dzn_graphics_pipeline_delete_variant(struct hash_entry *he)
{
   struct dzn_graphics_pipeline_variant *variant = he->data;

   if (variant->state)
      ID3D12PipelineState_Release(variant->state);
}

static void
dzn_graphics_pipeline_cleanup_nir_shaders(struct dzn_graphics_pipeline *pipeline)
{
   for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->templates.shaders); i++) {
      ralloc_free(pipeline->templates.shaders[i].nir);
      pipeline->templates.shaders[i].nir = NULL;
   }
}

static void
dzn_graphics_pipeline_cleanup_dxil_shaders(struct dzn_graphics_pipeline *pipeline)
{
   for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->templates.shaders); i++) {
      if (pipeline->templates.shaders[i].bc) {
         free((void *)pipeline->templates.shaders[i].bc->pShaderBytecode);
         pipeline->templates.shaders[i].bc = NULL;
      }
   }
}

static void
dzn_graphics_pipeline_destroy(struct dzn_graphics_pipeline *pipeline,
                              const VkAllocationCallbacks *alloc)
{
   if (!pipeline)
      return;

   _mesa_hash_table_destroy(pipeline->variants,
                            dzn_graphics_pipeline_delete_variant);

   dzn_graphics_pipeline_cleanup_nir_shaders(pipeline);
   dzn_graphics_pipeline_cleanup_dxil_shaders(pipeline);

   for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->indirect_cmd_sigs); i++) {
      if (pipeline->indirect_cmd_sigs[i])
         ID3D12CommandSignature_Release(pipeline->indirect_cmd_sigs[i]);
   }

   dzn_pipeline_finish(&pipeline->base);
   vk_free2(&pipeline->base.base.device->alloc, alloc, pipeline);
}

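/*
 * Create a graphics pipeline: translate the fixed-function state into
 * pipeline-state-stream sub-objects, record which states are dynamic,
 * collect render-target/depth-stencil formats from either the render pass
 * or a VkPipelineRenderingCreateInfo, compile the shaders, and finally
 * create the ID3D12PipelineState. State-object creation is deferred to
 * dzn_graphics_pipeline_get_state() when dynamic states require pipeline
 * variants.
 */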
static VkResult
dzn_graphics_pipeline_create(struct dzn_device *device,
                             VkPipelineCache cache,
                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
                             const VkAllocationCallbacks *pAllocator,
                             VkPipeline *out)
{
   const VkPipelineRenderingCreateInfo *ri = (const VkPipelineRenderingCreateInfo *)
      vk_find_struct_const(pCreateInfo, PIPELINE_RENDERING_CREATE_INFO);
   VK_FROM_HANDLE(vk_pipeline_cache, pcache, cache);
   VK_FROM_HANDLE(vk_render_pass, pass, pCreateInfo->renderPass);
   VK_FROM_HANDLE(dzn_pipeline_layout, layout, pCreateInfo->layout);
   uint32_t color_count = 0;
   VkFormat color_fmts[MAX_RTS] = { 0 };
   VkFormat zs_fmt = VK_FORMAT_UNDEFINED;
   VkResult ret;
   HRESULT hres = 0;

   struct dzn_graphics_pipeline *pipeline =
      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!pipeline)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc = &pipeline->templates.stream_desc;
   stream_desc->pPipelineStateSubobjectStream = pipeline->templates.stream_buf;

   dzn_pipeline_init(&pipeline->base, device,
                     VK_PIPELINE_BIND_POINT_GRAPHICS,
                     layout, stream_desc);
   D3D12_INPUT_ELEMENT_DESC attribs[MAX_VERTEX_GENERIC_ATTRIBS] = { 0 };
   enum pipe_format vi_conversions[MAX_VERTEX_GENERIC_ATTRIBS] = { 0 };

   ret = dzn_graphics_pipeline_translate_vi(pipeline, pCreateInfo,
                                            attribs, vi_conversions);
   if (ret != VK_SUCCESS)
      goto out;

   if (pCreateInfo->pDynamicState) {
      for (uint32_t i = 0; i < pCreateInfo->pDynamicState->dynamicStateCount; i++) {
         switch (pCreateInfo->pDynamicState->pDynamicStates[i]) {
         case VK_DYNAMIC_STATE_VIEWPORT:
            pipeline->vp.dynamic = true;
            break;
         case VK_DYNAMIC_STATE_SCISSOR:
            pipeline->scissor.dynamic = true;
            break;
         case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
            pipeline->zsa.stencil_test.dynamic_ref = true;
            break;
         case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
            pipeline->zsa.stencil_test.dynamic_compare_mask = true;
            break;
         case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
            pipeline->zsa.stencil_test.dynamic_write_mask = true;
            break;
         case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
            pipeline->blend.dynamic_constants = true;
            break;
         case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
            pipeline->zsa.depth_bounds.dynamic = true;
            break;
         case VK_DYNAMIC_STATE_DEPTH_BIAS:
            pipeline->zsa.dynamic_depth_bias = true;
            ret = dzn_graphics_pipeline_prepare_for_variants(device, pipeline);
            if (ret)
               goto out;
            break;
         case VK_DYNAMIC_STATE_LINE_WIDTH:
            /* Nothing to do since we only support lineWidth = 1. */
            break;
         default: unreachable("Unsupported dynamic state");
         }
      }
   }

   ret = dzn_graphics_pipeline_translate_ia(device, pipeline, stream_desc, pCreateInfo);
   if (ret)
      goto out;

   dzn_graphics_pipeline_translate_rast(pipeline, stream_desc, pCreateInfo);
   dzn_graphics_pipeline_translate_ms(pipeline, stream_desc, pCreateInfo);
   dzn_graphics_pipeline_translate_zsa(pipeline, stream_desc, pCreateInfo);
   dzn_graphics_pipeline_translate_blend(pipeline, stream_desc, pCreateInfo);

   if (pass) {
      const struct vk_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
      color_count = subpass->color_count;
      for (uint32_t i = 0; i < subpass->color_count; i++) {
         uint32_t idx = subpass->color_attachments[i].attachment;

         if (idx == VK_ATTACHMENT_UNUSED) continue;

         const struct vk_render_pass_attachment *attachment =
            &pass->attachments[idx];

         color_fmts[i] = attachment->format;
      }

      if (subpass->depth_stencil_attachment &&
          subpass->depth_stencil_attachment->attachment != VK_ATTACHMENT_UNUSED) {
         const struct vk_render_pass_attachment *attachment =
            &pass->attachments[subpass->depth_stencil_attachment->attachment];

         zs_fmt = attachment->format;
      }
   } else if (ri) {
      color_count = ri->colorAttachmentCount;
      memcpy(color_fmts, ri->pColorAttachmentFormats,
             sizeof(color_fmts[0]) * color_count);
      if (ri->depthAttachmentFormat != VK_FORMAT_UNDEFINED)
         zs_fmt = ri->depthAttachmentFormat;
      else if (ri->stencilAttachmentFormat != VK_FORMAT_UNDEFINED)
         zs_fmt = ri->stencilAttachmentFormat;
   }

   if (color_count > 0) {
      d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, RENDER_TARGET_FORMATS, struct D3D12_RT_FORMAT_ARRAY, rts);
      rts->NumRenderTargets = color_count;
      for (uint32_t i = 0; i < color_count; i++) {
         rts->RTFormats[i] =
            dzn_image_get_dxgi_format(color_fmts[i],
                                      VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
                                      VK_IMAGE_ASPECT_COLOR_BIT);
      }
   }

   if (zs_fmt != VK_FORMAT_UNDEFINED) {
      d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, DEPTH_STENCIL_FORMAT, DXGI_FORMAT, ds_fmt);
      *ds_fmt =
         dzn_image_get_dxgi_format(zs_fmt,
                                   VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
                                   VK_IMAGE_ASPECT_DEPTH_BIT |
                                   VK_IMAGE_ASPECT_STENCIL_BIT);
   }

   ret = dzn_graphics_pipeline_compile_shaders(device, pipeline, pcache,
                                               layout, stream_desc,
                                               attribs, vi_conversions,
                                               pCreateInfo);
   if (ret != VK_SUCCESS)
      goto out;

   if (!pipeline->variants) {
      hres = ID3D12Device2_CreatePipelineState(device->dev, stream_desc,
                                               &IID_ID3D12PipelineState,
                                               (void **)&pipeline->base.state);
      if (FAILED(hres)) {
         ret = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto out;
      }

      dzn_graphics_pipeline_cleanup_dxil_shaders(pipeline);
   }

   dzn_graphics_pipeline_cleanup_nir_shaders(pipeline);
   ret = VK_SUCCESS;

out:
   if (ret != VK_SUCCESS)
      dzn_graphics_pipeline_destroy(pipeline, pAllocator);
   else
      *out = dzn_graphics_pipeline_to_handle(pipeline);

   return ret;
}

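/*
 * Return the ID3D12PipelineState matching a variant key. If the pipeline
 * has no dynamic state requiring variants, the single pre-compiled state
 * object is returned. Otherwise the key is masked down to the fields that
 * can actually affect this pipeline, the variant cache is searched, and a
 * new state object is compiled from the stream template on a miss.
 *
 * A minimal usage sketch (hypothetical caller, e.g. a draw path that has
 * gathered the current dynamic state into `key`):
 *
 *    ID3D12PipelineState *state =
 *       dzn_graphics_pipeline_get_state(pipeline, &key);
 *    ID3D12GraphicsCommandList1_SetPipelineState(cmdlist, state);
 */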
ID3D12PipelineState *
dzn_graphics_pipeline_get_state(struct dzn_graphics_pipeline *pipeline,
                                const struct dzn_graphics_pipeline_variant_key *key)
{
   if (!pipeline->variants)
      return pipeline->base.state;

   struct dzn_graphics_pipeline_variant_key masked_key = { 0 };

   if (dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut))
      masked_key.ib_strip_cut = key->ib_strip_cut;

   if (dzn_graphics_pipeline_get_desc_template(pipeline, rast) &&
       pipeline->zsa.dynamic_depth_bias)
      masked_key.depth_bias = key->depth_bias;

   const D3D12_DEPTH_STENCIL_DESC1 *ds_templ =
      dzn_graphics_pipeline_get_desc_template(pipeline, ds);
   if (ds_templ && ds_templ->StencilEnable) {
      if (ds_templ->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
          ds_templ->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS)
         masked_key.stencil_test.front.compare_mask = key->stencil_test.front.compare_mask;
      if (ds_templ->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
          ds_templ->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS)
         masked_key.stencil_test.back.compare_mask = key->stencil_test.back.compare_mask;
      if (pipeline->zsa.stencil_test.dynamic_write_mask) {
         masked_key.stencil_test.front.write_mask = key->stencil_test.front.write_mask;
         masked_key.stencil_test.back.write_mask = key->stencil_test.back.write_mask;
      }
   }

   struct dzn_device *device =
      container_of(pipeline->base.base.device, struct dzn_device, vk);
   struct hash_entry *he =
      _mesa_hash_table_search(pipeline->variants, &masked_key);

   struct dzn_graphics_pipeline_variant *variant;

   if (!he) {
      variant = rzalloc(pipeline->variants, struct dzn_graphics_pipeline_variant);
      variant->key = masked_key;

      uintptr_t stream_buf[MAX_GFX_PIPELINE_STATE_STREAM_SIZE / sizeof(uintptr_t)];
      D3D12_PIPELINE_STATE_STREAM_DESC stream_desc = {
         .SizeInBytes = pipeline->templates.stream_desc.SizeInBytes,
         .pPipelineStateSubobjectStream = stream_buf,
      };

      memcpy(stream_buf, pipeline->templates.stream_buf, stream_desc.SizeInBytes);

      D3D12_INDEX_BUFFER_STRIP_CUT_VALUE *ib_strip_cut =
         dzn_graphics_pipeline_get_desc(pipeline, stream_buf, ib_strip_cut);
      if (ib_strip_cut)
         *ib_strip_cut = masked_key.ib_strip_cut;

      D3D12_RASTERIZER_DESC *rast =
         dzn_graphics_pipeline_get_desc(pipeline, stream_buf, rast);
      if (rast && pipeline->zsa.dynamic_depth_bias) {
         rast->DepthBias = translate_depth_bias(masked_key.depth_bias.constant_factor);
         rast->DepthBiasClamp = masked_key.depth_bias.clamp;
         rast->SlopeScaledDepthBias = masked_key.depth_bias.slope_factor;
      }

      D3D12_DEPTH_STENCIL_DESC1 *ds =
         dzn_graphics_pipeline_get_desc(pipeline, stream_buf, ds);
      if (ds && ds->StencilEnable) {
         if (pipeline->zsa.stencil_test.dynamic_compare_mask) {
            if (ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
                ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS) {
               ds->StencilReadMask = masked_key.stencil_test.front.compare_mask;
            }

            if (ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
                ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS) {
               ds->StencilReadMask = masked_key.stencil_test.back.compare_mask;
            }

            if (ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
                ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS &&
                ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
                ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS)
               assert(masked_key.stencil_test.front.compare_mask == masked_key.stencil_test.back.compare_mask);
         }

         if (pipeline->zsa.stencil_test.dynamic_write_mask) {
            assert(!masked_key.stencil_test.front.write_mask ||
                   !masked_key.stencil_test.back.write_mask ||
                   masked_key.stencil_test.front.write_mask == masked_key.stencil_test.back.write_mask);
            ds->StencilWriteMask =
               masked_key.stencil_test.front.write_mask |
               masked_key.stencil_test.back.write_mask;
         }
      }

      ASSERTED HRESULT hres = ID3D12Device2_CreatePipelineState(device->dev, &stream_desc,
                                                                &IID_ID3D12PipelineState,
                                                                (void **)&variant->state);
      assert(!FAILED(hres));
      he = _mesa_hash_table_insert(pipeline->variants, &variant->key, variant);
      assert(he);
   } else {
      variant = he->data;
   }

   if (variant->state)
      ID3D12PipelineState_AddRef(variant->state);

   if (pipeline->base.state)
      ID3D12PipelineState_Release(pipeline->base.state);

   pipeline->base.state = variant->state;
   return variant->state;
}

#define DZN_INDIRECT_CMD_SIG_MAX_ARGS 4

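/*
 * Lazily create (and cache on the pipeline) the command signature used for
 * indirect draws of the given type. On top of the draw arguments
 * themselves, the signature patches the sysval root constants with the
 * first_vertex/base_instance pair and with draw_id so shaders can read
 * them, and triangle-fan draws additionally rebind a rewritten index
 * buffer.
 */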
ID3D12CommandSignature *
dzn_graphics_pipeline_get_indirect_cmd_sig(struct dzn_graphics_pipeline *pipeline,
                                           enum dzn_indirect_draw_cmd_sig_type type)
{
   assert(type < DZN_NUM_INDIRECT_DRAW_CMD_SIGS);

   struct dzn_device *device =
      container_of(pipeline->base.base.device, struct dzn_device, vk);
   ID3D12CommandSignature *cmdsig = pipeline->indirect_cmd_sigs[type];

   if (cmdsig)
      return cmdsig;

   bool triangle_fan = type == DZN_INDIRECT_DRAW_TRIANGLE_FAN_CMD_SIG;
   bool indexed = type == DZN_INDIRECT_INDEXED_DRAW_CMD_SIG || triangle_fan;

   uint32_t cmd_arg_count = 0;
   D3D12_INDIRECT_ARGUMENT_DESC cmd_args[DZN_INDIRECT_CMD_SIG_MAX_ARGS];

   if (triangle_fan) {
      cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
         .Type = D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW,
      };
   }

   cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
      .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
      .Constant = {
         .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
         .DestOffsetIn32BitValues = offsetof(struct dxil_spirv_vertex_runtime_data, first_vertex) / 4,
         .Num32BitValuesToSet = 2,
      },
   };

   cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
      .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
      .Constant = {
         .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
         .DestOffsetIn32BitValues = offsetof(struct dxil_spirv_vertex_runtime_data, draw_id) / 4,
         .Num32BitValuesToSet = 1,
      },
   };

   cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
      .Type = indexed ?
              D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED :
              D3D12_INDIRECT_ARGUMENT_TYPE_DRAW,
   };

   assert(cmd_arg_count <= ARRAY_SIZE(cmd_args));
   assert(offsetof(struct dxil_spirv_vertex_runtime_data, first_vertex) == 0);

   D3D12_COMMAND_SIGNATURE_DESC cmd_sig_desc = {
      .ByteStride =
         triangle_fan ?
         sizeof(struct dzn_indirect_triangle_fan_draw_exec_params) :
         sizeof(struct dzn_indirect_draw_exec_params),
      .NumArgumentDescs = cmd_arg_count,
      .pArgumentDescs = cmd_args,
   };
   HRESULT hres =
      ID3D12Device1_CreateCommandSignature(device->dev, &cmd_sig_desc,
                                           pipeline->base.root.sig,
                                           &IID_ID3D12CommandSignature,
                                           (void **)&cmdsig);
   if (FAILED(hres))
      return NULL;

   pipeline->indirect_cmd_sigs[type] = cmdsig;
   return cmdsig;
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_CreateGraphicsPipelines(VkDevice dev,
                            VkPipelineCache pipelineCache,
                            uint32_t count,
                            const VkGraphicsPipelineCreateInfo *pCreateInfos,
                            const VkAllocationCallbacks *pAllocator,
                            VkPipeline *pPipelines)
{
   VK_FROM_HANDLE(dzn_device, device, dev);
   VkResult result = VK_SUCCESS;

   unsigned i;
   for (i = 0; i < count; i++) {
      result = dzn_graphics_pipeline_create(device,
                                            pipelineCache,
                                            &pCreateInfos[i],
                                            pAllocator,
                                            &pPipelines[i]);
      if (result != VK_SUCCESS) {
         pPipelines[i] = VK_NULL_HANDLE;

         /* Bail out on the first error other than
          * VK_PIPELINE_COMPILE_REQUIRED, since it is not obvious which
          * error should be reported when two different creations fail.
          */
         if (result != VK_PIPELINE_COMPILE_REQUIRED)
            break;

         if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
            break;
      }
   }

   for (; i < count; i++)
      pPipelines[i] = VK_NULL_HANDLE;

   return result;
}

static void
dzn_compute_pipeline_destroy(struct dzn_compute_pipeline *pipeline,
                             const VkAllocationCallbacks *alloc)
{
   if (!pipeline)
      return;

   if (pipeline->indirect_cmd_sig)
      ID3D12CommandSignature_Release(pipeline->indirect_cmd_sig);

   dzn_pipeline_finish(&pipeline->base);
   vk_free2(&pipeline->base.base.device->alloc, alloc, pipeline);
}

static VkResult
dzn_pipeline_cache_lookup_compute_pipeline(struct vk_pipeline_cache *cache,
                                           uint8_t *pipeline_hash,
                                           D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc,
                                           D3D12_SHADER_BYTECODE *dxil,
                                           bool *cache_hit)
{
   *cache_hit = false;

   if (!cache)
      return VK_SUCCESS;

   struct vk_pipeline_cache_object *cache_obj = NULL;

   cache_obj =
      vk_pipeline_cache_lookup_object(cache, pipeline_hash, SHA1_DIGEST_LENGTH,
                                      &dzn_cached_blob_ops,
                                      NULL);
   if (!cache_obj)
      return VK_SUCCESS;

   struct dzn_cached_blob *cached_blob =
      container_of(cache_obj, struct dzn_cached_blob, base);

   assert(cached_blob->size == SHA1_DIGEST_LENGTH);

   const uint8_t *dxil_hash = cached_blob->data;
   gl_shader_stage stage;

   VkResult ret =
      dzn_pipeline_cache_lookup_dxil_shader(cache, dxil_hash, &stage, dxil);

   if (ret != VK_SUCCESS || stage == MESA_SHADER_NONE)
      goto out;

   assert(stage == MESA_SHADER_COMPUTE);

   d3d12_compute_pipeline_state_stream_new_desc(stream_desc, CS, D3D12_SHADER_BYTECODE, slot);
   *slot = *dxil;
   *cache_hit = true;

out:
   vk_pipeline_cache_object_unref(cache_obj);
   return ret;
}

static void
dzn_pipeline_cache_add_compute_pipeline(struct vk_pipeline_cache *cache,
                                        uint8_t *pipeline_hash,
                                        uint8_t *dxil_hash)
{
   struct vk_pipeline_cache_object *cache_obj =
      dzn_cached_blob_create(cache->base.device, pipeline_hash, NULL, SHA1_DIGEST_LENGTH);
   if (!cache_obj)
      return;

   struct dzn_cached_blob *cached_blob =
      container_of(cache_obj, struct dzn_cached_blob, base);

   memcpy((void *)cached_blob->data, dxil_hash, SHA1_DIGEST_LENGTH);

   cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);
   vk_pipeline_cache_object_unref(cache_obj);
}

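/*
 * Compile the compute shader, or fetch it from the pipeline cache. The
 * cache is two-level: the pipeline hash maps to a DXIL hash (see
 * dzn_pipeline_cache_add_compute_pipeline() above), which in turn resolves
 * to the shader bytecode, so a fully cached pipeline skips both the
 * SPIR-V->NIR and NIR->DXIL translations.
 */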
static VkResult
dzn_compute_pipeline_compile_shader(struct dzn_device *device,
                                    struct dzn_compute_pipeline *pipeline,
                                    struct vk_pipeline_cache *cache,
                                    const struct dzn_pipeline_layout *layout,
                                    D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc,
                                    D3D12_SHADER_BYTECODE *shader,
                                    const VkComputePipelineCreateInfo *info)
{
   uint8_t spirv_hash[SHA1_DIGEST_LENGTH], pipeline_hash[SHA1_DIGEST_LENGTH];
   VkResult ret = VK_SUCCESS;
   nir_shader *nir = NULL;

   if (cache) {
      struct mesa_sha1 pipeline_hash_ctx;

      _mesa_sha1_init(&pipeline_hash_ctx);
      vk_pipeline_hash_shader_stage(&info->stage, spirv_hash);
      _mesa_sha1_update(&pipeline_hash_ctx, spirv_hash, sizeof(spirv_hash));
      _mesa_sha1_update(&pipeline_hash_ctx, layout->stages[MESA_SHADER_COMPUTE].hash,
                        sizeof(layout->stages[MESA_SHADER_COMPUTE].hash));
      _mesa_sha1_final(&pipeline_hash_ctx, pipeline_hash);

      bool cache_hit = false;
      ret = dzn_pipeline_cache_lookup_compute_pipeline(cache, pipeline_hash,
                                                       stream_desc, shader,
                                                       &cache_hit);
      if (ret != VK_SUCCESS || cache_hit)
         goto out;
   }

   ret = dzn_pipeline_get_nir_shader(device, layout, cache, spirv_hash,
                                     &info->stage, MESA_SHADER_COMPUTE,
                                     DXIL_SPIRV_YZ_FLIP_NONE, 0, 0,
                                     false, NULL,
                                     dxil_get_nir_compiler_options(), &nir);
   if (ret != VK_SUCCESS)
      return ret;

   uint8_t bindings_hash[SHA1_DIGEST_LENGTH], dxil_hash[SHA1_DIGEST_LENGTH];

   NIR_PASS_V(nir, adjust_var_bindings, layout, cache ? bindings_hash : NULL);

   if (cache) {
      struct mesa_sha1 dxil_hash_ctx;

      _mesa_sha1_init(&dxil_hash_ctx);
      _mesa_sha1_update(&dxil_hash_ctx, spirv_hash, sizeof(spirv_hash));
      _mesa_sha1_update(&dxil_hash_ctx, bindings_hash, sizeof(bindings_hash));
      _mesa_sha1_final(&dxil_hash_ctx, dxil_hash);

      gl_shader_stage stage;

      ret = dzn_pipeline_cache_lookup_dxil_shader(cache, dxil_hash, &stage, shader);
      if (ret != VK_SUCCESS)
         goto out;

      if (stage != MESA_SHADER_NONE) {
         assert(stage == MESA_SHADER_COMPUTE);
         d3d12_compute_pipeline_state_stream_new_desc(stream_desc, CS, D3D12_SHADER_BYTECODE, cs);
         *cs = *shader;
         dzn_pipeline_cache_add_compute_pipeline(cache, pipeline_hash, dxil_hash);
         goto out;
      }
   }

   ret = dzn_pipeline_compile_shader(device, nir, shader);
   if (ret != VK_SUCCESS)
      goto out;

   d3d12_compute_pipeline_state_stream_new_desc(stream_desc, CS, D3D12_SHADER_BYTECODE, cs);
   *cs = *shader;

   if (cache) {
      dzn_pipeline_cache_add_dxil_shader(cache, dxil_hash, MESA_SHADER_COMPUTE, shader);
      dzn_pipeline_cache_add_compute_pipeline(cache, pipeline_hash, dxil_hash);
   }

out:
   ralloc_free(nir);
   return ret;
}

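/*
 * Compute pipelines only need a root signature and a CS sub-object, so the
 * state stream can live on the stack here; only the resulting
 * ID3D12PipelineState (and, lazily, an indirect command signature) is kept
 * on the pipeline object.
 */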
static VkResult
dzn_compute_pipeline_create(struct dzn_device *device,
                            VkPipelineCache cache,
                            const VkComputePipelineCreateInfo *pCreateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkPipeline *out)
{
   VK_FROM_HANDLE(dzn_pipeline_layout, layout, pCreateInfo->layout);
   VK_FROM_HANDLE(vk_pipeline_cache, pcache, cache);

   struct dzn_compute_pipeline *pipeline =
      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!pipeline)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   uintptr_t state_buf[MAX_COMPUTE_PIPELINE_STATE_STREAM_SIZE / sizeof(uintptr_t)];
   D3D12_PIPELINE_STATE_STREAM_DESC stream_desc = {
      .pPipelineStateSubobjectStream = state_buf,
   };

   dzn_pipeline_init(&pipeline->base, device,
                     VK_PIPELINE_BIND_POINT_COMPUTE,
                     layout, &stream_desc);

   D3D12_SHADER_BYTECODE shader = { 0 };
   VkResult ret =
      dzn_compute_pipeline_compile_shader(device, pipeline, pcache, layout,
                                          &stream_desc, &shader, pCreateInfo);
   if (ret != VK_SUCCESS)
      goto out;

   if (FAILED(ID3D12Device2_CreatePipelineState(device->dev, &stream_desc,
                                                &IID_ID3D12PipelineState,
                                                (void **)&pipeline->base.state)))
      ret = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

out:
   free((void *)shader.pShaderBytecode);
   if (ret != VK_SUCCESS)
      dzn_compute_pipeline_destroy(pipeline, pAllocator);
   else
      *out = dzn_compute_pipeline_to_handle(pipeline);

   return ret;
}

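/*
 * Lazily create the command signature used by vkCmdDispatchIndirect(). The
 * dispatch parameters are consumed twice per execution: once as root
 * constants (so shaders can read the workgroup counts through the sysval
 * CBV) and once as the actual DISPATCH arguments, which is why ByteStride
 * is twice the size of D3D12_DISPATCH_ARGUMENTS.
 */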
ID3D12CommandSignature *
dzn_compute_pipeline_get_indirect_cmd_sig(struct dzn_compute_pipeline *pipeline)
{
   if (pipeline->indirect_cmd_sig)
      return pipeline->indirect_cmd_sig;

   struct dzn_device *device =
      container_of(pipeline->base.base.device, struct dzn_device, vk);

   D3D12_INDIRECT_ARGUMENT_DESC indirect_dispatch_args[] = {
      {
         .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
         .Constant = {
            .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
            .DestOffsetIn32BitValues = 0,
            .Num32BitValuesToSet = 3,
         },
      },
      {
         .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH,
      },
   };

   D3D12_COMMAND_SIGNATURE_DESC indirect_dispatch_desc = {
      .ByteStride = sizeof(D3D12_DISPATCH_ARGUMENTS) * 2,
      .NumArgumentDescs = ARRAY_SIZE(indirect_dispatch_args),
      .pArgumentDescs = indirect_dispatch_args,
   };

   HRESULT hres =
      ID3D12Device1_CreateCommandSignature(device->dev, &indirect_dispatch_desc,
                                           pipeline->base.root.sig,
                                           &IID_ID3D12CommandSignature,
                                           (void **)&pipeline->indirect_cmd_sig);
   if (FAILED(hres))
      return NULL;

   return pipeline->indirect_cmd_sig;
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_CreateComputePipelines(VkDevice dev,
                           VkPipelineCache pipelineCache,
                           uint32_t count,
                           const VkComputePipelineCreateInfo *pCreateInfos,
                           const VkAllocationCallbacks *pAllocator,
                           VkPipeline *pPipelines)
{
   VK_FROM_HANDLE(dzn_device, device, dev);
   VkResult result = VK_SUCCESS;

   unsigned i;
   for (i = 0; i < count; i++) {
      result = dzn_compute_pipeline_create(device,
                                           pipelineCache,
                                           &pCreateInfos[i],
                                           pAllocator,
                                           &pPipelines[i]);
      if (result != VK_SUCCESS) {
         pPipelines[i] = VK_NULL_HANDLE;

         /* Bail out on the first error other than
          * VK_PIPELINE_COMPILE_REQUIRED, since it is not obvious which
          * error should be reported when two different creations fail.
          */
         if (result != VK_PIPELINE_COMPILE_REQUIRED)
            break;

         if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
            break;
      }
   }

   for (; i < count; i++)
      pPipelines[i] = VK_NULL_HANDLE;

   return result;
}

VKAPI_ATTR void VKAPI_CALL
dzn_DestroyPipeline(VkDevice device,
                    VkPipeline pipeline,
                    const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(dzn_pipeline, pipe, pipeline);

   if (!pipe)
      return;

   if (pipe->type == VK_PIPELINE_BIND_POINT_GRAPHICS) {
      struct dzn_graphics_pipeline *gfx = container_of(pipe, struct dzn_graphics_pipeline, base);
      dzn_graphics_pipeline_destroy(gfx, pAllocator);
   } else {
      assert(pipe->type == VK_PIPELINE_BIND_POINT_COMPUTE);
      struct dzn_compute_pipeline *compute = container_of(pipe, struct dzn_compute_pipeline, base);
      dzn_compute_pipeline_destroy(compute, pAllocator);
   }
}