• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "dzn_private.h"
25 
26 #include "spirv_to_dxil.h"
27 #include "nir_to_dxil.h"
28 
29 #include "dxil_nir.h"
30 #include "dxil_nir_lower_int_samplers.h"
31 #include "dxil_validator.h"
32 
33 static void
dzn_meta_compile_shader(struct dzn_device * device,nir_shader * nir,D3D12_SHADER_BYTECODE * slot)34 dzn_meta_compile_shader(struct dzn_device *device, nir_shader *nir,
35                         D3D12_SHADER_BYTECODE *slot)
36 {
37    struct dzn_instance *instance =
38       container_of(device->vk.physical->instance, struct dzn_instance, vk);
39    struct dzn_physical_device *pdev =
40       container_of(device->vk.physical, struct dzn_physical_device, vk);
41 
42    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
43 
44    if ((instance->debug_flags & DZN_DEBUG_NIR) &&
45        (instance->debug_flags & DZN_DEBUG_INTERNAL))
46       nir_print_shader(nir, stderr);
47 
48    struct nir_to_dxil_options opts = {
49       .environment = DXIL_ENVIRONMENT_VULKAN,
50       .shader_model_max = dzn_get_shader_model(pdev),
51 #ifdef _WIN32
52       .validator_version_max = dxil_get_validator_version(instance->dxil_validator),
53 #endif
54    };
55    struct blob dxil_blob;
56    ASSERTED bool ret = nir_to_dxil(nir, &opts, NULL, &dxil_blob);
57    assert(ret);
58 
59 #ifdef _WIN32
60    char *err = NULL;
61    bool res = dxil_validate_module(instance->dxil_validator,
62                                    dxil_blob.data,
63                                    dxil_blob.size, &err);
64 
65    if ((instance->debug_flags & DZN_DEBUG_DXIL) &&
66        (instance->debug_flags & DZN_DEBUG_INTERNAL)) {
67       char *disasm = dxil_disasm_module(instance->dxil_validator,
68                                         dxil_blob.data,
69                                         dxil_blob.size);
70       if (disasm) {
71          fprintf(stderr,
72                  "== BEGIN SHADER ============================================\n"
73                  "%s\n"
74                  "== END SHADER ==============================================\n",
75                   disasm);
76          ralloc_free(disasm);
77       }
78    }
79 
80    if ((instance->debug_flags & DZN_DEBUG_DXIL) &&
81        (instance->debug_flags & DZN_DEBUG_INTERNAL) &&
82        !res && !(instance->debug_flags & DZN_DEBUG_EXPERIMENTAL)) {
83       fprintf(stderr,
84             "== VALIDATION ERROR =============================================\n"
85             "%s\n"
86             "== END ==========================================================\n",
87             err ? err : "unknown");
88       ralloc_free(err);
89    }
90    assert(res || (instance->debug_flags & DZN_DEBUG_EXPERIMENTAL));
91 #endif
92 
93    void *data;
94    size_t size;
95    blob_finish_get_buffer(&dxil_blob, &data, &size);
96    slot->pShaderBytecode = data;
97    slot->BytecodeLength = size;
98 }
99 
100 #define DZN_META_INDIRECT_DRAW_MAX_PARAM_COUNT 5
101 
102 static void
dzn_meta_indirect_draw_finish(struct dzn_device * device,enum dzn_indirect_draw_type type)103 dzn_meta_indirect_draw_finish(struct dzn_device *device, enum dzn_indirect_draw_type type)
104 {
105    struct dzn_meta_indirect_draw *meta = &device->indirect_draws[type];
106 
107    if (meta->root_sig)
108       ID3D12RootSignature_Release(meta->root_sig);
109 
110    if (meta->pipeline_state)
111       ID3D12PipelineState_Release(meta->pipeline_state);
112 }
113 
114 static VkResult
dzn_meta_indirect_draw_init(struct dzn_device * device,enum dzn_indirect_draw_type type)115 dzn_meta_indirect_draw_init(struct dzn_device *device,
116                             enum dzn_indirect_draw_type type)
117 {
118    struct dzn_meta_indirect_draw *meta = &device->indirect_draws[type];
119    struct dzn_instance *instance =
120       container_of(device->vk.physical->instance, struct dzn_instance, vk);
121    VkResult ret = VK_SUCCESS;
122 
123    glsl_type_singleton_init_or_ref();
124 
125    nir_shader *nir = dzn_nir_indirect_draw_shader(type);
126    bool triangle_fan = type == DZN_INDIRECT_DRAW_TRIANGLE_FAN ||
127                        type == DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN ||
128                        type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN ||
129                        type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN ||
130                        type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART ||
131                        type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
132    bool indirect_count = type == DZN_INDIRECT_DRAW_COUNT ||
133                          type == DZN_INDIRECT_INDEXED_DRAW_COUNT ||
134                          type == DZN_INDIRECT_DRAW_COUNT_TRIANGLE_FAN ||
135                          type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN ||
136                          type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
137    bool prim_restart = type == DZN_INDIRECT_INDEXED_DRAW_TRIANGLE_FAN_PRIM_RESTART ||
138                        type == DZN_INDIRECT_INDEXED_DRAW_COUNT_TRIANGLE_FAN_PRIM_RESTART;
139    uint32_t shader_params_size =
140       triangle_fan && prim_restart ?
141       sizeof(struct dzn_indirect_draw_triangle_fan_prim_restart_rewrite_params) :
142       triangle_fan ?
143       sizeof(struct dzn_indirect_draw_triangle_fan_rewrite_params) :
144       sizeof(struct dzn_indirect_draw_rewrite_params);
145 
146    uint32_t root_param_count = 0;
147    D3D12_ROOT_PARAMETER1 root_params[DZN_META_INDIRECT_DRAW_MAX_PARAM_COUNT];
148 
149    root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) {
150       .ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS,
151       .Constants = {
152          .ShaderRegister = 0,
153          .RegisterSpace = 0,
154          .Num32BitValues = shader_params_size / 4,
155       },
156       .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
157    };
158 
159    root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) {
160       .ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV,
161       .Descriptor = {
162          .ShaderRegister = 1,
163          .RegisterSpace = 0,
164          .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE,
165       },
166       .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
167    };
168 
169    root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) {
170       .ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV,
171       .Descriptor = {
172          .ShaderRegister = 2,
173          .RegisterSpace = 0,
174          .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE,
175       },
176       .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
177    };
178 
179    if (indirect_count) {
180       root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) {
181          .ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV,
182          .Descriptor = {
183             .ShaderRegister = 3,
184             .RegisterSpace = 0,
185             .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE,
186          },
187          .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
188       };
189    }
190 
191 
192    if (triangle_fan) {
193       root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) {
194          .ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV,
195          .Descriptor = {
196             .ShaderRegister = 4,
197             .RegisterSpace = 0,
198             .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE,
199          },
200          .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
201       };
202    }
203 
204    assert(root_param_count <= ARRAY_SIZE(root_params));
205 
206    D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = {
207       .Version = D3D_ROOT_SIGNATURE_VERSION_1_1,
208       .Desc_1_1 = {
209          .NumParameters = root_param_count,
210          .pParameters = root_params,
211          .Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE,
212       },
213    };
214 
215    D3D12_COMPUTE_PIPELINE_STATE_DESC desc = {
216       .Flags = D3D12_PIPELINE_STATE_FLAG_NONE,
217    };
218 
219    meta->root_sig =
220       dzn_device_create_root_sig(device, &root_sig_desc);
221    if (!meta->root_sig) {
222       ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED);
223       goto out;
224    }
225 
226    desc.pRootSignature = meta->root_sig;
227    dzn_meta_compile_shader(device, nir, &desc.CS);
228    assert(desc.CS.pShaderBytecode);
229 
230    if (FAILED(ID3D12Device1_CreateComputePipelineState(device->dev, &desc,
231                                                        &IID_ID3D12PipelineState,
232                                                        (void **)&meta->pipeline_state)))
233       ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED);
234 
235 out:
236    if (ret != VK_SUCCESS)
237       dzn_meta_indirect_draw_finish(device, type);
238 
239    free((void *)desc.CS.pShaderBytecode);
240    ralloc_free(nir);
241    glsl_type_singleton_decref();
242 
243    return ret;
244 }
245 
246 #define DZN_META_TRIANGLE_FAN_REWRITE_IDX_MAX_PARAM_COUNT 4
247 
248 static void
dzn_meta_triangle_fan_rewrite_index_finish(struct dzn_device * device,enum dzn_index_type old_index_type)249 dzn_meta_triangle_fan_rewrite_index_finish(struct dzn_device *device,
250                                            enum dzn_index_type old_index_type)
251 {
252    struct dzn_meta_triangle_fan_rewrite_index *meta =
253       &device->triangle_fan[old_index_type];
254 
255    if (meta->root_sig)
256       ID3D12RootSignature_Release(meta->root_sig);
257    if (meta->pipeline_state)
258       ID3D12PipelineState_Release(meta->pipeline_state);
259    if (meta->cmd_sig)
260       ID3D12CommandSignature_Release(meta->cmd_sig);
261 }
262 
263 static VkResult
dzn_meta_triangle_fan_rewrite_index_init(struct dzn_device * device,enum dzn_index_type old_index_type)264 dzn_meta_triangle_fan_rewrite_index_init(struct dzn_device *device,
265                                          enum dzn_index_type old_index_type)
266 {
267    struct dzn_meta_triangle_fan_rewrite_index *meta =
268       &device->triangle_fan[old_index_type];
269    struct dzn_instance *instance =
270       container_of(device->vk.physical->instance, struct dzn_instance, vk);
271    VkResult ret = VK_SUCCESS;
272 
273    glsl_type_singleton_init_or_ref();
274 
275    uint8_t old_index_size = dzn_index_size(old_index_type);
276    bool prim_restart =
277       old_index_type == DZN_INDEX_2B_WITH_PRIM_RESTART ||
278       old_index_type == DZN_INDEX_4B_WITH_PRIM_RESTART;
279 
280    nir_shader *nir =
281       prim_restart ?
282       dzn_nir_triangle_fan_prim_restart_rewrite_index_shader(old_index_size) :
283       dzn_nir_triangle_fan_rewrite_index_shader(old_index_size);
284 
285    uint32_t root_param_count = 0;
286    D3D12_ROOT_PARAMETER1 root_params[DZN_META_TRIANGLE_FAN_REWRITE_IDX_MAX_PARAM_COUNT];
287 
288    root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) {
289       .ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV,
290       .Descriptor = {
291          .ShaderRegister = 1,
292          .RegisterSpace = 0,
293          .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE,
294       },
295       .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
296    };
297 
298    uint32_t params_size =
299       prim_restart ?
300       sizeof(struct dzn_triangle_fan_prim_restart_rewrite_index_params) :
301       sizeof(struct dzn_triangle_fan_rewrite_index_params);
302 
303    root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) {
304       .ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS,
305       .Constants = {
306          .ShaderRegister = 0,
307          .RegisterSpace = 0,
308          .Num32BitValues = params_size / 4,
309       },
310       .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
311    };
312 
313    if (old_index_type != DZN_NO_INDEX) {
314       root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) {
315          .ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV,
316          .Descriptor = {
317             .ShaderRegister = 2,
318             .RegisterSpace = 0,
319             .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE,
320          },
321          .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
322       };
323    }
324 
325    if (prim_restart) {
326       root_params[root_param_count++] = (D3D12_ROOT_PARAMETER1) {
327          .ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV,
328          .Descriptor = {
329             .ShaderRegister = 3,
330             .RegisterSpace = 0,
331             .Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE,
332          },
333          .ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
334       };
335    }
336 
337    assert(root_param_count <= ARRAY_SIZE(root_params));
338 
339    D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = {
340       .Version = D3D_ROOT_SIGNATURE_VERSION_1_1,
341       .Desc_1_1 = {
342          .NumParameters = root_param_count,
343          .pParameters = root_params,
344          .Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE,
345       },
346    };
347 
348    D3D12_COMPUTE_PIPELINE_STATE_DESC desc = {
349       .Flags = D3D12_PIPELINE_STATE_FLAG_NONE,
350    };
351 
352    uint32_t cmd_arg_count = 0;
353    D3D12_INDIRECT_ARGUMENT_DESC cmd_args[4];
354 
355    cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
356       .Type = D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW,
357       .UnorderedAccessView = {
358          .RootParameterIndex = 0,
359       },
360    };
361 
362    cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
363       .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
364       .Constant = {
365          .RootParameterIndex = 1,
366          .DestOffsetIn32BitValues = 0,
367          .Num32BitValuesToSet = params_size / 4,
368       },
369    };
370 
371    if (prim_restart) {
372       cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
373          .Type = D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW,
374          .UnorderedAccessView = {
375             .RootParameterIndex = 3,
376          },
377       };
378    }
379 
380    cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
381       .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH,
382    };
383 
384    assert(cmd_arg_count <= ARRAY_SIZE(cmd_args));
385 
386    uint32_t exec_params_size =
387       prim_restart ?
388       sizeof(struct dzn_indirect_triangle_fan_prim_restart_rewrite_index_exec_params) :
389       sizeof(struct dzn_indirect_triangle_fan_rewrite_index_exec_params);
390 
391    D3D12_COMMAND_SIGNATURE_DESC cmd_sig_desc = {
392       .ByteStride = exec_params_size,
393       .NumArgumentDescs = cmd_arg_count,
394       .pArgumentDescs = cmd_args,
395    };
396 
397    assert((cmd_sig_desc.ByteStride & 7) == 0);
398 
399    meta->root_sig = dzn_device_create_root_sig(device, &root_sig_desc);
400    if (!meta->root_sig) {
401       ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED);
402       goto out;
403    }
404 
405 
406    desc.pRootSignature = meta->root_sig;
407    dzn_meta_compile_shader(device, nir, &desc.CS);
408 
409    if (FAILED(ID3D12Device1_CreateComputePipelineState(device->dev, &desc,
410                                                        &IID_ID3D12PipelineState,
411                                                        (void **)&meta->pipeline_state))) {
412       ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED);
413       goto out;
414    }
415 
416    if (FAILED(ID3D12Device1_CreateCommandSignature(device->dev, &cmd_sig_desc,
417                                                    meta->root_sig,
418                                                    &IID_ID3D12CommandSignature,
419                                                    (void **)&meta->cmd_sig)))
420       ret = vk_error(instance, VK_ERROR_INITIALIZATION_FAILED);
421 
422 out:
423    if (ret != VK_SUCCESS)
424       dzn_meta_triangle_fan_rewrite_index_finish(device, old_index_type);
425 
426    free((void *)desc.CS.pShaderBytecode);
427    ralloc_free(nir);
428    glsl_type_singleton_decref();
429 
430    return ret;
431 }
432 
433 static const D3D12_SHADER_BYTECODE *
dzn_meta_blits_get_vs(struct dzn_device * device)434 dzn_meta_blits_get_vs(struct dzn_device *device)
435 {
436    struct dzn_meta_blits *meta = &device->blits;
437 
438    mtx_lock(&meta->shaders_lock);
439 
440    if (meta->vs.pShaderBytecode == NULL) {
441       nir_shader *nir = dzn_nir_blit_vs();
442 
443       NIR_PASS_V(nir, nir_lower_system_values);
444 
445       gl_system_value system_values[] = {
446          SYSTEM_VALUE_FIRST_VERTEX,
447          SYSTEM_VALUE_BASE_VERTEX,
448       };
449 
450       NIR_PASS_V(nir, dxil_nir_lower_system_values_to_zero, system_values,
451                 ARRAY_SIZE(system_values));
452 
453       D3D12_SHADER_BYTECODE bc;
454 
455       dzn_meta_compile_shader(device, nir, &bc);
456       meta->vs.pShaderBytecode =
457          vk_alloc(&device->vk.alloc, bc.BytecodeLength, 8,
458                   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
459       if (meta->vs.pShaderBytecode) {
460          meta->vs.BytecodeLength = bc.BytecodeLength;
461          memcpy((void *)meta->vs.pShaderBytecode, bc.pShaderBytecode, bc.BytecodeLength);
462       }
463       free((void *)bc.pShaderBytecode);
464       ralloc_free(nir);
465    }
466 
467    mtx_unlock(&meta->shaders_lock);
468 
469    return &meta->vs;
470 }
471 
472 static const D3D12_SHADER_BYTECODE *
dzn_meta_blits_get_fs(struct dzn_device * device,const struct dzn_nir_blit_info * info)473 dzn_meta_blits_get_fs(struct dzn_device *device,
474                       const struct dzn_nir_blit_info *info)
475 {
476    struct dzn_meta_blits *meta = &device->blits;
477    D3D12_SHADER_BYTECODE *out = NULL;
478 
479    mtx_lock(&meta->shaders_lock);
480 
481    STATIC_ASSERT(sizeof(struct dzn_nir_blit_info) == sizeof(uint32_t));
482 
483    struct hash_entry *he =
484       _mesa_hash_table_search(meta->fs, (void *)(uintptr_t)info->hash_key);
485 
486    if (!he) {
487       nir_shader *nir = dzn_nir_blit_fs(info);
488 
489       if (info->out_type != GLSL_TYPE_FLOAT) {
490          dxil_wrap_sampler_state wrap_state = {
491             .is_int_sampler = 1,
492             .is_linear_filtering = 0,
493             .skip_boundary_conditions = 1,
494          };
495          dxil_lower_sample_to_txf_for_integer_tex(nir, 1, &wrap_state, NULL, 0);
496       }
497 
498       D3D12_SHADER_BYTECODE bc;
499 
500       dzn_meta_compile_shader(device, nir, &bc);
501 
502       out = vk_alloc(&device->vk.alloc,
503                      sizeof(D3D12_SHADER_BYTECODE) + bc.BytecodeLength, 8,
504                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
505       if (out) {
506          out->pShaderBytecode = out + 1;
507          memcpy((void *)out->pShaderBytecode, bc.pShaderBytecode, bc.BytecodeLength);
508          out->BytecodeLength = bc.BytecodeLength;
509          _mesa_hash_table_insert(meta->fs, &info->hash_key, out);
510       }
511       free((void *)bc.pShaderBytecode);
512       ralloc_free(nir);
513    } else {
514       out = he->data;
515    }
516 
517    mtx_unlock(&meta->shaders_lock);
518 
519    return out;
520 }
521 
522 static void
dzn_meta_blit_destroy(struct dzn_device * device,struct dzn_meta_blit * blit)523 dzn_meta_blit_destroy(struct dzn_device *device, struct dzn_meta_blit *blit)
524 {
525    if (!blit)
526       return;
527 
528    if (blit->root_sig)
529       ID3D12RootSignature_Release(blit->root_sig);
530    if (blit->pipeline_state)
531       ID3D12PipelineState_Release(blit->pipeline_state);
532 
533    vk_free(&device->vk.alloc, blit);
534 }
535 
536 static struct dzn_meta_blit *
dzn_meta_blit_create(struct dzn_device * device,const struct dzn_meta_blit_key * key)537 dzn_meta_blit_create(struct dzn_device *device, const struct dzn_meta_blit_key *key)
538 {
539    struct dzn_meta_blit *blit =
540       vk_zalloc(&device->vk.alloc, sizeof(*blit), 8,
541                 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
542 
543    if (!blit)
544       return NULL;
545 
546    D3D12_DESCRIPTOR_RANGE1 ranges[] = {
547       {
548          .RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
549          .NumDescriptors = 1,
550          .BaseShaderRegister = 0,
551          .RegisterSpace = 0,
552          .Flags = D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS,
553          .OffsetInDescriptorsFromTableStart = 0,
554       },
555    };
556    D3D12_DESCRIPTOR_RANGE1 sampler_ranges[] = {
557       {
558          .RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER,
559          .NumDescriptors = 1,
560          .BaseShaderRegister = 0,
561          .RegisterSpace = 0,
562          .Flags = 0,
563          .OffsetInDescriptorsFromTableStart = 0,
564       },
565    };
566 
567    D3D12_STATIC_SAMPLER_DESC samplers[] = {
568       {
569          .Filter = key->linear_filter ?
570                    D3D12_FILTER_MIN_MAG_MIP_LINEAR :
571                    D3D12_FILTER_MIN_MAG_MIP_POINT,
572          .AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP,
573          .AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP,
574          .AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP,
575          .MipLODBias = 0,
576          .MaxAnisotropy = 0,
577          .MinLOD = 0,
578          .MaxLOD = D3D12_FLOAT32_MAX,
579          .ShaderRegister = 0,
580          .RegisterSpace = 0,
581          .ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL,
582       },
583    };
584 
585    D3D12_ROOT_PARAMETER1 root_params[4] = {
586       {
587          .ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE,
588          .DescriptorTable = {
589             .NumDescriptorRanges = ARRAY_SIZE(ranges),
590             .pDescriptorRanges = ranges,
591          },
592          .ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL,
593       },
594       {
595          .ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS,
596          .Constants = {
597             .ShaderRegister = 0,
598             .RegisterSpace = 0,
599             .Num32BitValues = 17,
600          },
601          .ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX,
602       },
603       {
604          .ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE,
605          .DescriptorTable = {
606             .NumDescriptorRanges = ARRAY_SIZE(sampler_ranges),
607             .pDescriptorRanges = sampler_ranges,
608          },
609          .ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL,
610       },
611       {
612          .ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS,
613          .Constants = {
614             .ShaderRegister = 0,
615             .RegisterSpace = 0,
616             .Num32BitValues = 1,
617          },
618          .ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL,
619       },
620    };
621    uint32_t num_root_params = 2;
622 
623    uint32_t samples = key->resolve_mode == dzn_blit_resolve_none ?
624       key->samples : 1;
625    D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = {
626       .SampleMask = (1ULL << samples) - 1,
627       .RasterizerState = {
628          .FillMode = D3D12_FILL_MODE_SOLID,
629          .CullMode = D3D12_CULL_MODE_NONE,
630          .DepthClipEnable = true,
631       },
632       .PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE,
633       .SampleDesc = {
634          .Count = samples,
635          .Quality = 0,
636       },
637       .Flags = D3D12_PIPELINE_STATE_FLAG_NONE,
638    };
639 
640    struct dzn_nir_blit_info blit_fs_info = {
641       .src_samples = key->samples,
642       .loc = key->loc,
643       .out_type = key->out_type,
644       .sampler_dim = key->sampler_dim,
645       .src_is_array = key->src_is_array,
646       .resolve_mode = key->resolve_mode,
647       .stencil_fallback = key->loc == FRAG_RESULT_STENCIL && key->stencil_bit != 0xf,
648       .padding = 0,
649    };
650 
651    D3D12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = {
652       .Version = D3D_ROOT_SIGNATURE_VERSION_1_1,
653       .Desc_1_1 = {
654          .NumParameters = num_root_params,
655          .pParameters = root_params,
656          .NumStaticSamplers = ARRAY_SIZE(samplers),
657          .pStaticSamplers = samplers,
658          .Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE,
659       },
660    };
661 
662    if (!device->support_static_samplers) {
663       root_sig_desc.Desc_1_1.NumStaticSamplers = 0;
664       root_sig_desc.Desc_1_1.NumParameters = 3;
665    }
666 
667    /* Don't need fs constants unless we're doing the stencil fallback */
668    if (blit_fs_info.stencil_fallback) {
669       if (device->support_static_samplers) {
670          root_params[2] = root_params[3];
671          root_sig_desc.Desc_1_1.NumParameters = 3;
672       } else {
673          root_sig_desc.Desc_1_1.NumParameters = 4;
674       }
675    }
676 
677    blit->root_sig = dzn_device_create_root_sig(device, &root_sig_desc);
678    if (!blit->root_sig) {
679       dzn_meta_blit_destroy(device, blit);
680       return NULL;
681    }
682 
683    desc.pRootSignature = blit->root_sig;
684 
685    const D3D12_SHADER_BYTECODE *vs, *fs;
686 
687    vs = dzn_meta_blits_get_vs(device);
688    if (!vs) {
689       dzn_meta_blit_destroy(device, blit);
690       return NULL;
691    }
692 
693    desc.VS = *vs;
694    assert(desc.VS.pShaderBytecode);
695 
696    fs = dzn_meta_blits_get_fs(device, &blit_fs_info);
697    if (!fs) {
698       dzn_meta_blit_destroy(device, blit);
699       return NULL;
700    }
701 
702    desc.PS = *fs;
703    assert(desc.PS.pShaderBytecode);
704 
705    assert(key->loc == FRAG_RESULT_DATA0 ||
706           key->loc == FRAG_RESULT_DEPTH ||
707           key->loc == FRAG_RESULT_STENCIL);
708 
709    if (key->loc == FRAG_RESULT_DATA0) {
710       desc.NumRenderTargets = 1;
711       desc.RTVFormats[0] = key->out_format;
712       desc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf;
713    } else {
714       desc.DSVFormat = key->out_format;
715       if (key->loc == FRAG_RESULT_DEPTH) {
716          desc.DepthStencilState.DepthEnable = true;
717          desc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL;
718          desc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS;
719       } else {
720          assert(key->loc == FRAG_RESULT_STENCIL);
721          desc.DepthStencilState.StencilEnable = true;
722          desc.DepthStencilState.StencilWriteMask = key->stencil_bit == 0xf ? 0xff : (1 << key->stencil_bit);
723          desc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_REPLACE;
724          desc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_REPLACE;
725          desc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_REPLACE;
726          desc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
727          desc.DepthStencilState.BackFace = desc.DepthStencilState.FrontFace;
728       }
729    }
730 
731    if (FAILED(ID3D12Device1_CreateGraphicsPipelineState(device->dev, &desc,
732                                                         &IID_ID3D12PipelineState,
733                                                         (void **)&blit->pipeline_state))) {
734       dzn_meta_blit_destroy(device, blit);
735       return NULL;
736    }
737 
738    return blit;
739 }
740 
741 const struct dzn_meta_blit *
dzn_meta_blits_get_context(struct dzn_device * device,const struct dzn_meta_blit_key * key)742 dzn_meta_blits_get_context(struct dzn_device *device,
743                            const struct dzn_meta_blit_key *key)
744 {
745    struct dzn_meta_blit *out = NULL;
746 
747    STATIC_ASSERT(sizeof(*key) == sizeof(uint64_t));
748 
749    mtx_lock(&device->blits.contexts_lock);
750 
751    out =
752       _mesa_hash_table_u64_search(device->blits.contexts, key->u64);
753    if (!out) {
754       out = dzn_meta_blit_create(device, key);
755 
756       if (out)
757          _mesa_hash_table_u64_insert(device->blits.contexts, key->u64, out);
758    }
759 
760    mtx_unlock(&device->blits.contexts_lock);
761 
762    return out;
763 }
764 
765 static void
dzn_meta_blits_finish(struct dzn_device * device)766 dzn_meta_blits_finish(struct dzn_device *device)
767 {
768    struct dzn_meta_blits *meta = &device->blits;
769 
770    vk_free(&device->vk.alloc, (void *)meta->vs.pShaderBytecode);
771 
772    if (meta->fs) {
773       hash_table_foreach(meta->fs, he)
774          vk_free(&device->vk.alloc, he->data);
775       _mesa_hash_table_destroy(meta->fs, NULL);
776    }
777 
778    if (meta->contexts) {
779       hash_table_foreach(meta->contexts->table, he)
780          dzn_meta_blit_destroy(device, he->data);
781       _mesa_hash_table_u64_destroy(meta->contexts);
782    }
783 
784    mtx_destroy(&meta->shaders_lock);
785    mtx_destroy(&meta->contexts_lock);
786 }
787 
788 static VkResult
dzn_meta_blits_init(struct dzn_device * device)789 dzn_meta_blits_init(struct dzn_device *device)
790 {
791    struct dzn_instance *instance =
792       container_of(device->vk.physical->instance, struct dzn_instance, vk);
793    struct dzn_meta_blits *meta = &device->blits;
794 
795    mtx_init(&meta->shaders_lock, mtx_plain);
796    mtx_init(&meta->contexts_lock, mtx_plain);
797 
798    meta->fs = _mesa_hash_table_create_u32_keys(NULL);
799    if (!meta->fs) {
800       dzn_meta_blits_finish(device);
801       return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
802    }
803 
804    meta->contexts = _mesa_hash_table_u64_create(NULL);
805    if (!meta->contexts) {
806       dzn_meta_blits_finish(device);
807       return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
808    }
809 
810    return VK_SUCCESS;
811 }
812 
813 void
dzn_meta_finish(struct dzn_device * device)814 dzn_meta_finish(struct dzn_device *device)
815 {
816    for (uint32_t i = 0; i < ARRAY_SIZE(device->triangle_fan); i++)
817       dzn_meta_triangle_fan_rewrite_index_finish(device, i);
818 
819    for (uint32_t i = 0; i < ARRAY_SIZE(device->indirect_draws); i++)
820       dzn_meta_indirect_draw_finish(device, i);
821 
822    dzn_meta_blits_finish(device);
823 }
824 
825 VkResult
dzn_meta_init(struct dzn_device * device)826 dzn_meta_init(struct dzn_device *device)
827 {
828    VkResult result = dzn_meta_blits_init(device);
829    if (result != VK_SUCCESS)
830       goto out;
831 
832    for (uint32_t i = 0; i < ARRAY_SIZE(device->indirect_draws); i++) {
833       VkResult result =
834          dzn_meta_indirect_draw_init(device, i);
835       if (result != VK_SUCCESS)
836          goto out;
837    }
838 
839    for (uint32_t i = 0; i < ARRAY_SIZE(device->triangle_fan); i++) {
840       VkResult result =
841          dzn_meta_triangle_fan_rewrite_index_init(device, i);
842       if (result != VK_SUCCESS)
843          goto out;
844    }
845 
846 out:
847    if (result != VK_SUCCESS) {
848       dzn_meta_finish(device);
849       return result;
850    }
851 
852    return VK_SUCCESS;
853 }
854