1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "util/mesa-sha1.h"
31 #include "util/os_time.h"
32 #include "common/intel_compute_slm.h"
33 #include "common/intel_l3_config.h"
34 #include "common/intel_sample_positions.h"
35 #include "compiler/brw_disasm.h"
36 #include "anv_private.h"
37 #include "compiler/brw_nir.h"
38 #include "compiler/brw_nir_rt.h"
39 #include "compiler/intel_nir.h"
40 #include "anv_nir.h"
41 #include "nir/nir_xfb_info.h"
42 #include "spirv/nir_spirv.h"
43 #include "vk_nir_convert_ycbcr.h"
44 #include "vk_nir.h"
45 #include "vk_pipeline.h"
46 #include "vk_render_pass.h"
47 #include "vk_util.h"
48
49 /* Eventually, this will become part of anv_CreateShader. Unfortunately,
50 * we can't do that yet because we don't have the ability to copy nir.
51 */
52 static nir_shader *
53 anv_shader_stage_to_nir(struct anv_device *device,
54 VkPipelineCreateFlags2KHR pipeline_flags,
55 const VkPipelineShaderStageCreateInfo *stage_info,
56 enum brw_robustness_flags robust_flags,
57 void *mem_ctx)
58 {
59 const struct anv_physical_device *pdevice = device->physical;
60 const struct brw_compiler *compiler = pdevice->compiler;
61 gl_shader_stage stage = vk_to_mesa_shader_stage(stage_info->stage);
62 const nir_shader_compiler_options *nir_options =
63 compiler->nir_options[stage];
64
65 const struct spirv_to_nir_options spirv_options = {
66 .ubo_addr_format = anv_nir_ubo_addr_format(pdevice, robust_flags),
67 .ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_flags),
68 .phys_ssbo_addr_format = nir_address_format_64bit_global,
69 .push_const_addr_format = nir_address_format_logical,
70
71 /* TODO: Consider changing this to an address format where the NULL
72 * pointer is equal to 0. That might be a better format to play nice
73 * with certain code / code generators.
74 */
75 .shared_addr_format = nir_address_format_32bit_offset,
76
77 .min_ubo_alignment = ANV_UBO_ALIGNMENT,
78 .min_ssbo_alignment = ANV_SSBO_ALIGNMENT,
79 };
80
81 nir_shader *nir;
82 VkResult result =
83 vk_pipeline_shader_stage_to_nir(&device->vk, pipeline_flags, stage_info,
84 &spirv_options, nir_options,
85 mem_ctx, &nir);
86 if (result != VK_SUCCESS)
87 return NULL;
88
89 if (INTEL_DEBUG(intel_debug_flag_for_shader_stage(stage))) {
90 fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n",
91 gl_shader_stage_name(stage));
92 nir_print_shader(nir, stderr);
93 }
94
95 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
96 nir_shader_get_entrypoint(nir), true, false);
97
98 return nir;
99 }
100
101 static VkResult
102 anv_pipeline_init(struct anv_pipeline *pipeline,
103 struct anv_device *device,
104 enum anv_pipeline_type type,
105 VkPipelineCreateFlags2KHR flags,
106 const VkAllocationCallbacks *pAllocator)
107 {
108 VkResult result;
109
110 memset(pipeline, 0, sizeof(*pipeline));
111
112 vk_object_base_init(&device->vk, &pipeline->base,
113 VK_OBJECT_TYPE_PIPELINE);
114 pipeline->device = device;
115
116 /* It's the job of the child class to provide actual backing storage for
117 * the batch by setting batch.start, batch.next, and batch.end.
118 */
119 pipeline->batch.alloc = pAllocator ? pAllocator : &device->vk.alloc;
120 pipeline->batch.relocs = &pipeline->batch_relocs;
121 pipeline->batch.status = VK_SUCCESS;
122
123 const bool uses_relocs = device->physical->uses_relocs;
124 result = anv_reloc_list_init(&pipeline->batch_relocs,
125 pipeline->batch.alloc, uses_relocs);
126 if (result != VK_SUCCESS)
127 return result;
128
129 pipeline->mem_ctx = ralloc_context(NULL);
130
131 pipeline->type = type;
132 pipeline->flags = flags;
133
134 util_dynarray_init(&pipeline->executables, pipeline->mem_ctx);
135
136 anv_pipeline_sets_layout_init(&pipeline->layout, device,
137 false /* independent_sets */);
138
139 return VK_SUCCESS;
140 }
141
142 static void
143 anv_pipeline_init_layout(struct anv_pipeline *pipeline,
144 struct anv_pipeline_layout *pipeline_layout)
145 {
146 if (pipeline_layout) {
147 struct anv_pipeline_sets_layout *layout = &pipeline_layout->sets_layout;
148 for (uint32_t s = 0; s < layout->num_sets; s++) {
149 if (layout->set[s].layout == NULL)
150 continue;
151
152 anv_pipeline_sets_layout_add(&pipeline->layout, s,
153 layout->set[s].layout);
154 }
155 }
156
157 anv_pipeline_sets_layout_hash(&pipeline->layout);
158 assert(!pipeline_layout ||
159 !memcmp(pipeline->layout.sha1,
160 pipeline_layout->sets_layout.sha1,
161 sizeof(pipeline_layout->sets_layout.sha1)));
162 }
163
164 static void
165 anv_pipeline_finish(struct anv_pipeline *pipeline,
166 struct anv_device *device)
167 {
168 anv_pipeline_sets_layout_fini(&pipeline->layout);
169 anv_reloc_list_finish(&pipeline->batch_relocs);
170 ralloc_free(pipeline->mem_ctx);
171 vk_object_base_finish(&pipeline->base);
172 }
173
174 void anv_DestroyPipeline(
175 VkDevice _device,
176 VkPipeline _pipeline,
177 const VkAllocationCallbacks* pAllocator)
178 {
179 ANV_FROM_HANDLE(anv_device, device, _device);
180 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
181
182 if (!pipeline)
183 return;
184
185 ANV_RMV(resource_destroy, device, pipeline);
186
187 switch (pipeline->type) {
188 case ANV_PIPELINE_GRAPHICS:
189 case ANV_PIPELINE_GRAPHICS_LIB: {
190 struct anv_graphics_base_pipeline *gfx_pipeline =
191 anv_pipeline_to_graphics_base(pipeline);
192
193 for (unsigned s = 0; s < ARRAY_SIZE(gfx_pipeline->shaders); s++) {
194 if (gfx_pipeline->shaders[s])
195 anv_shader_bin_unref(device, gfx_pipeline->shaders[s]);
196 }
197 break;
198 }
199
200 case ANV_PIPELINE_COMPUTE: {
201 struct anv_compute_pipeline *compute_pipeline =
202 anv_pipeline_to_compute(pipeline);
203
204 if (compute_pipeline->cs)
205 anv_shader_bin_unref(device, compute_pipeline->cs);
206
207 break;
208 }
209
210 case ANV_PIPELINE_RAY_TRACING: {
211 struct anv_ray_tracing_pipeline *rt_pipeline =
212 anv_pipeline_to_ray_tracing(pipeline);
213
214 util_dynarray_foreach(&rt_pipeline->shaders,
215 struct anv_shader_bin *, shader) {
216 anv_shader_bin_unref(device, *shader);
217 }
218 break;
219 }
220
221 default:
222 unreachable("invalid pipeline type");
223 }
224
225 anv_pipeline_finish(pipeline, device);
226 vk_free2(&device->vk.alloc, pAllocator, pipeline);
227 }
228
229 struct anv_pipeline_stage {
230 gl_shader_stage stage;
231
232 VkPipelineCreateFlags2KHR pipeline_flags;
233 struct vk_pipeline_robustness_state rstate;
234
235 /* VkComputePipelineCreateInfo, VkGraphicsPipelineCreateInfo or
236 * VkRayTracingPipelineCreateInfoKHR pNext field
237 */
238 const void *pipeline_pNext;
239 const VkPipelineShaderStageCreateInfo *info;
240
241 unsigned char shader_sha1[20];
242 uint32_t source_hash;
243
244 union brw_any_prog_key key;
245
246 struct {
247 gl_shader_stage stage;
248 unsigned char sha1[20];
249 } cache_key;
250
251 nir_shader *nir;
252
253 struct {
254 nir_shader *nir;
255 struct anv_shader_bin *bin;
256 } imported;
257
258 struct anv_push_descriptor_info push_desc_info;
259
260 enum gl_subgroup_size subgroup_size_type;
261
262 enum brw_robustness_flags robust_flags;
263
264 struct anv_pipeline_bind_map bind_map;
265
266 bool uses_bt_for_push_descs;
267
268 enum anv_dynamic_push_bits dynamic_push_values;
269
270 union brw_any_prog_data prog_data;
271
272 uint32_t num_stats;
273 struct brw_compile_stats stats[3];
274 char *disasm[3];
275
276 VkPipelineCreationFeedback feedback;
277 uint32_t feedback_idx;
278
279 const unsigned *code;
280
281 struct anv_shader_bin *bin;
282 };
283
284 static void
285 anv_stage_allocate_bind_map_tables(struct anv_pipeline *pipeline,
286 struct anv_pipeline_stage *stage,
287 void *mem_ctx)
288 {
289 struct anv_pipeline_binding *surface_bindings =
290 brw_shader_stage_requires_bindless_resources(stage->stage) ? NULL :
291 rzalloc_array(mem_ctx, struct anv_pipeline_binding, 256);
292 struct anv_pipeline_binding *sampler_bindings =
293 brw_shader_stage_requires_bindless_resources(stage->stage) ? NULL :
294 rzalloc_array(mem_ctx, struct anv_pipeline_binding, 256);
295 struct anv_pipeline_embedded_sampler_binding *embedded_sampler_bindings =
296 rzalloc_array(mem_ctx, struct anv_pipeline_embedded_sampler_binding,
297 anv_pipeline_sets_layout_embedded_sampler_count(
298 &pipeline->layout));
299
300 stage->bind_map = (struct anv_pipeline_bind_map) {
301 .surface_to_descriptor = surface_bindings,
302 .sampler_to_descriptor = sampler_bindings,
303 .embedded_sampler_to_binding = embedded_sampler_bindings,
304 };
305 }
306
307 static enum brw_robustness_flags
308 anv_get_robust_flags(const struct vk_pipeline_robustness_state *rstate)
309 {
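   /* For example, with robustness left enabled for both storage and uniform
    * buffers this returns BRW_ROBUSTNESS_SSBO | BRW_ROBUSTNESS_UBO; with
    * both disabled it returns 0.
    */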
310 return
311 ((rstate->storage_buffers !=
312 VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT) ?
313 BRW_ROBUSTNESS_SSBO : 0) |
314 ((rstate->uniform_buffers !=
315 VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT) ?
316 BRW_ROBUSTNESS_UBO : 0);
317 }
318
319 static void
320 populate_base_prog_key(struct anv_pipeline_stage *stage,
321 const struct anv_device *device)
322 {
323 stage->key.base.robust_flags = anv_get_robust_flags(&stage->rstate);
324 stage->key.base.limit_trig_input_range =
325 device->physical->instance->limit_trig_input_range;
326 }
327
328 static void
329 populate_vs_prog_key(struct anv_pipeline_stage *stage,
330 const struct anv_device *device)
331 {
332 memset(&stage->key, 0, sizeof(stage->key));
333
334 populate_base_prog_key(stage, device);
335 }
336
337 static void
338 populate_tcs_prog_key(struct anv_pipeline_stage *stage,
339 const struct anv_device *device,
340 unsigned input_vertices)
341 {
342 memset(&stage->key, 0, sizeof(stage->key));
343
344 populate_base_prog_key(stage, device);
345
346 stage->key.tcs.input_vertices = input_vertices;
347 }
348
349 static void
350 populate_tes_prog_key(struct anv_pipeline_stage *stage,
351 const struct anv_device *device)
352 {
353 memset(&stage->key, 0, sizeof(stage->key));
354
355 populate_base_prog_key(stage, device);
356 }
357
358 static void
359 populate_gs_prog_key(struct anv_pipeline_stage *stage,
360 const struct anv_device *device)
361 {
362 memset(&stage->key, 0, sizeof(stage->key));
363
364 populate_base_prog_key(stage, device);
365 }
366
367 static bool
368 pipeline_has_coarse_pixel(const BITSET_WORD *dynamic,
369 const struct vk_multisample_state *ms,
370 const struct vk_fragment_shading_rate_state *fsr)
371 {
372 /* The Vulkan 1.2.199 spec says:
373 *
374 * "If any of the following conditions are met, Cxy' must be set to
375 * {1,1}:
376 *
377 * * If Sample Shading is enabled.
378 * * [...]"
379 *
380 * And "sample shading" is defined as follows:
381 *
382 * "Sample shading is enabled for a graphics pipeline:
383 *
384 * * If the interface of the fragment shader entry point of the
385 * graphics pipeline includes an input variable decorated with
386 * SampleId or SamplePosition. In this case minSampleShadingFactor
387 * takes the value 1.0.
388 *
389 * * Else if the sampleShadingEnable member of the
390 * VkPipelineMultisampleStateCreateInfo structure specified when
391 * creating the graphics pipeline is set to VK_TRUE. In this case
392 * minSampleShadingFactor takes the value of
393 * VkPipelineMultisampleStateCreateInfo::minSampleShading.
394 *
395 * Otherwise, sample shading is considered disabled."
396 *
397 * The first bullet above is handled by the back-end compiler because those
398 * inputs both force per-sample dispatch. The second bullet is handled
399 * here. Note that this sample shading being enabled has nothing to do
400 * with minSampleShading.
401 */
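   /* In short: coarse pixel shading remains possible only when sample
    * shading is off and either the shading rate is dynamic or a static rate
    * other than 1x1 with KEEP combiners is provided.
    */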
402 if (ms != NULL && ms->sample_shading_enable)
403 return false;
404
405 /* The shading rate is not dynamic and the pipeline either has a 1x1
406 * fragment shading rate with combiner operations that cannot change it,
407 * or does not specify a fragment shading rate at all.
408 */
409 if (!BITSET_TEST(dynamic, MESA_VK_DYNAMIC_FSR) &&
410 (fsr == NULL ||
411 (fsr->fragment_size.width <= 1 &&
412 fsr->fragment_size.height <= 1 &&
413 fsr->combiner_ops[0] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR &&
414 fsr->combiner_ops[1] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR)))
415 return false;
416
417 return true;
418 }
419
420 static void
421 populate_task_prog_key(struct anv_pipeline_stage *stage,
422 const struct anv_device *device)
423 {
424 memset(&stage->key, 0, sizeof(stage->key));
425
426 populate_base_prog_key(stage, device);
427 }
428
429 static void
430 populate_mesh_prog_key(struct anv_pipeline_stage *stage,
431 const struct anv_device *device,
432 bool compact_mue)
433 {
434 memset(&stage->key, 0, sizeof(stage->key));
435
436 populate_base_prog_key(stage, device);
437
438 stage->key.mesh.compact_mue = compact_mue;
439 }
440
441 static uint32_t
442 rp_color_mask(const struct vk_render_pass_state *rp)
443 {
444 if (rp == NULL || !vk_render_pass_state_has_attachment_info(rp))
445 return ((1u << MAX_RTS) - 1);
446
447 uint32_t color_mask = 0;
448 for (uint32_t i = 0; i < rp->color_attachment_count; i++) {
449 if (rp->color_attachment_formats[i] != VK_FORMAT_UNDEFINED)
450 color_mask |= BITFIELD_BIT(i);
451 }
452
453 /* If there is a depth/stencil attachment, even if the fragment shader
454 * doesn't write the depth/stencil output, we need a valid render target so
455 * that the compiler doesn't use the null-rt which would cull the
456 * depth/stencil output.
457 */
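   /* For example (illustrative attachment setup): with color attachment 0
    * set to VK_FORMAT_UNDEFINED, attachment 1 using a valid format and a
    * depth/stencil attachment present, the loop above yields 0x2 and the
    * check below adds bit 0, so the final mask is 0x3.
    */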
458 if (rp->depth_attachment_format != VK_FORMAT_UNDEFINED ||
459 rp->stencil_attachment_format != VK_FORMAT_UNDEFINED)
460 color_mask |= 1;
461
462 return color_mask;
463 }
464
465 static void
466 populate_wm_prog_key(struct anv_pipeline_stage *stage,
467 const struct anv_graphics_base_pipeline *pipeline,
468 const BITSET_WORD *dynamic,
469 const struct vk_multisample_state *ms,
470 const struct vk_fragment_shading_rate_state *fsr,
471 const struct vk_render_pass_state *rp,
472 const enum intel_sometimes is_mesh)
473 {
474 const struct anv_device *device = pipeline->base.device;
475
476 memset(&stage->key, 0, sizeof(stage->key));
477
478 populate_base_prog_key(stage, device);
479
480 struct brw_wm_prog_key *key = &stage->key.wm;
481
482 /* We set this to 0 here and set to the actual value before we call
483 * brw_compile_fs.
484 */
485 key->input_slots_valid = 0;
486
487 /* XXX Vulkan doesn't appear to specify */
488 key->clamp_fragment_color = false;
489
490 key->ignore_sample_mask_out = false;
491
492 assert(rp == NULL || rp->color_attachment_count <= MAX_RTS);
493 /* Consider all outputs as valid until we look at the NIR variables. */
494 key->color_outputs_valid = rp_color_mask(rp);
495 key->nr_color_regions = util_last_bit(key->color_outputs_valid);
496
497 /* To reduce possible shader recompilations we would need to know if
498 * there is a SampleMask output variable, in order to decide whether we
499 * should emit code working around the hardware disabling alpha to
500 * coverage when a SampleMask output is present.
501 *
502 * If the pipeline we compile the fragment shader in includes the output
503 * interface, then we can be sure whether alpha_coverage is enabled or not.
504 * If we don't have that output interface, then we have to compile the
505 * shader with some conditionals.
506 */
507 if (ms != NULL) {
508 /* VUID-VkGraphicsPipelineCreateInfo-rasterizerDiscardEnable-00751:
509 *
510 * "If the pipeline is being created with fragment shader state,
511 * pMultisampleState must be a valid pointer to a valid
512 * VkPipelineMultisampleStateCreateInfo structure"
513 *
514 * It's also required for the fragment output interface.
515 */
516 key->multisample_fbo =
517 BITSET_TEST(dynamic, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) ?
518 INTEL_SOMETIMES :
519 ms->rasterization_samples > 1 ? INTEL_ALWAYS : INTEL_NEVER;
520 key->persample_interp =
521 BITSET_TEST(dynamic, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) ?
522 INTEL_SOMETIMES :
523 (ms->sample_shading_enable &&
524 (ms->min_sample_shading * ms->rasterization_samples) > 1) ?
525 INTEL_ALWAYS : INTEL_NEVER;
526 key->alpha_to_coverage =
527 BITSET_TEST(dynamic, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE) ?
528 INTEL_SOMETIMES :
529 (ms->alpha_to_coverage_enable ? INTEL_ALWAYS : INTEL_NEVER);
530
531 /* TODO: We should make this dynamic */
532 if (device->physical->instance->sample_mask_out_opengl_behaviour)
533 key->ignore_sample_mask_out = !key->multisample_fbo;
534 } else {
535 /* Consider all outputs as valid until we look at the NIR variables. */
536 key->color_outputs_valid = (1u << MAX_RTS) - 1;
537 key->nr_color_regions = MAX_RTS;
538
539 key->alpha_to_coverage = INTEL_SOMETIMES;
540 key->multisample_fbo = INTEL_SOMETIMES;
541 key->persample_interp = INTEL_SOMETIMES;
542 }
543
544 key->mesh_input = is_mesh;
545
546 /* Vulkan doesn't support fixed-function alpha test */
547 key->alpha_test_replicate_alpha = false;
548
549 key->coarse_pixel =
550 device->vk.enabled_extensions.KHR_fragment_shading_rate &&
551 pipeline_has_coarse_pixel(dynamic, ms, fsr);
552
553 key->null_push_constant_tbimr_workaround =
554 device->info->needs_null_push_constant_tbimr_workaround;
555 }
556
557 static void
558 populate_cs_prog_key(struct anv_pipeline_stage *stage,
559 const struct anv_device *device)
560 {
561 memset(&stage->key, 0, sizeof(stage->key));
562
563 populate_base_prog_key(stage, device);
564 }
565
566 static void
567 populate_bs_prog_key(struct anv_pipeline_stage *stage,
568 const struct anv_device *device,
569 uint32_t ray_flags)
570 {
571 memset(&stage->key, 0, sizeof(stage->key));
572
573 populate_base_prog_key(stage, device);
574
575 stage->key.bs.pipeline_ray_flags = ray_flags;
577 }
578
579 static void
580 anv_stage_write_shader_hash(struct anv_pipeline_stage *stage,
581 const struct anv_device *device)
582 {
583 vk_pipeline_robustness_state_fill(&device->vk,
584 &stage->rstate,
585 stage->pipeline_pNext,
586 stage->info->pNext);
587
588 vk_pipeline_hash_shader_stage(stage->pipeline_flags, stage->info,
589 &stage->rstate, stage->shader_sha1);
590
591 stage->robust_flags = anv_get_robust_flags(&stage->rstate);
592
593 /* Use lowest dword of source shader sha1 for shader hash. */
594 stage->source_hash = ((uint32_t*)stage->shader_sha1)[0];
595 }
596
597 static bool
598 anv_graphics_pipeline_stage_fragment_dynamic(const struct anv_pipeline_stage *stage)
599 {
600 if (stage->stage != MESA_SHADER_FRAGMENT)
601 return false;
602
603 return stage->key.wm.persample_interp == INTEL_SOMETIMES ||
604 stage->key.wm.multisample_fbo == INTEL_SOMETIMES ||
605 stage->key.wm.alpha_to_coverage == INTEL_SOMETIMES;
606 }
607
608 static void
609 anv_pipeline_hash_common(struct mesa_sha1 *ctx,
610 const struct anv_pipeline *pipeline)
611 {
612 struct anv_device *device = pipeline->device;
613
614 _mesa_sha1_update(ctx, pipeline->layout.sha1, sizeof(pipeline->layout.sha1));
615
616 const bool indirect_descriptors = device->physical->indirect_descriptors;
617 _mesa_sha1_update(ctx, &indirect_descriptors, sizeof(indirect_descriptors));
618
619 const bool rba = device->robust_buffer_access;
620 _mesa_sha1_update(ctx, &rba, sizeof(rba));
621
622 const int spilling_rate = device->physical->compiler->spilling_rate;
623 _mesa_sha1_update(ctx, &spilling_rate, sizeof(spilling_rate));
624 }
625
626 static void
627 anv_pipeline_hash_graphics(struct anv_graphics_base_pipeline *pipeline,
628 struct anv_pipeline_stage *stages,
629 uint32_t view_mask,
630 unsigned char *sha1_out)
631 {
632 const struct anv_device *device = pipeline->base.device;
633 struct mesa_sha1 ctx;
634 _mesa_sha1_init(&ctx);
635
636 anv_pipeline_hash_common(&ctx, &pipeline->base);
637
638 _mesa_sha1_update(&ctx, &view_mask, sizeof(view_mask));
639
640 for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
641 if (pipeline->base.active_stages & BITFIELD_BIT(s)) {
642 _mesa_sha1_update(&ctx, stages[s].shader_sha1,
643 sizeof(stages[s].shader_sha1));
644 _mesa_sha1_update(&ctx, &stages[s].key, brw_prog_key_size(s));
645 }
646 }
647
648 if (stages[MESA_SHADER_MESH].info || stages[MESA_SHADER_TASK].info) {
649 const uint8_t afs = device->physical->instance->assume_full_subgroups;
650 _mesa_sha1_update(&ctx, &afs, sizeof(afs));
651 }
652
653 _mesa_sha1_final(&ctx, sha1_out);
654 }
655
656 static void
657 anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
658 struct anv_pipeline_stage *stage,
659 unsigned char *sha1_out)
660 {
661 const struct anv_device *device = pipeline->base.device;
662 struct mesa_sha1 ctx;
663 _mesa_sha1_init(&ctx);
664
665 anv_pipeline_hash_common(&ctx, &pipeline->base);
666
667 const uint8_t afs = device->physical->instance->assume_full_subgroups;
668 _mesa_sha1_update(&ctx, &afs, sizeof(afs));
669
670 const bool afswb = device->physical->instance->assume_full_subgroups_with_barrier;
671 _mesa_sha1_update(&ctx, &afswb, sizeof(afswb));
672
673 _mesa_sha1_update(&ctx, stage->shader_sha1,
674 sizeof(stage->shader_sha1));
675 _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));
676
677 _mesa_sha1_final(&ctx, sha1_out);
678 }
679
680 static void
681 anv_pipeline_hash_ray_tracing_shader(struct anv_ray_tracing_pipeline *pipeline,
682 struct anv_pipeline_stage *stage,
683 unsigned char *sha1_out)
684 {
685 struct mesa_sha1 ctx;
686 _mesa_sha1_init(&ctx);
687
688 anv_pipeline_hash_common(&ctx, &pipeline->base);
689
690 _mesa_sha1_update(&ctx, stage->shader_sha1, sizeof(stage->shader_sha1));
691 _mesa_sha1_update(&ctx, &stage->key, sizeof(stage->key.bs));
692
693 _mesa_sha1_final(&ctx, sha1_out);
694 }
695
696 static void
697 anv_pipeline_hash_ray_tracing_combined_shader(struct anv_ray_tracing_pipeline *pipeline,
698 struct anv_pipeline_stage *intersection,
699 struct anv_pipeline_stage *any_hit,
700 unsigned char *sha1_out)
701 {
702 struct mesa_sha1 ctx;
703 _mesa_sha1_init(&ctx);
704
705 _mesa_sha1_update(&ctx, pipeline->base.layout.sha1,
706 sizeof(pipeline->base.layout.sha1));
707
708 const bool rba = pipeline->base.device->robust_buffer_access;
709 _mesa_sha1_update(&ctx, &rba, sizeof(rba));
710
711 _mesa_sha1_update(&ctx, intersection->shader_sha1, sizeof(intersection->shader_sha1));
712 _mesa_sha1_update(&ctx, &intersection->key, sizeof(intersection->key.bs));
713 _mesa_sha1_update(&ctx, any_hit->shader_sha1, sizeof(any_hit->shader_sha1));
714 _mesa_sha1_update(&ctx, &any_hit->key, sizeof(any_hit->key.bs));
715
716 _mesa_sha1_final(&ctx, sha1_out);
717 }
718
719 static VkResult
720 anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
721 struct vk_pipeline_cache *cache,
722 void *mem_ctx,
723 struct anv_pipeline_stage *stage)
724 {
725 const struct brw_compiler *compiler =
726 pipeline->device->physical->compiler;
727 const nir_shader_compiler_options *nir_options =
728 compiler->nir_options[stage->stage];
729
730 stage->nir = anv_device_search_for_nir(pipeline->device, cache,
731 nir_options,
732 stage->shader_sha1,
733 mem_ctx);
734 if (stage->nir) {
735 assert(stage->nir->info.stage == stage->stage);
736 return VK_SUCCESS;
737 }
738
739 /* VkPipelineShaderStageCreateInfo:
740 *
741 * "If a pipeline is not found, pipeline compilation is not possible and
742 * the implementation must fail as specified by
743 * VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT."
744 */
745 if (vk_pipeline_shader_stage_has_identifier(stage->info))
746 return VK_PIPELINE_COMPILE_REQUIRED;
747
748 stage->nir = anv_shader_stage_to_nir(pipeline->device,
749 stage->pipeline_flags, stage->info,
750 stage->key.base.robust_flags, mem_ctx);
751 if (stage->nir) {
752 anv_device_upload_nir(pipeline->device, cache,
753 stage->nir, stage->shader_sha1);
754 return VK_SUCCESS;
755 }
756
757 return vk_errorf(&pipeline->device->vk, VK_ERROR_UNKNOWN,
758 "Unable to load NIR");
759 }
760
761 static const struct vk_ycbcr_conversion_state *
762 lookup_ycbcr_conversion(const void *_sets_layout, uint32_t set,
763 uint32_t binding, uint32_t array_index)
764 {
765 const struct anv_pipeline_sets_layout *sets_layout = _sets_layout;
766
767 assert(set < MAX_SETS);
768 assert(binding < sets_layout->set[set].layout->binding_count);
769 const struct anv_descriptor_set_binding_layout *bind_layout =
770 &sets_layout->set[set].layout->binding[binding];
771
772 if (bind_layout->immutable_samplers == NULL)
773 return NULL;
774
775 array_index = MIN2(array_index, bind_layout->array_size - 1);
776
777 const struct anv_sampler *sampler =
778 bind_layout->immutable_samplers[array_index];
779
780 return sampler && sampler->vk.ycbcr_conversion ?
781 &sampler->vk.ycbcr_conversion->state : NULL;
782 }
783
784 static void
785 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
786 {
787 assert(glsl_type_is_vector_or_scalar(type));
788
789 uint32_t comp_size = glsl_type_is_boolean(type)
790 ? 4 : glsl_get_bit_size(type) / 8;
791 unsigned length = glsl_get_vector_elements(type);
792 *size = comp_size * length;
793 *align = comp_size * (length == 3 ? 4 : length);
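   /* For example, a vec3 of 32-bit floats reports size 12 with alignment 16
    * (padded like a vec4), while a vec2 reports size 8 with alignment 8.
    */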
794 }
795
796 static enum anv_dynamic_push_bits
797 anv_nir_compute_dynamic_push_bits(nir_shader *shader)
798 {
799 enum anv_dynamic_push_bits ret = 0;
800
801 nir_foreach_function_impl(impl, shader) {
802 nir_foreach_block(block, impl) {
803 nir_foreach_instr(instr, block) {
804 if (instr->type != nir_instr_type_intrinsic)
805 continue;
806
807 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
808 if (intrin->intrinsic != nir_intrinsic_load_push_constant)
809 continue;
810
811 switch (nir_intrinsic_base(intrin)) {
812 case anv_drv_const_offset(gfx.tcs_input_vertices):
813 ret |= ANV_DYNAMIC_PUSH_INPUT_VERTICES;
814 break;
815
816 default:
817 break;
818 }
819 }
820 }
821 }
822
823 return ret;
824 }
825
826 static void
827 anv_fixup_subgroup_size(struct anv_device *device, struct shader_info *info)
828 {
829 switch (info->stage) {
830 case MESA_SHADER_COMPUTE:
831 case MESA_SHADER_TASK:
832 case MESA_SHADER_MESH:
833 break;
834 default:
835 return;
836 }
837
838 unsigned local_size = info->workgroup_size[0] *
839 info->workgroup_size[1] *
840 info->workgroup_size[2];
841
842 /* Games don't always request full subgroups when they should,
843 * which can cause bugs, as they may expect a bigger subgroup size
844 * than the one we choose for execution.
845 */
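   /* For example (illustrative sizes): a 64x1x1 compute shader using wide
    * subgroup intrinsics with SUBGROUP_SIZE_API_CONSTANT is promoted to full
    * subgroups here, since 64 is a multiple of BRW_SUBGROUP_SIZE (32).
    */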
846 if (device->physical->instance->assume_full_subgroups &&
847 info->uses_wide_subgroup_intrinsics &&
848 info->subgroup_size == SUBGROUP_SIZE_API_CONSTANT &&
849 local_size &&
850 local_size % BRW_SUBGROUP_SIZE == 0)
851 info->subgroup_size = SUBGROUP_SIZE_FULL_SUBGROUPS;
852
853 if (device->physical->instance->assume_full_subgroups_with_barrier &&
854 info->stage == MESA_SHADER_COMPUTE &&
855 device->info->verx10 <= 125 &&
856 info->uses_control_barrier &&
857 info->subgroup_size == SUBGROUP_SIZE_VARYING &&
858 local_size &&
859 local_size % BRW_SUBGROUP_SIZE == 0)
860 info->subgroup_size = SUBGROUP_SIZE_FULL_SUBGROUPS;
861
862 /* If the client requests that we dispatch full subgroups but doesn't
863 * allow us to pick a subgroup size, we have to smash it to the API
864 * value of 32. Performance will likely be terrible in this case but
865 * there's nothing we can do about that. The client should have chosen
866 * a size.
867 */
868 if (info->subgroup_size == SUBGROUP_SIZE_FULL_SUBGROUPS)
869 info->subgroup_size =
870 device->physical->instance->assume_full_subgroups != 0 ?
871 device->physical->instance->assume_full_subgroups : BRW_SUBGROUP_SIZE;
872
873 /* Cooperative matrix extension requires that all invocations in a subgroup
874 * be active. As a result, when the application does not request a specific
875 * subgroup size, we must use SIMD32.
876 */
877 if (info->stage == MESA_SHADER_COMPUTE && info->cs.has_cooperative_matrix &&
878 info->subgroup_size < SUBGROUP_SIZE_REQUIRE_8) {
879 info->subgroup_size = BRW_SUBGROUP_SIZE;
880 }
881 }
882
883 /* #define DEBUG_PRINTF_EXAMPLE 0 */
884
885 #if DEBUG_PRINTF_EXAMPLE
886 static bool
887 print_ubo_load(nir_builder *b,
888 nir_intrinsic_instr *intrin,
889 UNUSED void *cb_data)
890 {
891 if (intrin->intrinsic != nir_intrinsic_load_uniform)
892 return false;
893
894 b->cursor = nir_after_instr(&intrin->instr);
895 nir_printf_fmt(b, true, 64,
896 "uniform<= pos=%02.2fx%02.2f offset=0x%08x val=0x%08x\n",
897 nir_channel(b, nir_load_frag_coord(b), 0),
898 nir_channel(b, nir_load_frag_coord(b), 1),
899 intrin->src[0].ssa,
900 &intrin->def);
901 return true;
902 }
903 #endif
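/* Usage note: defining DEBUG_PRINTF_EXAMPLE to 1 (see the commented-out
 * define above) makes fragment shaders emit a nir_printf after every
 * load_uniform, via the guarded pass invocation near the end of
 * anv_pipeline_lower_nir().
 */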
904
905 static void
906 anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
907 void *mem_ctx,
908 struct anv_pipeline_stage *stage,
909 struct anv_pipeline_sets_layout *layout,
910 uint32_t view_mask,
911 bool use_primitive_replication)
912 {
913 const struct anv_physical_device *pdevice = pipeline->device->physical;
914 const struct brw_compiler *compiler = pdevice->compiler;
915
916 struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
917 nir_shader *nir = stage->nir;
918
919 if (nir->info.stage == MESA_SHADER_FRAGMENT) {
920 NIR_PASS(_, nir, nir_lower_wpos_center);
921 NIR_PASS(_, nir, nir_lower_input_attachments,
922 &(nir_input_attachment_options) {
923 .use_fragcoord_sysval = true,
924 .use_layer_id_sysval = true,
925 });
926 }
927
928 if (nir->info.stage == MESA_SHADER_MESH ||
929 nir->info.stage == MESA_SHADER_TASK) {
930 nir_lower_compute_system_values_options options = {
931 .lower_workgroup_id_to_index = true,
932 /* nir_lower_idiv generates expensive code */
933 .shortcut_1d_workgroup_id = compiler->devinfo->verx10 >= 125,
934 };
935
936 NIR_PASS(_, nir, nir_lower_compute_system_values, &options);
937 }
938
939 NIR_PASS(_, nir, nir_vk_lower_ycbcr_tex, lookup_ycbcr_conversion, layout);
940
941 if (pipeline->type == ANV_PIPELINE_GRAPHICS ||
942 pipeline->type == ANV_PIPELINE_GRAPHICS_LIB) {
943 NIR_PASS(_, nir, anv_nir_lower_multiview, view_mask,
944 use_primitive_replication);
945 }
946
947 if (nir->info.stage == MESA_SHADER_COMPUTE && nir->info.cs.has_cooperative_matrix) {
948 anv_fixup_subgroup_size(pipeline->device, &nir->info);
949 NIR_PASS(_, nir, brw_nir_lower_cmat, nir->info.subgroup_size);
950 NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_function_temp, 16);
951 }
952
953 /* The patch control points are delivered through a push constant when
954 * dynamic.
955 */
956 if (nir->info.stage == MESA_SHADER_TESS_CTRL &&
957 stage->key.tcs.input_vertices == 0)
958 NIR_PASS(_, nir, anv_nir_lower_load_patch_vertices_in);
959
960 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
961
962 NIR_PASS(_, nir, brw_nir_lower_storage_image,
963 &(struct brw_nir_lower_storage_image_opts) {
964 /* Anv only supports Gfx9+, which has better defined typed read
965 * behavior. That allows us to only care about lowering loads.
967 */
968 .devinfo = compiler->devinfo,
969 .lower_loads = true,
970 });
971
972 NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global,
973 nir_address_format_64bit_global);
974 NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const,
975 nir_address_format_32bit_offset);
976
977 NIR_PASS(_, nir, brw_nir_lower_ray_queries, &pdevice->info);
978
979 stage->push_desc_info.used_descriptors =
980 anv_nir_compute_used_push_descriptors(nir, layout);
981
982 struct anv_pipeline_push_map push_map = {};
983
984 /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
985 NIR_PASS_V(nir, anv_nir_apply_pipeline_layout,
986 pdevice, stage->key.base.robust_flags,
987 layout->independent_sets,
988 layout, &stage->bind_map, &push_map, mem_ctx);
989
990 NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ubo,
991 anv_nir_ubo_addr_format(pdevice, stage->key.base.robust_flags));
992 NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ssbo,
993 anv_nir_ssbo_addr_format(pdevice, stage->key.base.robust_flags));
994
995 /* First run copy-prop to get rid of all of the vec() that address
996 * calculations often create and then constant-fold so that, when we
997 * get to anv_nir_lower_ubo_loads, we can detect constant offsets.
998 */
999 bool progress;
1000 do {
1001 progress = false;
1002 NIR_PASS(progress, nir, nir_opt_algebraic);
1003 NIR_PASS(progress, nir, nir_copy_prop);
1004 NIR_PASS(progress, nir, nir_opt_constant_folding);
1005 NIR_PASS(progress, nir, nir_opt_dce);
1006 } while (progress);
1007
1008 /* Needed for anv_nir_lower_ubo_loads. */
1009 nir_divergence_analysis(nir);
1010
1011 NIR_PASS(_, nir, anv_nir_lower_ubo_loads);
1012
1013 enum nir_lower_non_uniform_access_type lower_non_uniform_access_types =
1014 nir_lower_non_uniform_texture_access |
1015 nir_lower_non_uniform_image_access |
1016 nir_lower_non_uniform_get_ssbo_size;
1017
1018 /* In practice, most shaders do not have non-uniform-qualified
1019 * accesses (see
1020 * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17558#note_1475069)
1021 * thus a cheaper check, likely to find nothing, is run first.
1022 */
1023 if (nir_has_non_uniform_access(nir, lower_non_uniform_access_types)) {
1024 NIR_PASS(_, nir, nir_opt_non_uniform_access);
1025
1026 /* We don't support non-uniform UBOs and non-uniform SSBO access is
1027 * handled naturally by falling back to A64 messages.
1028 */
1029 NIR_PASS(_, nir, nir_lower_non_uniform_access,
1030 &(nir_lower_non_uniform_access_options) {
1031 .types = lower_non_uniform_access_types,
1032 .callback = NULL,
1033 });
1034
1035 NIR_PASS(_, nir, intel_nir_lower_non_uniform_resource_intel);
1036 NIR_PASS(_, nir, intel_nir_cleanup_resource_intel);
1037 NIR_PASS(_, nir, nir_opt_dce);
1038 }
1039
1040 NIR_PASS_V(nir, anv_nir_update_resource_intel_block);
1041
1042 stage->dynamic_push_values = anv_nir_compute_dynamic_push_bits(nir);
1043
1044 NIR_PASS_V(nir, anv_nir_compute_push_layout,
1045 pdevice, stage->key.base.robust_flags,
1046 anv_graphics_pipeline_stage_fragment_dynamic(stage),
1047 prog_data, &stage->bind_map, &push_map,
1048 pipeline->layout.type, mem_ctx);
1049
1050 NIR_PASS_V(nir, anv_nir_lower_resource_intel, pdevice,
1051 pipeline->layout.type);
1052
1053 if (gl_shader_stage_uses_workgroup(nir->info.stage)) {
1054 if (!nir->info.shared_memory_explicit_layout) {
1055 NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
1056 nir_var_mem_shared, shared_type_info);
1057 }
1058
1059 NIR_PASS(_, nir, nir_lower_explicit_io,
1060 nir_var_mem_shared, nir_address_format_32bit_offset);
1061
1062 if (nir->info.zero_initialize_shared_memory &&
1063 nir->info.shared_size > 0) {
1064 /* The effective Shared Local Memory size is at least 1024 bytes and
1065 * is always rounded to a power of two, so it is OK to align the size
1066 * used by the shader to chunk_size -- which does simplify the logic.
1067 */
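         /* For example, a shader declaring 1000 bytes of shared memory is
          * zero-initialized over ALIGN(1000, 16) = 1008 bytes, which still
          * fits within the power-of-two SLM allocation.
          */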
1068 const unsigned chunk_size = 16;
1069 const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
1070 assert(shared_size <=
1071 intel_compute_slm_calculate_size(compiler->devinfo->ver, nir->info.shared_size));
1072
1073 NIR_PASS(_, nir, nir_zero_initialize_shared_memory,
1074 shared_size, chunk_size);
1075 }
1076 }
1077
1078 if (gl_shader_stage_is_compute(nir->info.stage) ||
1079 gl_shader_stage_is_mesh(nir->info.stage)) {
1080 NIR_PASS(_, nir, brw_nir_lower_cs_intrinsics, compiler->devinfo,
1081 &stage->prog_data.cs);
1082 }
1083
1084 stage->push_desc_info.used_set_buffer =
1085 anv_nir_loads_push_desc_buffer(nir, layout, &stage->bind_map);
1086 stage->push_desc_info.fully_promoted_ubo_descriptors =
1087 anv_nir_push_desc_ubo_fully_promoted(nir, layout, &stage->bind_map);
1088
1089 #if DEBUG_PRINTF_EXAMPLE
1090 if (stage->stage == MESA_SHADER_FRAGMENT) {
1091 nir_shader_intrinsics_pass(nir, print_ubo_load,
1092 nir_metadata_none,
1093 NULL);
1094 }
1095 #endif
1096
1097 stage->nir = nir;
1098 }
1099
1100 static void
1101 anv_pipeline_link_vs(const struct brw_compiler *compiler,
1102 struct anv_pipeline_stage *vs_stage,
1103 struct anv_pipeline_stage *next_stage)
1104 {
1105 if (next_stage)
1106 brw_nir_link_shaders(compiler, vs_stage->nir, next_stage->nir);
1107 }
1108
1109 static void
1110 anv_pipeline_compile_vs(const struct brw_compiler *compiler,
1111 void *mem_ctx,
1112 struct anv_graphics_base_pipeline *pipeline,
1113 struct anv_pipeline_stage *vs_stage,
1114 uint32_t view_mask,
1115 char **error_str)
1116 {
1117 /* When using Primitive Replication for multiview, each view gets its own
1118 * position slot.
1119 */
1120 uint32_t pos_slots =
1121 (vs_stage->nir->info.per_view_outputs & VARYING_BIT_POS) ?
1122 MAX2(1, util_bitcount(view_mask)) : 1;
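   /* For example, an illustrative view_mask of 0x5 (views 0 and 2) with a
    * per-view position gives pos_slots = 2; without per-view position it
    * stays 1.
    */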
1123
1124 /* Only position is allowed to be per-view */
1125 assert(!(vs_stage->nir->info.per_view_outputs & ~VARYING_BIT_POS));
1126
1127 brw_compute_vue_map(compiler->devinfo,
1128 &vs_stage->prog_data.vs.base.vue_map,
1129 vs_stage->nir->info.outputs_written,
1130 vs_stage->nir->info.separate_shader,
1131 pos_slots);
1132
1133 vs_stage->num_stats = 1;
1134
1135 struct brw_compile_vs_params params = {
1136 .base = {
1137 .nir = vs_stage->nir,
1138 .stats = vs_stage->stats,
1139 .log_data = pipeline->base.device,
1140 .mem_ctx = mem_ctx,
1141 .source_hash = vs_stage->source_hash,
1142 },
1143 .key = &vs_stage->key.vs,
1144 .prog_data = &vs_stage->prog_data.vs,
1145 };
1146
1147 vs_stage->code = brw_compile_vs(compiler, &params);
1148 *error_str = params.base.error_str;
1149 }
1150
1151 static void
1152 merge_tess_info(struct shader_info *tes_info,
1153 const struct shader_info *tcs_info)
1154 {
1155 /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
1156 *
1157 * "PointMode. Controls generation of points rather than triangles
1158 * or lines. This functionality defaults to disabled, and is
1159 * enabled if either shader stage includes the execution mode.
1160 *
1161 * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
1162 * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
1163 * and OutputVertices, it says:
1164 *
1165 * "One mode must be set in at least one of the tessellation
1166 * shader stages."
1167 *
1168 * So, the fields can be set in either the TCS or TES, but they must
1169 * agree if set in both. Our backend looks at TES, so bitwise-or in
1170 * the values from the TCS.
1171 */
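   /* For example, if the TCS declares OutputVertices = 3 while the TES
    * leaves tcs_vertices_out at 0, the merged value below becomes 3; two
    * conflicting non-zero values would trip the asserts.
    */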
1172 assert(tcs_info->tess.tcs_vertices_out == 0 ||
1173 tes_info->tess.tcs_vertices_out == 0 ||
1174 tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
1175 tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
1176
1177 assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
1178 tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
1179 tcs_info->tess.spacing == tes_info->tess.spacing);
1180 tes_info->tess.spacing |= tcs_info->tess.spacing;
1181
1182 assert(tcs_info->tess._primitive_mode == 0 ||
1183 tes_info->tess._primitive_mode == 0 ||
1184 tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode);
1185 tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode;
1186 tes_info->tess.ccw |= tcs_info->tess.ccw;
1187 tes_info->tess.point_mode |= tcs_info->tess.point_mode;
1188 }
1189
1190 static void
1191 anv_pipeline_link_tcs(const struct brw_compiler *compiler,
1192 struct anv_pipeline_stage *tcs_stage,
1193 struct anv_pipeline_stage *tes_stage)
1194 {
1195 assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);
1196
1197 brw_nir_link_shaders(compiler, tcs_stage->nir, tes_stage->nir);
1198
1199 nir_lower_patch_vertices(tes_stage->nir,
1200 tcs_stage->nir->info.tess.tcs_vertices_out,
1201 NULL);
1202
1203 /* Copy TCS info into the TES info */
1204 merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);
1205
1206 /* Whacking the key after cache lookup is a bit sketchy, but all of
1207 * this comes from the SPIR-V, which is part of the hash used for the
1208 * pipeline cache. So it should be safe.
1209 */
1210 tcs_stage->key.tcs._tes_primitive_mode =
1211 tes_stage->nir->info.tess._primitive_mode;
1212 }
1213
1214 static void
1215 anv_pipeline_compile_tcs(const struct brw_compiler *compiler,
1216 void *mem_ctx,
1217 struct anv_device *device,
1218 struct anv_pipeline_stage *tcs_stage,
1219 struct anv_pipeline_stage *prev_stage,
1220 char **error_str)
1221 {
1222 tcs_stage->key.tcs.outputs_written =
1223 tcs_stage->nir->info.outputs_written;
1224 tcs_stage->key.tcs.patch_outputs_written =
1225 tcs_stage->nir->info.patch_outputs_written;
1226
1227 tcs_stage->num_stats = 1;
1228
1229 struct brw_compile_tcs_params params = {
1230 .base = {
1231 .nir = tcs_stage->nir,
1232 .stats = tcs_stage->stats,
1233 .log_data = device,
1234 .mem_ctx = mem_ctx,
1235 .source_hash = tcs_stage->source_hash,
1236 },
1237 .key = &tcs_stage->key.tcs,
1238 .prog_data = &tcs_stage->prog_data.tcs,
1239 };
1240
1241 tcs_stage->code = brw_compile_tcs(compiler, &params);
1242 *error_str = params.base.error_str;
1243 }
1244
1245 static void
1246 anv_pipeline_link_tes(const struct brw_compiler *compiler,
1247 struct anv_pipeline_stage *tes_stage,
1248 struct anv_pipeline_stage *next_stage)
1249 {
1250 if (next_stage)
1251 brw_nir_link_shaders(compiler, tes_stage->nir, next_stage->nir);
1252 }
1253
1254 static void
1255 anv_pipeline_compile_tes(const struct brw_compiler *compiler,
1256 void *mem_ctx,
1257 struct anv_device *device,
1258 struct anv_pipeline_stage *tes_stage,
1259 struct anv_pipeline_stage *tcs_stage,
1260 char **error_str)
1261 {
1262 tes_stage->key.tes.inputs_read =
1263 tcs_stage->nir->info.outputs_written;
1264 tes_stage->key.tes.patch_inputs_read =
1265 tcs_stage->nir->info.patch_outputs_written;
1266
1267 tes_stage->num_stats = 1;
1268
1269 struct brw_compile_tes_params params = {
1270 .base = {
1271 .nir = tes_stage->nir,
1272 .stats = tes_stage->stats,
1273 .log_data = device,
1274 .mem_ctx = mem_ctx,
1275 .source_hash = tes_stage->source_hash,
1276 },
1277 .key = &tes_stage->key.tes,
1278 .prog_data = &tes_stage->prog_data.tes,
1279 .input_vue_map = &tcs_stage->prog_data.tcs.base.vue_map,
1280 };
1281
1282 tes_stage->code = brw_compile_tes(compiler, &params);
1283 *error_str = params.base.error_str;
1284 }
1285
1286 static void
1287 anv_pipeline_link_gs(const struct brw_compiler *compiler,
1288 struct anv_pipeline_stage *gs_stage,
1289 struct anv_pipeline_stage *next_stage)
1290 {
1291 if (next_stage)
1292 brw_nir_link_shaders(compiler, gs_stage->nir, next_stage->nir);
1293 }
1294
1295 static void
1296 anv_pipeline_compile_gs(const struct brw_compiler *compiler,
1297 void *mem_ctx,
1298 struct anv_device *device,
1299 struct anv_pipeline_stage *gs_stage,
1300 struct anv_pipeline_stage *prev_stage,
1301 char **error_str)
1302 {
1303 brw_compute_vue_map(compiler->devinfo,
1304 &gs_stage->prog_data.gs.base.vue_map,
1305 gs_stage->nir->info.outputs_written,
1306 gs_stage->nir->info.separate_shader, 1);
1307
1308 gs_stage->num_stats = 1;
1309
1310 struct brw_compile_gs_params params = {
1311 .base = {
1312 .nir = gs_stage->nir,
1313 .stats = gs_stage->stats,
1314 .log_data = device,
1315 .mem_ctx = mem_ctx,
1316 .source_hash = gs_stage->source_hash,
1317 },
1318 .key = &gs_stage->key.gs,
1319 .prog_data = &gs_stage->prog_data.gs,
1320 };
1321
1322 gs_stage->code = brw_compile_gs(compiler, &params);
1323 *error_str = params.base.error_str;
1324 }
1325
1326 static void
1327 anv_pipeline_link_task(const struct brw_compiler *compiler,
1328 struct anv_pipeline_stage *task_stage,
1329 struct anv_pipeline_stage *next_stage)
1330 {
1331 assert(next_stage);
1332 assert(next_stage->stage == MESA_SHADER_MESH);
1333 brw_nir_link_shaders(compiler, task_stage->nir, next_stage->nir);
1334 }
1335
1336 static void
1337 anv_pipeline_compile_task(const struct brw_compiler *compiler,
1338 void *mem_ctx,
1339 struct anv_device *device,
1340 struct anv_pipeline_stage *task_stage,
1341 char **error_str)
1342 {
1343 task_stage->num_stats = 1;
1344
1345 struct brw_compile_task_params params = {
1346 .base = {
1347 .nir = task_stage->nir,
1348 .stats = task_stage->stats,
1349 .log_data = device,
1350 .mem_ctx = mem_ctx,
1351 .source_hash = task_stage->source_hash,
1352 },
1353 .key = &task_stage->key.task,
1354 .prog_data = &task_stage->prog_data.task,
1355 };
1356
1357 task_stage->code = brw_compile_task(compiler, &params);
1358 *error_str = params.base.error_str;
1359 }
1360
1361 static void
1362 anv_pipeline_link_mesh(const struct brw_compiler *compiler,
1363 struct anv_pipeline_stage *mesh_stage,
1364 struct anv_pipeline_stage *next_stage)
1365 {
1366 if (next_stage) {
1367 brw_nir_link_shaders(compiler, mesh_stage->nir, next_stage->nir);
1368 }
1369 }
1370
1371 static void
1372 anv_pipeline_compile_mesh(const struct brw_compiler *compiler,
1373 void *mem_ctx,
1374 struct anv_device *device,
1375 struct anv_pipeline_stage *mesh_stage,
1376 struct anv_pipeline_stage *prev_stage,
1377 char **error_str)
1378 {
1379 mesh_stage->num_stats = 1;
1380
1381 struct brw_compile_mesh_params params = {
1382 .base = {
1383 .nir = mesh_stage->nir,
1384 .stats = mesh_stage->stats,
1385 .log_data = device,
1386 .mem_ctx = mem_ctx,
1387 .source_hash = mesh_stage->source_hash,
1388 },
1389 .key = &mesh_stage->key.mesh,
1390 .prog_data = &mesh_stage->prog_data.mesh,
1391 };
1392
1393 if (prev_stage) {
1394 assert(prev_stage->stage == MESA_SHADER_TASK);
1395 params.tue_map = &prev_stage->prog_data.task.map;
1396 }
1397
1398 mesh_stage->code = brw_compile_mesh(compiler, &params);
1399 *error_str = params.base.error_str;
1400 }
1401
1402 static void
1403 anv_pipeline_link_fs(const struct brw_compiler *compiler,
1404 struct anv_pipeline_stage *stage,
1405 const struct vk_render_pass_state *rp)
1406 {
1407 /* Initially the valid outputs value is set to all possible render targets
1408 * valid (see populate_wm_prog_key()), before we look at the shader
1409 * variables. Here we look at the output variables of the shader and
1410 * compute the correct number of render target outputs.
1411 */
1412 stage->key.wm.color_outputs_valid = 0;
1413 nir_foreach_shader_out_variable_safe(var, stage->nir) {
1414 if (var->data.location < FRAG_RESULT_DATA0)
1415 continue;
1416
1417 const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
1418 const unsigned array_len =
1419 glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
1420 assert(rt + array_len <= MAX_RTS);
1421
1422 stage->key.wm.color_outputs_valid |= BITFIELD_RANGE(rt, array_len);
1423 }
1424 stage->key.wm.color_outputs_valid &= rp_color_mask(rp);
1425 stage->key.wm.nr_color_regions =
1426 util_last_bit(stage->key.wm.color_outputs_valid);
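   /* For example (illustrative layout): a shader writing only
    * FRAG_RESULT_DATA2 against a render pass where attachment 2 has a valid
    * format ends up with color_outputs_valid = 0x4 and nr_color_regions = 3.
    */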
1427
1428 unsigned num_rt_bindings;
1429 struct anv_pipeline_binding rt_bindings[MAX_RTS];
1430 if (stage->key.wm.nr_color_regions > 0) {
1431 assert(stage->key.wm.nr_color_regions <= MAX_RTS);
1432 for (unsigned rt = 0; rt < stage->key.wm.nr_color_regions; rt++) {
1433 if (stage->key.wm.color_outputs_valid & BITFIELD_BIT(rt)) {
1434 rt_bindings[rt] = (struct anv_pipeline_binding) {
1435 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
1436 .index = rt,
1437 .binding = UINT32_MAX,
1438
1439 };
1440 } else {
1441 /* Setup a null render target */
1442 rt_bindings[rt] = (struct anv_pipeline_binding) {
1443 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
1444 .index = ANV_COLOR_OUTPUT_UNUSED,
1445 .binding = UINT32_MAX,
1446 };
1447 }
1448 }
1449 num_rt_bindings = stage->key.wm.nr_color_regions;
1450 } else if (brw_nir_fs_needs_null_rt(
1451 compiler->devinfo, stage->nir,
1452 stage->key.wm.multisample_fbo != INTEL_NEVER,
1453 stage->key.wm.alpha_to_coverage != INTEL_NEVER)) {
1454 /* Setup a null render target */
1455 rt_bindings[0] = (struct anv_pipeline_binding) {
1456 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
1457 .index = ANV_COLOR_OUTPUT_DISABLED,
1458 .binding = UINT32_MAX,
1459 };
1460 num_rt_bindings = 1;
1461 } else {
1462 num_rt_bindings = 0;
1463 }
1464
1465 assert(num_rt_bindings <= MAX_RTS);
1466 assert(stage->bind_map.surface_count == 0);
1467 typed_memcpy(stage->bind_map.surface_to_descriptor,
1468 rt_bindings, num_rt_bindings);
1469 stage->bind_map.surface_count += num_rt_bindings;
1470 }
1471
1472 static void
1473 anv_pipeline_compile_fs(const struct brw_compiler *compiler,
1474 void *mem_ctx,
1475 struct anv_device *device,
1476 struct anv_pipeline_stage *fs_stage,
1477 struct anv_pipeline_stage *prev_stage,
1478 struct anv_graphics_base_pipeline *pipeline,
1479 uint32_t view_mask,
1480 bool use_primitive_replication,
1481 char **error_str)
1482 {
1483 /* When using Primitive Replication for multiview, each view gets its own
1484 * position slot.
1485 */
1486 uint32_t pos_slots = use_primitive_replication ?
1487 MAX2(1, util_bitcount(view_mask)) : 1;
1488
1489 /* If we have a previous stage we can use that to deduce valid slots.
1490 * Otherwise, rely on the inputs read by the fragment shader.
1491 */
1492 if (prev_stage) {
1493 fs_stage->key.wm.input_slots_valid =
1494 prev_stage->prog_data.vue.vue_map.slots_valid;
1495 } else {
1496 struct intel_vue_map prev_vue_map;
1497 brw_compute_vue_map(compiler->devinfo,
1498 &prev_vue_map,
1499 fs_stage->nir->info.inputs_read,
1500 fs_stage->nir->info.separate_shader,
1501 pos_slots);
1502
1503 fs_stage->key.wm.input_slots_valid = prev_vue_map.slots_valid;
1504 }
1505
1506 struct brw_compile_fs_params params = {
1507 .base = {
1508 .nir = fs_stage->nir,
1509 .stats = fs_stage->stats,
1510 .log_data = device,
1511 .mem_ctx = mem_ctx,
1512 .source_hash = fs_stage->source_hash,
1513 },
1514 .key = &fs_stage->key.wm,
1515 .prog_data = &fs_stage->prog_data.wm,
1516
1517 .allow_spilling = true,
1518 .max_polygons = UCHAR_MAX,
1519 };
1520
1521 if (prev_stage && prev_stage->stage == MESA_SHADER_MESH) {
1522 params.mue_map = &prev_stage->prog_data.mesh.map;
1523 /* TODO(mesh): Slots valid, do we even use/rely on it? */
1524 }
1525
1526 fs_stage->code = brw_compile_fs(compiler, &params);
1527 *error_str = params.base.error_str;
1528
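/* The FS may have been compiled for several dispatch modes (multi-polygon,
 * SIMD8, SIMD16, SIMD32); each enabled mode fills one entry of stats[], so
 * count them to know how many entries are valid.
 */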
1529 fs_stage->num_stats = (uint32_t)!!fs_stage->prog_data.wm.dispatch_multi +
1530 (uint32_t)fs_stage->prog_data.wm.dispatch_8 +
1531 (uint32_t)fs_stage->prog_data.wm.dispatch_16 +
1532 (uint32_t)fs_stage->prog_data.wm.dispatch_32;
1533 assert(fs_stage->num_stats <= ARRAY_SIZE(fs_stage->stats));
1534 }
1535
1536 static void
1537 anv_pipeline_add_executable(struct anv_pipeline *pipeline,
1538 struct anv_pipeline_stage *stage,
1539 struct brw_compile_stats *stats,
1540 uint32_t code_offset)
1541 {
1542 char *nir = NULL;
1543 if (stage->nir &&
1544 (pipeline->flags &
1545 VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
1546 nir = nir_shader_as_str(stage->nir, pipeline->mem_ctx);
1547 }
1548
1549 char *disasm = NULL;
1550 if (stage->code &&
1551 (pipeline->flags &
1552 VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
1553 char *stream_data = NULL;
1554 size_t stream_size = 0;
1555 FILE *stream = open_memstream(&stream_data, &stream_size);
1556
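/* Push range lengths and starts below are expressed in 32-byte units, hence
 * the "* 32" when printing sizes in bytes.
 */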
1557 uint32_t push_size = 0;
1558 for (unsigned i = 0; i < 4; i++)
1559 push_size += stage->bind_map.push_ranges[i].length;
1560 if (push_size > 0) {
1561 fprintf(stream, "Push constant ranges:\n");
1562 for (unsigned i = 0; i < 4; i++) {
1563 if (stage->bind_map.push_ranges[i].length == 0)
1564 continue;
1565
1566 fprintf(stream, " RANGE%d (%dB): ", i,
1567 stage->bind_map.push_ranges[i].length * 32);
1568
1569 switch (stage->bind_map.push_ranges[i].set) {
1570 case ANV_DESCRIPTOR_SET_NULL:
1571 fprintf(stream, "NULL");
1572 break;
1573
1574 case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
1575 fprintf(stream, "Vulkan push constants and API params");
1576 break;
1577
1578 case ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER:
1579 fprintf(stream, "Descriptor buffer (desc buffer) for set %d (start=%dB)",
1580 stage->bind_map.push_ranges[i].index,
1581 stage->bind_map.push_ranges[i].start * 32);
1582 break;
1583
1584 case ANV_DESCRIPTOR_SET_DESCRIPTORS:
1585 fprintf(stream, "Descriptor buffer for set %d (start=%dB)",
1586 stage->bind_map.push_ranges[i].index,
1587 stage->bind_map.push_ranges[i].start * 32);
1588 break;
1589
1590 case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
1591 unreachable("Color attachments can't be pushed");
1592
1593 default:
1594 fprintf(stream, "UBO (set=%d binding=%d start=%dB)",
1595 stage->bind_map.push_ranges[i].set,
1596 stage->bind_map.push_ranges[i].index,
1597 stage->bind_map.push_ranges[i].start * 32);
1598 break;
1599 }
1600 fprintf(stream, "\n");
1601 }
1602 fprintf(stream, "\n");
1603 }
1604
1605 /* Creating this is far cheaper than it looks. It's perfectly fine to
1606 * do it for every binary.
1607 */
1608 brw_disassemble_with_errors(&pipeline->device->physical->compiler->isa,
1609 stage->code, code_offset, stream);
1610
1611 fclose(stream);
1612
1613 /* Copy it to a ralloc'd thing */
1614 disasm = ralloc_size(pipeline->mem_ctx, stream_size + 1);
1615 memcpy(disasm, stream_data, stream_size);
1616 disasm[stream_size] = 0;
1617
1618 free(stream_data);
1619 }
1620
1621 const struct anv_pipeline_executable exe = {
1622 .stage = stage->stage,
1623 .stats = *stats,
1624 .nir = nir,
1625 .disasm = disasm,
1626 };
1627 util_dynarray_append(&pipeline->executables,
1628 struct anv_pipeline_executable, exe);
1629 }
1630
1631 static void
1632 anv_pipeline_add_executables(struct anv_pipeline *pipeline,
1633 struct anv_pipeline_stage *stage)
1634 {
1635 if (stage->stage == MESA_SHADER_FRAGMENT) {
1636 /* We pull the prog data and stats out of the anv_shader_bin because
1637 * the anv_pipeline_stage may not be fully populated if we successfully
1638 * looked up the shader in a cache.
1639 */
1640 const struct brw_wm_prog_data *wm_prog_data =
1641 (const struct brw_wm_prog_data *)stage->bin->prog_data;
1642 struct brw_compile_stats *stats = stage->bin->stats;
1643
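/* One executable is exposed per compiled dispatch mode so that
 * VK_KHR_pipeline_executable_properties can report each variant with its
 * own statistics and disassembly offset.
 */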
1644 if (wm_prog_data->dispatch_8 ||
1645 wm_prog_data->dispatch_multi) {
1646 anv_pipeline_add_executable(pipeline, stage, stats++, 0);
1647 }
1648
1649 if (wm_prog_data->dispatch_16) {
1650 anv_pipeline_add_executable(pipeline, stage, stats++,
1651 wm_prog_data->prog_offset_16);
1652 }
1653
1654 if (wm_prog_data->dispatch_32) {
1655 anv_pipeline_add_executable(pipeline, stage, stats++,
1656 wm_prog_data->prog_offset_32);
1657 }
1658 } else {
1659 anv_pipeline_add_executable(pipeline, stage, stage->bin->stats, 0);
1660 }
1661 }
1662
1663 static void
1664 anv_pipeline_account_shader(struct anv_pipeline *pipeline,
1665 struct anv_shader_bin *shader)
1666 {
1667 pipeline->scratch_size = MAX2(pipeline->scratch_size,
1668 shader->prog_data->total_scratch);
1669
1670 pipeline->ray_queries = MAX2(pipeline->ray_queries,
1671 shader->prog_data->ray_queries);
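/* Scratch space and ray query slots are allocated for the pipeline as a
 * whole, so track the maximum required by any single shader.
 */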
1672
1673 if (shader->push_desc_info.used_set_buffer) {
1674 pipeline->use_push_descriptor_buffer |=
1675 mesa_to_vk_shader_stage(shader->stage);
1676 }
1677 if (shader->push_desc_info.used_descriptors &
1678 ~shader->push_desc_info.fully_promoted_ubo_descriptors)
1679 pipeline->use_push_descriptor |= mesa_to_vk_shader_stage(shader->stage);
1680 }
1681
1682 /* This function returns true if a shader should be skipped because of fast
1683 * linking. In that case we use the shader binaries provided by the
1684 * libraries instead.
1685 */
1686 static bool
1687 anv_graphics_pipeline_skip_shader_compile(struct anv_graphics_base_pipeline *pipeline,
1688 struct anv_pipeline_stage *stages,
1689 bool link_optimize,
1690 gl_shader_stage stage)
1691 {
1692 /* Always skip non active stages */
1693 if (!anv_pipeline_base_has_stage(pipeline, stage))
1694 return true;
1695
1696 /* When link optimizing, consider all stages */
1697 if (link_optimize)
1698 return false;
1699
1700 /* Otherwise check if the stage was specified through
1701 * VkGraphicsPipelineCreateInfo
1702 */
1703 assert(stages[stage].info != NULL || stages[stage].imported.bin != NULL);
1704 return stages[stage].info == NULL;
1705 }
1706
1707 static void
1708 anv_graphics_pipeline_init_keys(struct anv_graphics_base_pipeline *pipeline,
1709 const struct vk_graphics_pipeline_state *state,
1710 struct anv_pipeline_stage *stages)
1711 {
1712 for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
1713 if (!anv_pipeline_base_has_stage(pipeline, s))
1714 continue;
1715
1716 int64_t stage_start = os_time_get_nano();
1717
1718 const struct anv_device *device = pipeline->base.device;
1719 switch (stages[s].stage) {
1720 case MESA_SHADER_VERTEX:
1721 populate_vs_prog_key(&stages[s], device);
1722 break;
1723 case MESA_SHADER_TESS_CTRL:
1724 populate_tcs_prog_key(&stages[s],
1725 device,
1726 BITSET_TEST(state->dynamic,
1727 MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS) ?
1728 0 : state->ts->patch_control_points);
1729 break;
1730 case MESA_SHADER_TESS_EVAL:
1731 populate_tes_prog_key(&stages[s], device);
1732 break;
1733 case MESA_SHADER_GEOMETRY:
1734 populate_gs_prog_key(&stages[s], device);
1735 break;
1736 case MESA_SHADER_FRAGMENT: {
1737 /* Assume rasterization is enabled in any of the following cases:
1738 *
1739 * - We're a pipeline library without pre-rasterization information
1740 *
1741 * - Rasterization is not disabled in the non-dynamic state
1742 *
1743 * - Rasterization disable is dynamic
1744 */
1745 const bool raster_enabled =
1746 state->rs == NULL ||
1747 !state->rs->rasterizer_discard_enable ||
1748 BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE);
1749 enum intel_sometimes is_mesh = INTEL_NEVER;
1750 if (device->vk.enabled_extensions.EXT_mesh_shader) {
1751 if (anv_pipeline_base_has_stage(pipeline, MESA_SHADER_VERTEX))
1752 is_mesh = INTEL_NEVER;
1753 else if (anv_pipeline_base_has_stage(pipeline, MESA_SHADER_MESH))
1754 is_mesh = INTEL_ALWAYS;
1755 else {
1756 assert(pipeline->base.type == ANV_PIPELINE_GRAPHICS_LIB);
1757 is_mesh = INTEL_SOMETIMES;
1758 }
1759 }
1760 populate_wm_prog_key(&stages[s],
1761 pipeline,
1762 state->dynamic,
1763 raster_enabled ? state->ms : NULL,
1764 state->fsr, state->rp, is_mesh);
1765 break;
1766 }
1767
1768 case MESA_SHADER_TASK:
1769 populate_task_prog_key(&stages[s], device);
1770 break;
1771
1772 case MESA_SHADER_MESH: {
1773 const bool compact_mue =
1774 !(pipeline->base.type == ANV_PIPELINE_GRAPHICS_LIB &&
1775 !anv_pipeline_base_has_stage(pipeline, MESA_SHADER_FRAGMENT));
1776 populate_mesh_prog_key(&stages[s], device, compact_mue);
1777 break;
1778 }
1779
1780 default:
1781 unreachable("Invalid graphics shader stage");
1782 }
1783
1784 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1785 stages[s].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
1786 }
1787 }
1788
1789 static void
1790 anv_graphics_lib_retain_shaders(struct anv_graphics_base_pipeline *pipeline,
1791 struct anv_pipeline_stage *stages,
1792 bool will_compile)
1793 {
1794 /* There isn't much point in retaining NIR shaders on final pipelines. */
1795 assert(pipeline->base.type == ANV_PIPELINE_GRAPHICS_LIB);
1796
1797 struct anv_graphics_lib_pipeline *lib = (struct anv_graphics_lib_pipeline *) pipeline;
1798
1799 for (int s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1800 if (!anv_pipeline_base_has_stage(pipeline, s))
1801 continue;
1802
1803 memcpy(lib->retained_shaders[s].shader_sha1, stages[s].shader_sha1,
1804 sizeof(stages[s].shader_sha1));
1805
1806 lib->retained_shaders[s].subgroup_size_type = stages[s].subgroup_size_type;
1807
1808 nir_shader *nir = stages[s].nir != NULL ? stages[s].nir : stages[s].imported.nir;
1809 assert(nir != NULL);
1810
1811 if (!will_compile) {
1812 lib->retained_shaders[s].nir = nir;
1813 } else {
1814 lib->retained_shaders[s].nir =
1815 nir_shader_clone(pipeline->base.mem_ctx, nir);
1816 }
1817 }
1818 }
1819
1820 static bool
1821 anv_graphics_pipeline_load_cached_shaders(struct anv_graphics_base_pipeline *pipeline,
1822 struct vk_pipeline_cache *cache,
1823 struct anv_pipeline_stage *stages,
1824 bool link_optimize,
1825 VkPipelineCreationFeedback *pipeline_feedback)
1826 {
1827 struct anv_device *device = pipeline->base.device;
1828 unsigned cache_hits = 0, found = 0, imported = 0;
1829
1830 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1831 if (!anv_pipeline_base_has_stage(pipeline, s))
1832 continue;
1833
1834 int64_t stage_start = os_time_get_nano();
1835
1836 bool cache_hit;
1837 stages[s].bin =
1838 anv_device_search_for_kernel(device, cache, &stages[s].cache_key,
1839 sizeof(stages[s].cache_key), &cache_hit);
1840 if (stages[s].bin) {
1841 found++;
1842 pipeline->shaders[s] = stages[s].bin;
1843 }
1844
1845 if (cache_hit) {
1846 cache_hits++;
1847 stages[s].feedback.flags |=
1848 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
1849 }
1850 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1851 }
1852
1853 /* When not link optimizing, look up the missing shaders in the imported
1854 * libraries.
1855 */
1856 if (!link_optimize) {
1857 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1858 if (!anv_pipeline_base_has_stage(pipeline, s))
1859 continue;
1860
1861 if (pipeline->shaders[s] != NULL)
1862 continue;
1863
1864 if (stages[s].imported.bin == NULL)
1865 continue;
1866
1867 stages[s].bin = stages[s].imported.bin;
1868 pipeline->shaders[s] = anv_shader_bin_ref(stages[s].imported.bin);
1869 pipeline->source_hashes[s] = stages[s].source_hash;
1870 imported++;
1871 }
1872 }
1873
1874 if ((found + imported) == __builtin_popcount(pipeline->base.active_stages)) {
1875 if (cache_hits == found && found != 0) {
1876 pipeline_feedback->flags |=
1877 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
1878 }
1879 /* We found all our shaders in the cache. We're done. */
1880 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1881 if (pipeline->shaders[s] == NULL)
1882 continue;
1883
1884 /* Only add the executables when we're not importing or doing link
1885 * optimizations. The imported executables are added earlier. Link
1886 * optimization can produce different binaries.
1887 */
1888 if (stages[s].imported.bin == NULL || link_optimize)
1889 anv_pipeline_add_executables(&pipeline->base, &stages[s]);
1890 pipeline->source_hashes[s] = stages[s].source_hash;
1891 }
1892 return true;
1893 } else if (found > 0) {
1894 /* We found some but not all of our shaders. This shouldn't happen most
1895 * of the time but it can if we have a partially populated pipeline
1896 * cache.
1897 */
1898 assert(found < __builtin_popcount(pipeline->base.active_stages));
1899
1900 /* With GPL, this might well happen if the app does an optimized
1901 * link.
1902 */
1903 if (!pipeline->base.device->vk.enabled_extensions.EXT_graphics_pipeline_library) {
1904 vk_perf(VK_LOG_OBJS(cache ? &cache->base :
1905 &pipeline->base.device->vk.base),
1906 "Found a partial pipeline in the cache. This is "
1907 "most likely caused by an incomplete pipeline cache "
1908 "import or export");
1909 }
1910
1911 /* We're going to have to recompile anyway, so just throw away our
1912 * references to the shaders in the cache. We'll get them out of the
1913 * cache again as part of the compilation process.
1914 */
1915 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1916 stages[s].feedback.flags = 0;
1917 if (pipeline->shaders[s]) {
1918 anv_shader_bin_unref(device, pipeline->shaders[s]);
1919 pipeline->shaders[s] = NULL;
1920 }
1921 }
1922 }
1923
1924 return false;
1925 }
1926
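/* Stages in pipeline order: the legacy geometry path (VS, TCS, TES, GS) and
 * the mesh path (TASK, MESH) are alternatives, both followed by the
 * fragment stage.
 */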
1927 static const gl_shader_stage graphics_shader_order[] = {
1928 MESA_SHADER_VERTEX,
1929 MESA_SHADER_TESS_CTRL,
1930 MESA_SHADER_TESS_EVAL,
1931 MESA_SHADER_GEOMETRY,
1932
1933 MESA_SHADER_TASK,
1934 MESA_SHADER_MESH,
1935
1936 MESA_SHADER_FRAGMENT,
1937 };
1938
1939 /* This function loads NIR only for stages specified in
1940 * VkGraphicsPipelineCreateInfo::pStages[]
1941 */
1942 static VkResult
1943 anv_graphics_pipeline_load_nir(struct anv_graphics_base_pipeline *pipeline,
1944 struct vk_pipeline_cache *cache,
1945 struct anv_pipeline_stage *stages,
1946 void *mem_ctx,
1947 bool need_clone)
1948 {
1949 for (unsigned s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
1950 if (!anv_pipeline_base_has_stage(pipeline, s))
1951 continue;
1952
1953 int64_t stage_start = os_time_get_nano();
1954
1955 assert(stages[s].stage == s);
1956
1957 /* Only use the NIR created from the pStages[] element if we don't have
1958 * an imported library for the same stage.
1959 */
1960 if (stages[s].imported.bin == NULL) {
1961 VkResult result = anv_pipeline_stage_get_nir(&pipeline->base, cache,
1962 mem_ctx, &stages[s]);
1963 if (result != VK_SUCCESS)
1964 return result;
1965 } else {
1966 stages[s].nir = need_clone ?
1967 nir_shader_clone(mem_ctx, stages[s].imported.nir) :
1968 stages[s].imported.nir;
1969 }
1970
1971 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1972 }
1973
1974 return VK_SUCCESS;
1975 }
1976
1977 static void
1978 anv_pipeline_nir_preprocess(struct anv_pipeline *pipeline,
1979 struct anv_pipeline_stage *stage)
1980 {
1981 struct anv_device *device = pipeline->device;
1982 const struct brw_compiler *compiler = device->physical->compiler;
1983
1984 const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
1985 .point_coord = true,
1986 };
1987 NIR_PASS(_, stage->nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
1988
1989 const nir_opt_access_options opt_access_options = {
1990 .is_vulkan = true,
1991 };
1992 NIR_PASS(_, stage->nir, nir_opt_access, &opt_access_options);
1993
1994 /* Vulkan uses the separate-shader linking model */
1995 stage->nir->info.separate_shader = true;
1996
1997 struct brw_nir_compiler_opts opts = {
1998 .softfp64 = device->fp64_nir,
1999 /* Assume robustness with EXT_pipeline_robustness because it can be
2000 * turned on/off per pipeline and we have no visibility into that here.
2001 */
2002 .robust_image_access = device->vk.enabled_features.robustImageAccess ||
2003 device->vk.enabled_features.robustImageAccess2 ||
2004 device->vk.enabled_extensions.EXT_pipeline_robustness,
2005 .input_vertices = stage->nir->info.stage == MESA_SHADER_TESS_CTRL ?
2006 stage->key.tcs.input_vertices : 0,
2007 };
2008 brw_preprocess_nir(compiler, stage->nir, &opts);
2009
2010 NIR_PASS(_, stage->nir, nir_opt_barrier_modes);
2011
2012 nir_shader_gather_info(stage->nir, nir_shader_get_entrypoint(stage->nir));
2013 }
2014
2015 static void
2016 anv_fill_pipeline_creation_feedback(const struct anv_graphics_base_pipeline *pipeline,
2017 VkPipelineCreationFeedback *pipeline_feedback,
2018 const VkGraphicsPipelineCreateInfo *info,
2019 struct anv_pipeline_stage *stages)
2020 {
2021 const VkPipelineCreationFeedbackCreateInfo *create_feedback =
2022 vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
2023 if (create_feedback) {
2024 *create_feedback->pPipelineCreationFeedback = *pipeline_feedback;
2025
2026 /* VkPipelineCreationFeedbackCreateInfo:
2027 *
2028 * "An implementation must set or clear the
2029 * VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT in
2030 * VkPipelineCreationFeedback::flags for pPipelineCreationFeedback
2031 * and every element of pPipelineStageCreationFeedbacks."
2032 *
2033 */
2034 for (uint32_t i = 0; i < create_feedback->pipelineStageCreationFeedbackCount; i++) {
2035 create_feedback->pPipelineStageCreationFeedbacks[i].flags &=
2036 ~VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2037 }
2038 /* This part is not really specified in the Vulkan spec at the moment.
2039 * We're kind of guessing what the CTS wants. We might need to update
2040 * when https://gitlab.khronos.org/vulkan/vulkan/-/issues/3115 is
2041 * clarified.
2042 */
2043 for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
2044 if (!anv_pipeline_base_has_stage(pipeline, s))
2045 continue;
2046
2047 if (stages[s].feedback_idx < create_feedback->pipelineStageCreationFeedbackCount) {
2048 create_feedback->pPipelineStageCreationFeedbacks[
2049 stages[s].feedback_idx] = stages[s].feedback;
2050 }
2051 }
2052 }
2053 }
2054
2055 static uint32_t
2056 anv_graphics_pipeline_imported_shader_count(struct anv_pipeline_stage *stages)
2057 {
2058 uint32_t count = 0;
2059 for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
2060 if (stages[s].imported.bin != NULL)
2061 count++;
2062 }
2063 return count;
2064 }
2065
2066 static VkResult
2067 anv_graphics_pipeline_compile(struct anv_graphics_base_pipeline *pipeline,
2068 struct anv_pipeline_stage *stages,
2069 struct vk_pipeline_cache *cache,
2070 VkPipelineCreationFeedback *pipeline_feedback,
2071 const VkGraphicsPipelineCreateInfo *info,
2072 const struct vk_graphics_pipeline_state *state)
2073 {
2074 int64_t pipeline_start = os_time_get_nano();
2075
2076 struct anv_device *device = pipeline->base.device;
2077 const struct intel_device_info *devinfo = device->info;
2078 const struct brw_compiler *compiler = device->physical->compiler;
2079
2080 /* Set up the shaders given in this VkGraphicsPipelineCreateInfo::pStages[].
2081 * Shaders imported from libraries should already have been added by
2082 * anv_graphics_pipeline_import_lib().
2083 */
2084 uint32_t shader_count = anv_graphics_pipeline_imported_shader_count(stages);
2085 for (uint32_t i = 0; i < info->stageCount; i++) {
2086 gl_shader_stage stage = vk_to_mesa_shader_stage(info->pStages[i].stage);
2087
2088 /* If a pipeline library is loaded in this stage, we should ignore the
2089 * pStages[] entry of the same stage.
2090 */
2091 if (stages[stage].imported.bin != NULL)
2092 continue;
2093
2094 stages[stage].stage = stage;
2095 stages[stage].pipeline_flags = pipeline->base.flags;
2096 stages[stage].pipeline_pNext = info->pNext;
2097 stages[stage].info = &info->pStages[i];
2098 stages[stage].feedback_idx = shader_count++;
2099
2100 anv_stage_write_shader_hash(&stages[stage], device);
2101 }
2102
2103 /* Prepare shader keys for all shaders in pipeline->base.active_stages
2104 * (this includes libraries) before generating the hash for cache look up.
2105 *
2106 * We're doing this because the spec states:
2107 *
2108 * "When an implementation is looking up a pipeline in a pipeline cache,
2109 * if that pipeline is being created using linked libraries,
2110 * implementations should always return an equivalent pipeline created
2111 * with VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT if available,
2112 * whether or not that bit was specified."
2113 *
2114 * So even if the application does not request link optimization, we have
2115 * to do our cache lookup with the entire set of shader sha1s so that we
2116 * can find the best optimized pipeline, as if we had compiled all the
2117 * shaders together and had known the full graphics state.
2118 */
2119 anv_graphics_pipeline_init_keys(pipeline, state, stages);
2120
2121 uint32_t view_mask = state->rp ? state->rp->view_mask : 0;
2122
2123 unsigned char sha1[20];
2124 anv_pipeline_hash_graphics(pipeline, stages, view_mask, sha1);
2125
2126 for (unsigned s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
2127 if (!anv_pipeline_base_has_stage(pipeline, s))
2128 continue;
2129
2130 stages[s].cache_key.stage = s;
2131 memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
2132 }
2133
2134 const bool retain_shaders =
2135 pipeline->base.flags & VK_PIPELINE_CREATE_2_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT;
2136 const bool link_optimize =
2137 pipeline->base.flags & VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT;
2138
2139 VkResult result = VK_SUCCESS;
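/* Capturing internal representations (NIR, disassembly) requires an actual
 * compile, so the cache lookup is skipped in that case.
 */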
2140 const bool skip_cache_lookup =
2141 (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
2142
2143 if (!skip_cache_lookup) {
2144 bool found_all_shaders =
2145 anv_graphics_pipeline_load_cached_shaders(pipeline, cache, stages,
2146 link_optimize,
2147 pipeline_feedback);
2148
2149 if (found_all_shaders) {
2150 /* If we need to retain shaders, we need to also load from the NIR
2151 * cache.
2152 */
2153 if (pipeline->base.type == ANV_PIPELINE_GRAPHICS_LIB && retain_shaders) {
2154 result = anv_graphics_pipeline_load_nir(pipeline, cache,
2155 stages,
2156 pipeline->base.mem_ctx,
2157 false /* need_clone */);
2158 if (result != VK_SUCCESS) {
2159 vk_perf(VK_LOG_OBJS(cache ? &cache->base :
2160 &pipeline->base.device->vk.base),
2161 "Found all ISA shaders in the cache but not all NIR shaders.");
2162 } else {
2163 anv_graphics_lib_retain_shaders(pipeline, stages, false /* will_compile */);
2164 }
2165 }
2166
2167 if (result == VK_SUCCESS)
2168 goto done;
2169
2170 for (unsigned s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
2171 if (!anv_pipeline_base_has_stage(pipeline, s))
2172 continue;
2173
2174 if (stages[s].nir) {
2175 ralloc_free(stages[s].nir);
2176 stages[s].nir = NULL;
2177 }
2178
2179 assert(pipeline->shaders[s] != NULL);
2180 anv_shader_bin_unref(device, pipeline->shaders[s]);
2181 pipeline->shaders[s] = NULL;
2182 }
2183 }
2184 }
2185
2186 if (pipeline->base.flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR)
2187 return VK_PIPELINE_COMPILE_REQUIRED;
2188
2189 void *tmp_ctx = ralloc_context(NULL);
2190
2191 result = anv_graphics_pipeline_load_nir(pipeline, cache, stages,
2192 tmp_ctx, link_optimize /* need_clone */);
2193 if (result != VK_SUCCESS)
2194 goto fail;
2195
2196 /* Retain shaders now if asked, this only applies to libraries */
2197 if (pipeline->base.type == ANV_PIPELINE_GRAPHICS_LIB && retain_shaders)
2198 anv_graphics_lib_retain_shaders(pipeline, stages, true /* will_compile */);
2199
2200 /* The following steps will be executed for the shaders we need to compile:
2201 *
2202 * - specified through VkGraphicsPipelineCreateInfo::pStages[]
2203 *
2204 * - or compiled from libraries with retained shaders (libraries
2205 * compiled with CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT) if the
2206 * pipeline has the CREATE_LINK_TIME_OPTIMIZATION_BIT flag.
2207 */
2208
2209 /* Preprocess all NIR shaders. */
2210 for (int s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
2211 if (anv_graphics_pipeline_skip_shader_compile(pipeline, stages,
2212 link_optimize, s))
2213 continue;
2214
2215 anv_stage_allocate_bind_map_tables(&pipeline->base, &stages[s], tmp_ctx);
2216
2217 anv_pipeline_nir_preprocess(&pipeline->base, &stages[s]);
2218 }
2219
2220 if (stages[MESA_SHADER_MESH].info && stages[MESA_SHADER_FRAGMENT].info) {
2221 anv_apply_per_prim_attr_wa(stages[MESA_SHADER_MESH].nir,
2222 stages[MESA_SHADER_FRAGMENT].nir,
2223 device,
2224 info);
2225 }
2226
2227 /* Walk backwards to link */
2228 struct anv_pipeline_stage *next_stage = NULL;
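/* Each stage is linked against the stage that consumes its outputs, so
 * iterate from the fragment shader back to the first stage, passing the
 * previously visited stage as next_stage.
 */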
2229 for (int i = ARRAY_SIZE(graphics_shader_order) - 1; i >= 0; i--) {
2230 gl_shader_stage s = graphics_shader_order[i];
2231 if (anv_graphics_pipeline_skip_shader_compile(pipeline, stages,
2232 link_optimize, s))
2233 continue;
2234
2235 struct anv_pipeline_stage *stage = &stages[s];
2236
2237 switch (s) {
2238 case MESA_SHADER_VERTEX:
2239 anv_pipeline_link_vs(compiler, stage, next_stage);
2240 break;
2241 case MESA_SHADER_TESS_CTRL:
2242 anv_pipeline_link_tcs(compiler, stage, next_stage);
2243 break;
2244 case MESA_SHADER_TESS_EVAL:
2245 anv_pipeline_link_tes(compiler, stage, next_stage);
2246 break;
2247 case MESA_SHADER_GEOMETRY:
2248 anv_pipeline_link_gs(compiler, stage, next_stage);
2249 break;
2250 case MESA_SHADER_TASK:
2251 anv_pipeline_link_task(compiler, stage, next_stage);
2252 break;
2253 case MESA_SHADER_MESH:
2254 anv_pipeline_link_mesh(compiler, stage, next_stage);
2255 break;
2256 case MESA_SHADER_FRAGMENT:
2257 anv_pipeline_link_fs(compiler, stage, state->rp);
2258 break;
2259 default:
2260 unreachable("Invalid graphics shader stage");
2261 }
2262
2263 next_stage = stage;
2264 }
2265
2266 bool use_primitive_replication = false;
2267 if (devinfo->ver >= 12 && view_mask != 0) {
2268 /* For some pipelines HW Primitive Replication can be used instead of
2269 * instancing to implement Multiview. This depends on how viewIndex is
2270 * used by all the active shaders, so this check can't be done per
2271 * individual shader.
2272 */
2273 nir_shader *shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT] = {};
2274 for (unsigned s = 0; s < ARRAY_SIZE(shaders); s++)
2275 shaders[s] = stages[s].nir;
2276
2277 use_primitive_replication =
2278 anv_check_for_primitive_replication(device,
2279 pipeline->base.active_stages,
2280 shaders, view_mask);
2281 }
2282
2283 struct anv_pipeline_stage *prev_stage = NULL;
2284 for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
2285 gl_shader_stage s = graphics_shader_order[i];
2286 if (anv_graphics_pipeline_skip_shader_compile(pipeline, stages,
2287 link_optimize, s))
2288 continue;
2289
2290 struct anv_pipeline_stage *stage = &stages[s];
2291
2292 int64_t stage_start = os_time_get_nano();
2293
2294 anv_pipeline_lower_nir(&pipeline->base, tmp_ctx, stage,
2295 &pipeline->base.layout, view_mask,
2296 use_primitive_replication);
2297
2298 struct shader_info *cur_info = &stage->nir->info;
2299
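/* When the compiler wants unified interfaces, make the producer's written
 * outputs and the consumer's read inputs agree (tessellation levels are
 * handled separately and excluded here).
 */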
2300 if (prev_stage && compiler->nir_options[s]->unify_interfaces) {
2301 struct shader_info *prev_info = &prev_stage->nir->info;
2302
2303 prev_info->outputs_written |= cur_info->inputs_read &
2304 ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
2305 cur_info->inputs_read |= prev_info->outputs_written &
2306 ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
2307 prev_info->patch_outputs_written |= cur_info->patch_inputs_read;
2308 cur_info->patch_inputs_read |= prev_info->patch_outputs_written;
2309 }
2310
2311 anv_fixup_subgroup_size(device, cur_info);
2312
2313 stage->feedback.duration += os_time_get_nano() - stage_start;
2314
2315 prev_stage = stage;
2316 }
2317
2318 /* When the platform can write the per-primitive shading rate,
2319 * KHR_fragment_shading_rate is enabled, and either:
2320 * - there could be a fragment shader but we don't have it yet, or
2321 * - the fragment shader needs the fragment shading rate,
2322 *
2323 * figure out the last geometry stage that should write the primitive
2324 * shading rate, and ensure it is marked as written there. The backend will
2325 * write a default value if the shader doesn't actually write it.
2326 *
2327 * We iterate backwards over the stages and stop at the first shader that
2328 * can set the value.
2329 *
2330 * Don't apply this to MESH stages, as this is a per-primitive thing.
2331 */
2332 if (devinfo->has_coarse_pixel_primitive_and_cb &&
2333 device->vk.enabled_extensions.KHR_fragment_shading_rate &&
2334 pipeline_has_coarse_pixel(state->dynamic, state->ms, state->fsr) &&
2335 (!stages[MESA_SHADER_FRAGMENT].info ||
2336 stages[MESA_SHADER_FRAGMENT].key.wm.coarse_pixel) &&
2337 stages[MESA_SHADER_MESH].nir == NULL) {
2338 struct anv_pipeline_stage *last_psr = NULL;
2339
2340 for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
2341 gl_shader_stage s =
2342 graphics_shader_order[ARRAY_SIZE(graphics_shader_order) - i - 1];
2343
2344 if (anv_graphics_pipeline_skip_shader_compile(pipeline, stages,
2345 link_optimize, s) ||
2346 !gl_shader_stage_can_set_fragment_shading_rate(s))
2347 continue;
2348
2349 last_psr = &stages[s];
2350 break;
2351 }
2352
2353 /* Only set primitive shading rate if there is a pre-rasterization
2354 * shader in this pipeline/pipeline-library.
2355 */
2356 if (last_psr)
2357 last_psr->nir->info.outputs_written |= VARYING_BIT_PRIMITIVE_SHADING_RATE;
2358 }
2359
2360 prev_stage = NULL;
2361 for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
2362 gl_shader_stage s = graphics_shader_order[i];
2363 struct anv_pipeline_stage *stage = &stages[s];
2364
2365 if (anv_graphics_pipeline_skip_shader_compile(pipeline, stages, link_optimize, s))
2366 continue;
2367
2368 int64_t stage_start = os_time_get_nano();
2369
2370 void *stage_ctx = ralloc_context(NULL);
2371 char *error_str = NULL;
2372
2373 switch (s) {
2374 case MESA_SHADER_VERTEX:
2375 anv_pipeline_compile_vs(compiler, stage_ctx, pipeline,
2376 stage, view_mask, &error_str);
2377 break;
2378 case MESA_SHADER_TESS_CTRL:
2379 anv_pipeline_compile_tcs(compiler, stage_ctx, device,
2380 stage, prev_stage, &error_str);
2381 break;
2382 case MESA_SHADER_TESS_EVAL:
2383 anv_pipeline_compile_tes(compiler, stage_ctx, device,
2384 stage, prev_stage, &error_str);
2385 break;
2386 case MESA_SHADER_GEOMETRY:
2387 anv_pipeline_compile_gs(compiler, stage_ctx, device,
2388 stage, prev_stage, &error_str);
2389 break;
2390 case MESA_SHADER_TASK:
2391 anv_pipeline_compile_task(compiler, stage_ctx, device,
2392 stage, &error_str);
2393 break;
2394 case MESA_SHADER_MESH:
2395 anv_pipeline_compile_mesh(compiler, stage_ctx, device,
2396 stage, prev_stage, &error_str);
2397 break;
2398 case MESA_SHADER_FRAGMENT:
2399 anv_pipeline_compile_fs(compiler, stage_ctx, device,
2400 stage, prev_stage, pipeline,
2401 view_mask, use_primitive_replication,
2402 &error_str);
2403 break;
2404 default:
2405 unreachable("Invalid graphics shader stage");
2406 }
2407 if (stage->code == NULL) {
2408 if (error_str)
2409 result = vk_errorf(pipeline, VK_ERROR_UNKNOWN, "%s", error_str);
2410 else
2411 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2412 ralloc_free(stage_ctx);
2413 goto fail;
2414 }
2415
2416 anv_nir_validate_push_layout(device->physical, &stage->prog_data.base,
2417 &stage->bind_map);
2418
2419 struct anv_shader_upload_params upload_params = {
2420 .stage = s,
2421 .key_data = &stage->cache_key,
2422 .key_size = sizeof(stage->cache_key),
2423 .kernel_data = stage->code,
2424 .kernel_size = stage->prog_data.base.program_size,
2425 .prog_data = &stage->prog_data.base,
2426 .prog_data_size = brw_prog_data_size(s),
2427 .stats = stage->stats,
2428 .num_stats = stage->num_stats,
2429 .xfb_info = stage->nir->xfb_info,
2430 .bind_map = &stage->bind_map,
2431 .push_desc_info = &stage->push_desc_info,
2432 .dynamic_push_values = stage->dynamic_push_values,
2433 };
2434
2435 stage->bin =
2436 anv_device_upload_kernel(device, cache, &upload_params);
2437 if (!stage->bin) {
2438 ralloc_free(stage_ctx);
2439 result = vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
2440 goto fail;
2441 }
2442
2443 anv_pipeline_add_executables(&pipeline->base, stage);
2444 pipeline->source_hashes[s] = stage->source_hash;
2445 pipeline->shaders[s] = stage->bin;
2446
2447 ralloc_free(stage_ctx);
2448
2449 stage->feedback.duration += os_time_get_nano() - stage_start;
2450
2451 prev_stage = stage;
2452 }
2453
2454 /* Finally add the imported shaders that were not compiled as part of this
2455 * step.
2456 */
2457 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
2458 if (!anv_pipeline_base_has_stage(pipeline, s))
2459 continue;
2460
2461 if (pipeline->shaders[s] != NULL)
2462 continue;
2463
2464 /* We should have recompiled everything with link optimization. */
2465 assert(!link_optimize);
2466
2467 struct anv_pipeline_stage *stage = &stages[s];
2468
2469 pipeline->source_hashes[s] = stage->source_hash;
2470 pipeline->shaders[s] = anv_shader_bin_ref(stage->imported.bin);
2471 }
2472
2473 ralloc_free(tmp_ctx);
2474
2475 done:
2476
2477 /* Write the feedback index into the pipeline */
2478 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
2479 if (!anv_pipeline_base_has_stage(pipeline, s))
2480 continue;
2481
2482 struct anv_pipeline_stage *stage = &stages[s];
2483 pipeline->feedback_index[s] = stage->feedback_idx;
2484 pipeline->robust_flags[s] = stage->robust_flags;
2485
2486 anv_pipeline_account_shader(&pipeline->base, pipeline->shaders[s]);
2487 }
2488
2489 pipeline_feedback->duration = os_time_get_nano() - pipeline_start;
2490
2491 if (pipeline->shaders[MESA_SHADER_FRAGMENT]) {
2492 pipeline->fragment_dynamic =
2493 anv_graphics_pipeline_stage_fragment_dynamic(
2494 &stages[MESA_SHADER_FRAGMENT]);
2495 }
2496
2497 return VK_SUCCESS;
2498
2499 fail:
2500 ralloc_free(tmp_ctx);
2501
2502 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
2503 if (pipeline->shaders[s])
2504 anv_shader_bin_unref(device, pipeline->shaders[s]);
2505 }
2506
2507 return result;
2508 }
2509
2510 static VkResult
2511 anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
2512 struct vk_pipeline_cache *cache,
2513 const VkComputePipelineCreateInfo *info)
2514 {
2515 ASSERTED const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
2516 assert(sinfo->stage == VK_SHADER_STAGE_COMPUTE_BIT);
2517
2518 VkPipelineCreationFeedback pipeline_feedback = {
2519 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
2520 };
2521 int64_t pipeline_start = os_time_get_nano();
2522
2523 struct anv_device *device = pipeline->base.device;
2524 const struct brw_compiler *compiler = device->physical->compiler;
2525
2526 struct anv_pipeline_stage stage = {
2527 .stage = MESA_SHADER_COMPUTE,
2528 .info = &info->stage,
2529 .pipeline_flags = pipeline->base.flags,
2530 .pipeline_pNext = info->pNext,
2531 .cache_key = {
2532 .stage = MESA_SHADER_COMPUTE,
2533 },
2534 .feedback = {
2535 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
2536 },
2537 };
2538 anv_stage_write_shader_hash(&stage, device);
2539
2540 populate_cs_prog_key(&stage, device);
2541
2542 const bool skip_cache_lookup =
2543 (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
2544
2545 anv_pipeline_hash_compute(pipeline, &stage, stage.cache_key.sha1);
2546
2547 bool cache_hit = false;
2548 if (!skip_cache_lookup) {
2549 stage.bin = anv_device_search_for_kernel(device, cache,
2550 &stage.cache_key,
2551 sizeof(stage.cache_key),
2552 &cache_hit);
2553 }
2554
2555 if (stage.bin == NULL &&
2556 (pipeline->base.flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT))
2557 return VK_PIPELINE_COMPILE_REQUIRED;
2558
2559 void *mem_ctx = ralloc_context(NULL);
2560 if (stage.bin == NULL) {
2561 int64_t stage_start = os_time_get_nano();
2562
2563 anv_stage_allocate_bind_map_tables(&pipeline->base, &stage, mem_ctx);
2564
2565 VkResult result = anv_pipeline_stage_get_nir(&pipeline->base, cache,
2566 mem_ctx, &stage);
2567 if (result != VK_SUCCESS) {
2568 ralloc_free(mem_ctx);
2569 return result;
2570 }
2571
2572 anv_pipeline_nir_preprocess(&pipeline->base, &stage);
2573
2574 anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage,
2575 &pipeline->base.layout, 0 /* view_mask */,
2576 false /* use_primitive_replication */);
2577
2578 anv_fixup_subgroup_size(device, &stage.nir->info);
2579
2580 stage.num_stats = 1;
2581
2582 struct brw_compile_cs_params params = {
2583 .base = {
2584 .nir = stage.nir,
2585 .stats = stage.stats,
2586 .log_data = device,
2587 .mem_ctx = mem_ctx,
2588 .source_hash = stage.source_hash,
2589 },
2590 .key = &stage.key.cs,
2591 .prog_data = &stage.prog_data.cs,
2592 };
2593
2594 stage.code = brw_compile_cs(compiler, &params);
2595 if (stage.code == NULL) {
2596 VkResult result;
2597
2598 if (params.base.error_str)
2599 result = vk_errorf(pipeline, VK_ERROR_UNKNOWN, "%s", params.base.error_str);
2600 else
2601 result = vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
2602
2603 ralloc_free(mem_ctx);
2604 return result;
2605 }
2606
2607 anv_nir_validate_push_layout(device->physical, &stage.prog_data.base,
2608 &stage.bind_map);
2609
2610 struct anv_shader_upload_params upload_params = {
2611 .stage = MESA_SHADER_COMPUTE,
2612 .key_data = &stage.cache_key,
2613 .key_size = sizeof(stage.cache_key),
2614 .kernel_data = stage.code,
2615 .kernel_size = stage.prog_data.base.program_size,
2616 .prog_data = &stage.prog_data.base,
2617 .prog_data_size = sizeof(stage.prog_data.cs),
2618 .stats = stage.stats,
2619 .num_stats = stage.num_stats,
2620 .bind_map = &stage.bind_map,
2621 .push_desc_info = &stage.push_desc_info,
2622 .dynamic_push_values = stage.dynamic_push_values,
2623 };
2624
2625 stage.bin = anv_device_upload_kernel(device, cache, &upload_params);
2626 if (!stage.bin) {
2627 ralloc_free(mem_ctx);
2628 return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
2629 }
2630
2631 stage.feedback.duration = os_time_get_nano() - stage_start;
2632 }
2633
2634 anv_pipeline_account_shader(&pipeline->base, stage.bin);
2635 anv_pipeline_add_executables(&pipeline->base, &stage);
2636 pipeline->source_hash = stage.source_hash;
2637
2638 ralloc_free(mem_ctx);
2639
2640 if (cache_hit) {
2641 stage.feedback.flags |=
2642 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
2643 pipeline_feedback.flags |=
2644 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
2645 }
2646 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
2647
2648 const VkPipelineCreationFeedbackCreateInfo *create_feedback =
2649 vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
2650 if (create_feedback) {
2651 *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
2652
2653 if (create_feedback->pipelineStageCreationFeedbackCount) {
2654 assert(create_feedback->pipelineStageCreationFeedbackCount == 1);
2655 create_feedback->pPipelineStageCreationFeedbacks[0] = stage.feedback;
2656 }
2657 }
2658
2659 pipeline->cs = stage.bin;
2660
2661 return VK_SUCCESS;
2662 }
2663
2664 static VkResult
2665 anv_compute_pipeline_create(struct anv_device *device,
2666 struct vk_pipeline_cache *cache,
2667 const VkComputePipelineCreateInfo *pCreateInfo,
2668 const VkAllocationCallbacks *pAllocator,
2669 VkPipeline *pPipeline)
2670 {
2671 struct anv_compute_pipeline *pipeline;
2672 VkResult result;
2673
2674 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);
2675
2676 pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
2677 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2678 if (pipeline == NULL)
2679 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2680
2681 result = anv_pipeline_init(&pipeline->base, device,
2682 ANV_PIPELINE_COMPUTE,
2683 vk_compute_pipeline_create_flags(pCreateInfo),
2684 pAllocator);
2685 if (result != VK_SUCCESS) {
2686 vk_free2(&device->vk.alloc, pAllocator, pipeline);
2687 return result;
2688 }
2689
2690
2691 ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
2692 anv_pipeline_init_layout(&pipeline->base, pipeline_layout);
2693
2694 pipeline->base.active_stages = VK_SHADER_STAGE_COMPUTE_BIT;
2695
2696 anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS,
2697 pipeline->batch_data, sizeof(pipeline->batch_data));
2698
2699 result = anv_pipeline_compile_cs(pipeline, cache, pCreateInfo);
2700 if (result != VK_SUCCESS) {
2701 anv_pipeline_finish(&pipeline->base, device);
2702 vk_free2(&device->vk.alloc, pAllocator, pipeline);
2703 return result;
2704 }
2705
2706 anv_genX(device->info, compute_pipeline_emit)(pipeline);
2707
2708 ANV_RMV(compute_pipeline_create, device, pipeline, false);
2709
2710 *pPipeline = anv_pipeline_to_handle(&pipeline->base);
2711
2712 return pipeline->base.batch.status;
2713 }
2714
2715 VkResult anv_CreateComputePipelines(
2716 VkDevice _device,
2717 VkPipelineCache pipelineCache,
2718 uint32_t count,
2719 const VkComputePipelineCreateInfo* pCreateInfos,
2720 const VkAllocationCallbacks* pAllocator,
2721 VkPipeline* pPipelines)
2722 {
2723 ANV_FROM_HANDLE(anv_device, device, _device);
2724 ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache);
2725
2726 VkResult result = VK_SUCCESS;
2727
2728 unsigned i;
2729 for (i = 0; i < count; i++) {
2730 const VkPipelineCreateFlags2KHR flags =
2731 vk_compute_pipeline_create_flags(&pCreateInfos[i]);
2732 VkResult res = anv_compute_pipeline_create(device, pipeline_cache,
2733 &pCreateInfos[i],
2734 pAllocator, &pPipelines[i]);
2735
2736 if (res != VK_SUCCESS) {
2737 result = res;
2738 if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
2739 break;
2740 pPipelines[i] = VK_NULL_HANDLE;
2741 }
2742 }
2743
2744 for (; i < count; i++)
2745 pPipelines[i] = VK_NULL_HANDLE;
2746
2747 return result;
2748 }
2749
2750 /**
2751 * Calculate the desired L3 partitioning based on the current state of the
2752 * pipeline. For now this simply returns the conservative defaults calculated
2753 * by get_default_l3_weights(), but we could probably do better by gathering
2754 * more statistics from the pipeline state (e.g. a guess of the expected URB
2755 * usage and bound surfaces), or by using feedback from performance counters.
2756 */
2757 void
2758 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
2759 {
2760 const struct intel_device_info *devinfo = pipeline->device->info;
2761
2762 const struct intel_l3_weights w =
2763 intel_get_default_l3_weights(devinfo, true, needs_slm);
2764
2765 pipeline->l3_config = intel_get_l3_config(devinfo, w);
2766 }
2767
2768 static uint32_t
2769 get_vs_input_elements(const struct brw_vs_prog_data *vs_prog_data)
2770 {
2771 /* Pull inputs_read out of the VS prog data */
2772 const uint64_t inputs_read = vs_prog_data->inputs_read;
2773 const uint64_t double_inputs_read =
2774 vs_prog_data->double_inputs_read & inputs_read;
2775 assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0);
2776 const uint32_t elements = inputs_read >> VERT_ATTRIB_GENERIC0;
2777 const uint32_t elements_double = double_inputs_read >> VERT_ATTRIB_GENERIC0;
2778
2779 return __builtin_popcount(elements) -
2780 __builtin_popcount(elements_double) / 2;
2781 }
2782
2783 static void
2784 anv_graphics_pipeline_emit(struct anv_graphics_pipeline *pipeline,
2785 const struct vk_graphics_pipeline_state *state)
2786 {
2787 pipeline->view_mask = state->rp->view_mask;
2788
2789 anv_pipeline_setup_l3_config(&pipeline->base.base, false);
2790
2791 if (anv_pipeline_is_primitive(pipeline)) {
2792 const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
2793
2794 /* The total number of vertex elements we need to program. We might need
2795 * a couple more to implement some of the draw parameters.
2796 */
2797 pipeline->svgs_count =
2798 (vs_prog_data->uses_vertexid ||
2799 vs_prog_data->uses_instanceid ||
2800 vs_prog_data->uses_firstvertex ||
2801 vs_prog_data->uses_baseinstance) + vs_prog_data->uses_drawid;
2802
2803 pipeline->vs_input_elements = get_vs_input_elements(vs_prog_data);
2804
2805 pipeline->vertex_input_elems =
2806 (BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_VI) ?
2807 0 : pipeline->vs_input_elements) + pipeline->svgs_count;
2808
2809 /* Our implementation of VK_KHR_multiview uses instancing to draw the
2810 * different views when primitive replication cannot be used. If the
2811 * client asks for instancing, we need to multiply the client's instance
2812 * count at draw time, and the instance divisors in the vertex bindings,
2813 * by the number of views to ensure that we repeat the client's
2814 * per-instance data once for each view.
2815 */
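/* Worked example: view_mask = 0x5 (views 0 and 2) without primitive
 * replication gives instance_multiplier = 2, so a draw with instanceCount
 * N ends up using 2 * N hardware instances.
 */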
2816 const bool uses_primitive_replication =
2817 anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map.num_pos_slots > 1;
2818 pipeline->instance_multiplier = 1;
2819 if (pipeline->view_mask && !uses_primitive_replication)
2820 pipeline->instance_multiplier = util_bitcount(pipeline->view_mask);
2821 } else {
2822 assert(anv_pipeline_is_mesh(pipeline));
2823 /* TODO(mesh): Mesh vs. Multiview with Instancing. */
2824 }
2825
2826
2827 pipeline->dynamic_patch_control_points =
2828 anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL) &&
2829 BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS) &&
2830 (pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->dynamic_push_values &
2831 ANV_DYNAMIC_PUSH_INPUT_VERTICES);
2832
2833 if (pipeline->base.shaders[MESA_SHADER_FRAGMENT] && state->ms) {
2834 pipeline->sample_shading_enable = state->ms->sample_shading_enable;
2835 pipeline->min_sample_shading = state->ms->min_sample_shading;
2836 }
2837
2838 /* Mark all color outputs as unused by default */
2839 memset(pipeline->color_output_mapping,
2840 ANV_COLOR_OUTPUT_UNUSED,
2841 sizeof(pipeline->color_output_mapping));
2842
2843 if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
2844 /* Count the number of color attachments in the binding table */
2845 const struct anv_pipeline_bind_map *bind_map =
2846 &pipeline->base.shaders[MESA_SHADER_FRAGMENT]->bind_map;
2847
2848 if (state->cal != NULL) {
2849 /* Build a map of fragment color output to attachment */
2850 uint8_t rt_to_att[MAX_RTS];
2851 memset(rt_to_att, ANV_COLOR_OUTPUT_DISABLED, MAX_RTS);
2852 for (uint32_t i = 0; i < MAX_RTS; i++) {
2853 if (state->cal->color_map[i] != MESA_VK_ATTACHMENT_UNUSED)
2854 rt_to_att[state->cal->color_map[i]] = i;
2855 }
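/* Example: color_map = {1, 0} (attachment 0 accessed at location 1 and
 * attachment 1 at location 0) yields rt_to_att = {1, 0}, so fragment
 * output 0 is routed to attachment 1 below.
 */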
2856
2857 /* For each fragment shader output that is not unused, apply the
2858 * remapping to pipeline->color_output_mapping.
2859 */
2860 unsigned i;
2861 for (i = 0; i < MIN2(bind_map->surface_count, MAX_RTS); i++) {
2862 if (bind_map->surface_to_descriptor[i].set !=
2863 ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
2864 break;
2865
2866 uint32_t index = bind_map->surface_to_descriptor[i].index;
2867 if (index >= MAX_RTS) {
2868 assert(index <= 0xff);
2869 pipeline->color_output_mapping[i] = index;
2870 } else {
2871 pipeline->color_output_mapping[i] = rt_to_att[i];
2872 }
2873 }
2874 pipeline->num_color_outputs = i;
2875 }
2876 }
2877
2878 const struct anv_device *device = pipeline->base.base.device;
2879 const struct intel_device_info *devinfo = device->info;
2880 anv_genX(devinfo, graphics_pipeline_emit)(pipeline, state);
2881 }
2882
2883 static void
2884 anv_graphics_pipeline_import_layout(struct anv_graphics_base_pipeline *pipeline,
2885 struct anv_pipeline_sets_layout *layout)
2886 {
2887 pipeline->base.layout.independent_sets |= layout->independent_sets;
2888
2889 for (uint32_t s = 0; s < layout->num_sets; s++) {
2890 if (layout->set[s].layout == NULL)
2891 continue;
2892
2893 anv_pipeline_sets_layout_add(&pipeline->base.layout, s,
2894 layout->set[s].layout);
2895 }
2896 }
2897
2898 static void
2899 anv_graphics_pipeline_import_lib(struct anv_graphics_base_pipeline *pipeline,
2900 bool link_optimize,
2901 bool retain_shaders,
2902 struct anv_pipeline_stage *stages,
2903 struct anv_graphics_lib_pipeline *lib)
2904 {
2905 struct anv_pipeline_sets_layout *lib_layout =
2906 &lib->base.base.layout;
2907 anv_graphics_pipeline_import_layout(pipeline, lib_layout);
2908
2909 /* We can't have shaders specified twice through libraries. */
2910 assert((pipeline->base.active_stages & lib->base.base.active_stages) == 0);
2911
2912 /* VK_EXT_graphics_pipeline_library:
2913 *
2914 * "To perform link time optimizations,
2915 * VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT must
2916 * be specified on all pipeline libraries that are being linked
2917 * together. Implementations should retain any additional information
2918 * needed to perform optimizations at the final link step when this bit
2919 * is present."
2920 */
2921 assert(!link_optimize || lib->retain_shaders);
2922
2923 pipeline->base.active_stages |= lib->base.base.active_stages;
2924
2925 /* Propagate the fragment dynamic flag, unless we're doing link
2926 * optimization, in which case we'll have all the state information and
2927 * this will never be dynamic.
2928 */
2929 if (!link_optimize) {
2930 if (lib->base.fragment_dynamic) {
2931 assert(lib->base.base.active_stages & VK_SHADER_STAGE_FRAGMENT_BIT);
2932 pipeline->fragment_dynamic = true;
2933 }
2934 }
2935
2936 uint32_t shader_count = anv_graphics_pipeline_imported_shader_count(stages);
2937 for (uint32_t s = 0; s < ARRAY_SIZE(lib->base.shaders); s++) {
2938 if (lib->base.shaders[s] == NULL)
2939 continue;
2940
2941 stages[s].stage = s;
2942 stages[s].pipeline_flags = pipeline->base.flags;
2943 stages[s].feedback_idx = shader_count + lib->base.feedback_index[s];
2944 stages[s].robust_flags = lib->base.robust_flags[s];
2945
2946 /* Always import the shader sha1; it will be used for cache lookup. */
2947 memcpy(stages[s].shader_sha1, lib->retained_shaders[s].shader_sha1,
2948 sizeof(stages[s].shader_sha1));
2949 stages[s].source_hash = lib->base.source_hashes[s];
2950
2951 stages[s].subgroup_size_type = lib->retained_shaders[s].subgroup_size_type;
2952 stages[s].imported.nir = lib->retained_shaders[s].nir;
2953 stages[s].imported.bin = lib->base.shaders[s];
2954 }
2955
2956 /* When not link optimizing, import the executables (shader descriptions
2957 * for VK_KHR_pipeline_executable_properties). With link optimization there
2958 * is a chance it'll produce different binaries, so we'll add the optimized
2959 * version later.
2960 */
2961 if (!link_optimize) {
2962 util_dynarray_foreach(&lib->base.base.executables,
2963 struct anv_pipeline_executable, exe) {
2964 util_dynarray_append(&pipeline->base.executables,
2965 struct anv_pipeline_executable, *exe);
2966 }
2967 }
2968 }
2969
2970 static void
2971 anv_graphics_lib_validate_shaders(struct anv_graphics_lib_pipeline *lib,
2972 bool retained_shaders)
2973 {
2974 for (uint32_t s = 0; s < ARRAY_SIZE(lib->retained_shaders); s++) {
2975 if (anv_pipeline_base_has_stage(&lib->base, s)) {
2976 assert(!retained_shaders || lib->retained_shaders[s].nir != NULL);
2977 assert(lib->base.shaders[s] != NULL);
2978 }
2979 }
2980 }
2981
2982 static VkResult
2983 anv_graphics_lib_pipeline_create(struct anv_device *device,
2984 struct vk_pipeline_cache *cache,
2985 const VkGraphicsPipelineCreateInfo *pCreateInfo,
2986 const VkAllocationCallbacks *pAllocator,
2987 VkPipeline *pPipeline)
2988 {
2989 struct anv_pipeline_stage stages[ANV_GRAPHICS_SHADER_STAGE_COUNT] = {};
2990 VkPipelineCreationFeedback pipeline_feedback = {
2991 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
2992 };
2993 int64_t pipeline_start = os_time_get_nano();
2994
2995 struct anv_graphics_lib_pipeline *pipeline;
2996 VkResult result;
2997
2998 const VkPipelineCreateFlags2KHR flags =
2999 vk_graphics_pipeline_create_flags(pCreateInfo);
3000 assert(flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR);
3001
3002 const VkPipelineLibraryCreateInfoKHR *libs_info =
3003 vk_find_struct_const(pCreateInfo->pNext,
3004 PIPELINE_LIBRARY_CREATE_INFO_KHR);
3005
3006 pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
3007 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3008 if (pipeline == NULL)
3009 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3010
3011 result = anv_pipeline_init(&pipeline->base.base, device,
3012 ANV_PIPELINE_GRAPHICS_LIB, flags,
3013 pAllocator);
3014 if (result != VK_SUCCESS) {
3015 vk_free2(&device->vk.alloc, pAllocator, pipeline);
3016 if (result == VK_PIPELINE_COMPILE_REQUIRED)
3017 *pPipeline = VK_NULL_HANDLE;
3018 return result;
3019 }
3020
3021 /* Capture the retain state before we compile/load any shader. */
3022 pipeline->retain_shaders =
3023 (flags & VK_PIPELINE_CREATE_2_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT) != 0;
3024
3025 /* If we have libraries, import them first. */
3026 if (libs_info) {
3027 for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
3028 ANV_FROM_HANDLE(anv_pipeline, pipeline_lib, libs_info->pLibraries[i]);
3029 struct anv_graphics_lib_pipeline *gfx_pipeline_lib =
3030 anv_pipeline_to_graphics_lib(pipeline_lib);
3031
3032 vk_graphics_pipeline_state_merge(&pipeline->state, &gfx_pipeline_lib->state);
3033 anv_graphics_pipeline_import_lib(&pipeline->base,
3034 false /* link_optimize */,
3035 pipeline->retain_shaders,
3036 stages, gfx_pipeline_lib);
3037 }
3038 }
3039
3040 result = vk_graphics_pipeline_state_fill(&device->vk,
3041 &pipeline->state, pCreateInfo,
3042 NULL /* driver_rp */,
3043 0 /* driver_rp_flags */,
3044 &pipeline->all_state, NULL, 0, NULL);
3045 if (result != VK_SUCCESS) {
3046 anv_pipeline_finish(&pipeline->base.base, device);
3047 vk_free2(&device->vk.alloc, pAllocator, pipeline);
3048 return result;
3049 }
3050
3051 pipeline->base.base.active_stages = pipeline->state.shader_stages;
3052
3053 /* After we've imported all the libraries' layouts, import the pipeline
3054 * layout and hash the whole lot.
3055 */
3056 ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
3057 if (pipeline_layout != NULL) {
3058 anv_graphics_pipeline_import_layout(&pipeline->base,
3059 &pipeline_layout->sets_layout);
3060 }
3061
3062 anv_pipeline_sets_layout_hash(&pipeline->base.base.layout);
3063
3064 /* Compile shaders. We can skip this if there are no active stages in the
3065 * pipeline.
3066 */
3067 if (pipeline->base.base.active_stages != 0) {
3068 result = anv_graphics_pipeline_compile(&pipeline->base, stages,
3069 cache, &pipeline_feedback,
3070 pCreateInfo, &pipeline->state);
3071 if (result != VK_SUCCESS) {
3072 anv_pipeline_finish(&pipeline->base.base, device);
3073 vk_free2(&device->vk.alloc, pAllocator, pipeline);
3074 return result;
3075 }
3076 }
3077
3078 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
3079
3080 anv_fill_pipeline_creation_feedback(&pipeline->base, &pipeline_feedback,
3081 pCreateInfo, stages);
3082
3083 anv_graphics_lib_validate_shaders(
3084 pipeline,
3085 flags & VK_PIPELINE_CREATE_2_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT);
3086
3087 *pPipeline = anv_pipeline_to_handle(&pipeline->base.base);
3088
3089 return VK_SUCCESS;
3090 }
3091
3092 static VkResult
3093 anv_graphics_pipeline_create(struct anv_device *device,
3094 struct vk_pipeline_cache *cache,
3095 const VkGraphicsPipelineCreateInfo *pCreateInfo,
3096 const VkAllocationCallbacks *pAllocator,
3097 VkPipeline *pPipeline)
3098 {
3099 struct anv_pipeline_stage stages[ANV_GRAPHICS_SHADER_STAGE_COUNT] = {};
3100 VkPipelineCreationFeedback pipeline_feedback = {
3101 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
3102 };
3103 int64_t pipeline_start = os_time_get_nano();
3104
3105 struct anv_graphics_pipeline *pipeline;
3106 VkResult result;
3107
3108 const VkPipelineCreateFlags2KHR flags =
3109 vk_graphics_pipeline_create_flags(pCreateInfo);
3110 assert((flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR) == 0);
3111
3112 const VkPipelineLibraryCreateInfoKHR *libs_info =
3113 vk_find_struct_const(pCreateInfo->pNext,
3114 PIPELINE_LIBRARY_CREATE_INFO_KHR);
3115
3116 pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
3117 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3118 if (pipeline == NULL)
3119 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3120
3121 /* Initialize some information required by shaders */
3122 result = anv_pipeline_init(&pipeline->base.base, device,
3123 ANV_PIPELINE_GRAPHICS, flags,
3124 pAllocator);
3125 if (result != VK_SUCCESS) {
3126 vk_free2(&device->vk.alloc, pAllocator, pipeline);
3127 return result;
3128 }
3129
3130 const bool link_optimize =
3131 (flags & VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT) != 0;
3132
3133 struct vk_graphics_pipeline_all_state all;
3134 struct vk_graphics_pipeline_state state = { };
3135
3136 /* If we have libraries, import them first. */
3137 if (libs_info) {
3138 for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
3139 ANV_FROM_HANDLE(anv_pipeline, pipeline_lib, libs_info->pLibraries[i]);
3140 struct anv_graphics_lib_pipeline *gfx_pipeline_lib =
3141 anv_pipeline_to_graphics_lib(pipeline_lib);
3142
3143 /* If we have link time optimization, all libraries must be created
3144 * with
3145 * VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT.
3146 */
3147 assert(!link_optimize || gfx_pipeline_lib->retain_shaders);
3148
3149 vk_graphics_pipeline_state_merge(&state, &gfx_pipeline_lib->state);
3150 anv_graphics_pipeline_import_lib(&pipeline->base,
3151 link_optimize,
3152 false,
3153 stages,
3154 gfx_pipeline_lib);
3155 }
3156 }
3157
3158 result = vk_graphics_pipeline_state_fill(&device->vk, &state, pCreateInfo,
3159 NULL /* driver_rp */,
3160 0 /* driver_rp_flags */,
3161 &all, NULL, 0, NULL);
3162 if (result != VK_SUCCESS) {
3163 anv_pipeline_finish(&pipeline->base.base, device);
3164 vk_free2(&device->vk.alloc, pAllocator, pipeline);
3165 return result;
3166 }
3167
3168 pipeline->dynamic_state.vi = &pipeline->vertex_input;
3169 pipeline->dynamic_state.ms.sample_locations = &pipeline->base.sample_locations;
3170 vk_dynamic_graphics_state_fill(&pipeline->dynamic_state, &state);
3171
3172 pipeline->base.base.active_stages = state.shader_stages;
3173
3174 /* Sanity check on the shaders */
3175 assert(pipeline->base.base.active_stages & VK_SHADER_STAGE_VERTEX_BIT ||
3176 pipeline->base.base.active_stages & VK_SHADER_STAGE_MESH_BIT_EXT);
3177
3178 if (anv_pipeline_is_mesh(pipeline)) {
3179 assert(device->physical->vk.supported_extensions.EXT_mesh_shader);
3180 }
3181
3182 /* After we've imported all the libraries' layouts, import the pipeline
3183 * layout and hash the whole lot.
3184 */
3185 ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
3186 if (pipeline_layout != NULL) {
3187 anv_graphics_pipeline_import_layout(&pipeline->base,
3188 &pipeline_layout->sets_layout);
3189 }
3190
3191 anv_pipeline_sets_layout_hash(&pipeline->base.base.layout);
3192
3193 /* Compile shaders; all required information should have been copied in
3194 * the previous step. We can skip this if there are no active stages in the
3195 * pipeline.
3196 */
3197 result = anv_graphics_pipeline_compile(&pipeline->base, stages,
3198 cache, &pipeline_feedback,
3199 pCreateInfo, &state);
3200 if (result != VK_SUCCESS) {
3201 anv_pipeline_finish(&pipeline->base.base, device);
3202 vk_free2(&device->vk.alloc, pAllocator, pipeline);
3203 return result;
3204 }
3205
3206 /* Prepare a batch for the commands and emit all the non-dynamic ones.
3207 */
3208 anv_batch_set_storage(&pipeline->base.base.batch, ANV_NULL_ADDRESS,
3209 pipeline->batch_data, sizeof(pipeline->batch_data));
3210
3211 if (pipeline->base.base.active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
3212 pipeline->base.base.active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
3213
3214 if (anv_pipeline_is_mesh(pipeline))
3215 assert(device->physical->vk.supported_extensions.EXT_mesh_shader);
3216
3217 anv_graphics_pipeline_emit(pipeline, &state);
3218
3219 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
3220
3221 anv_fill_pipeline_creation_feedback(&pipeline->base, &pipeline_feedback,
3222 pCreateInfo, stages);
3223
3224 ANV_RMV(graphics_pipeline_create, device, pipeline, false);
3225
3226 *pPipeline = anv_pipeline_to_handle(&pipeline->base.base);
3227
3228 return pipeline->base.base.batch.status;
3229 }
3230
3231 VkResult anv_CreateGraphicsPipelines(
3232 VkDevice _device,
3233 VkPipelineCache pipelineCache,
3234 uint32_t count,
3235 const VkGraphicsPipelineCreateInfo* pCreateInfos,
3236 const VkAllocationCallbacks* pAllocator,
3237 VkPipeline* pPipelines)
3238 {
3239 ANV_FROM_HANDLE(anv_device, device, _device);
3240 ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache);
3241
3242 VkResult result = VK_SUCCESS;
3243
3244 unsigned i;
3245 for (i = 0; i < count; i++) {
3246 assert(pCreateInfos[i].sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
3247
3248 const VkPipelineCreateFlags2KHR flags =
3249 vk_graphics_pipeline_create_flags(&pCreateInfos[i]);
3250 VkResult res;
3251 if (flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR) {
3252 res = anv_graphics_lib_pipeline_create(device, pipeline_cache,
3253 &pCreateInfos[i],
3254 pAllocator,
3255 &pPipelines[i]);
3256 } else {
3257 res = anv_graphics_pipeline_create(device,
3258 pipeline_cache,
3259 &pCreateInfos[i],
3260 pAllocator, &pPipelines[i]);
3261 }
3262
3263 if (res != VK_SUCCESS) {
3264 result = res;
3265 if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
3266 break;
3267 pPipelines[i] = VK_NULL_HANDLE;
3268 }
3269 }
3270
3271 for (; i < count; i++)
3272 pPipelines[i] = VK_NULL_HANDLE;
3273
3274 return result;
3275 }
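
/* Illustration only (not driver code): a minimal sketch of how an application
 * might exercise the two creation paths above via
 * VK_EXT_graphics_pipeline_library. The amount of state chained on each
 * library is elided and the handle names are hypothetical; only the flags
 * mirror what the code above checks for.
 *
 *    VkGraphicsPipelineCreateInfo lib_info = {
 *       .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
 *       .flags = VK_PIPELINE_CREATE_LIBRARY_BIT_KHR |
 *                VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT,
 *       // ... vertex-input / pre-raster / fragment state for this library ...
 *    };
 *    vkCreateGraphicsPipelines(device, cache, 1, &lib_info, NULL, &lib);
 *
 *    VkPipelineLibraryCreateInfoKHR link = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_LIBRARY_CREATE_INFO_KHR,
 *       .libraryCount = 1,
 *       .pLibraries = &lib,
 *    };
 *    VkGraphicsPipelineCreateInfo linked_info = {
 *       .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
 *       .pNext = &link,
 *       .flags = VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT,
 *    };
 *    vkCreateGraphicsPipelines(device, cache, 1, &linked_info, NULL, &pipeline);
 *
 * The first call takes the anv_graphics_lib_pipeline_create() path above; the
 * second takes anv_graphics_pipeline_create(), which imports the library's
 * retained shaders and relinks them with link-time optimization.
 */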
3276
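/* Rematerialization callback for nir_lower_shader_calls below: values coming
 * from resource_intel intrinsics are recomputed in the resume shader rather
 * than spilled to the ray-tracing stack across a shader call (recomputing is
 * cheap and keeps the resource annotation visible to the backend).
 */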
3277 static bool
3278 should_remat_cb(nir_instr *instr, void *data)
3279 {
3280 if (instr->type != nir_instr_type_intrinsic)
3281 return false;
3282
3283 return nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_resource_intel;
3284 }
3285
3286 static VkResult
3287 compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline,
3288 struct vk_pipeline_cache *cache,
3289 nir_shader *nir,
3290 struct anv_pipeline_stage *stage,
3291 void *mem_ctx)
3292 {
3293 const struct brw_compiler *compiler =
3294 pipeline->base.device->physical->compiler;
3295 const struct intel_device_info *devinfo = compiler->devinfo;
3296
3297 nir_shader **resume_shaders = NULL;
3298 uint32_t num_resume_shaders = 0;
3299 if (nir->info.stage != MESA_SHADER_COMPUTE) {
3300 const nir_lower_shader_calls_options opts = {
3301 .address_format = nir_address_format_64bit_global,
3302 .stack_alignment = BRW_BTD_STACK_ALIGN,
3303 .localized_loads = true,
3304 .vectorizer_callback = brw_nir_should_vectorize_mem,
3305 .vectorizer_data = NULL,
3306 .should_remat_callback = should_remat_cb,
3307 };
3308
3309 NIR_PASS(_, nir, nir_lower_shader_calls, &opts,
3310 &resume_shaders, &num_resume_shaders, mem_ctx);
3311 NIR_PASS(_, nir, brw_nir_lower_shader_calls, &stage->key.bs);
3312 NIR_PASS_V(nir, brw_nir_lower_rt_intrinsics, devinfo);
3313 }
3314
3315 for (unsigned i = 0; i < num_resume_shaders; i++) {
3316 NIR_PASS(_, resume_shaders[i], brw_nir_lower_shader_calls, &stage->key.bs);
3317 NIR_PASS_V(resume_shaders[i], brw_nir_lower_rt_intrinsics, devinfo);
3318 }
3319
3320 struct brw_compile_bs_params params = {
3321 .base = {
3322 .nir = nir,
3323 .stats = stage->stats,
3324 .log_data = pipeline->base.device,
3325 .mem_ctx = mem_ctx,
3326 .source_hash = stage->source_hash,
3327 },
3328 .key = &stage->key.bs,
3329 .prog_data = &stage->prog_data.bs,
3330 .num_resume_shaders = num_resume_shaders,
3331 .resume_shaders = resume_shaders,
3332 };
3333
3334 stage->code = brw_compile_bs(compiler, &params);
3335 if (stage->code == NULL) {
3336 VkResult result;
3337
3338 if (params.base.error_str)
3339 result = vk_errorf(pipeline, VK_ERROR_UNKNOWN, "%s", params.base.error_str);
3340 else
3341 result = vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
3342
3343 return result;
3344 }
3345
3346 struct anv_shader_upload_params upload_params = {
3347 .stage = stage->stage,
3348 .key_data = &stage->cache_key,
3349 .key_size = sizeof(stage->cache_key),
3350 .kernel_data = stage->code,
3351 .kernel_size = stage->prog_data.base.program_size,
3352 .prog_data = &stage->prog_data.base,
3353 .prog_data_size = brw_prog_data_size(stage->stage),
3354 .stats = stage->stats,
3355 .num_stats = 1,
3356 .bind_map = &stage->bind_map,
3357 .push_desc_info = &stage->push_desc_info,
3358 .dynamic_push_values = stage->dynamic_push_values,
3359 };
3360
3361 stage->bin =
3362 anv_device_upload_kernel(pipeline->base.device, cache, &upload_params);
3363 if (stage->bin == NULL)
3364 return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
3365
3366 anv_pipeline_add_executables(&pipeline->base, stage);
3367
3368 return VK_SUCCESS;
3369 }
3370
3371 static bool
3372 is_rt_stack_size_dynamic(const VkRayTracingPipelineCreateInfoKHR *info)
3373 {
3374 if (info->pDynamicState == NULL)
3375 return false;
3376
3377 for (unsigned i = 0; i < info->pDynamicState->dynamicStateCount; i++) {
3378 if (info->pDynamicState->pDynamicStates[i] ==
3379 VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR)
3380 return true;
3381 }
3382
3383 return false;
3384 }
3385
3386 static void
3387 anv_pipeline_compute_ray_tracing_stacks(struct anv_ray_tracing_pipeline *pipeline,
3388 const VkRayTracingPipelineCreateInfoKHR *info,
3389 uint32_t *stack_max)
3390 {
3391 if (is_rt_stack_size_dynamic(info)) {
3392 pipeline->stack_size = 0; /* 0 means dynamic */
3393 } else {
3394 /* From the Vulkan spec:
3395 *
3396 * "If the stack size is not set explicitly, the stack size for a
3397 * pipeline is:
3398 *
3399 * rayGenStackMax +
3400 * min(1, maxPipelineRayRecursionDepth) ×
3401 * max(closestHitStackMax, missStackMax,
3402 * intersectionStackMax + anyHitStackMax) +
3403 * max(0, maxPipelineRayRecursionDepth-1) ×
3404 * max(closestHitStackMax, missStackMax) +
3405 * 2 × callableStackMax"
3406 */
3407 pipeline->stack_size =
3408 stack_max[MESA_SHADER_RAYGEN] +
3409 MIN2(1, info->maxPipelineRayRecursionDepth) *
3410 MAX4(stack_max[MESA_SHADER_CLOSEST_HIT],
3411 stack_max[MESA_SHADER_MISS],
3412 stack_max[MESA_SHADER_INTERSECTION],
3413 stack_max[MESA_SHADER_ANY_HIT]) +
3414 MAX2(0, (int)info->maxPipelineRayRecursionDepth - 1) *
3415 MAX2(stack_max[MESA_SHADER_CLOSEST_HIT],
3416 stack_max[MESA_SHADER_MISS]) +
3417 2 * stack_max[MESA_SHADER_CALLABLE];
3418
3419 /* This is an extremely unlikely case but we need to set it to some
3420 * non-zero value so that we don't accidentally think it's dynamic.
3421 * Our minimum stack size is 2KB anyway so we could set it to any small
3422 * value we like.
3423 */
3424 if (pipeline->stack_size == 0)
3425 pipeline->stack_size = 1;
3426 }
3427 }
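
/* Worked example of the formula above, using hypothetical per-stage maxima
 * (in bytes) purely for illustration: rayGen = 256, closestHit = 128,
 * miss = 64, intersection = 96, anyHit = 32, callable = 0, and
 * maxPipelineRayRecursionDepth = 2:
 *
 *    256                                    // rayGenStackMax
 *    + MIN2(1, 2) * MAX4(128, 64, 96, 32)   // 1 * 128
 *    + MAX2(0, 2 - 1) * MAX2(128, 64)       // 1 * 128
 *    + 2 * 0                                // callable
 *    = 512 bytes
 *
 * Note that the code takes MAX4() over all four hit/miss stage maxima rather
 * than literally summing intersection + any-hit as in the spec text; anv
 * compiles a procedural group's intersection and any-hit shaders into a
 * single binary, so the combined requirement should already land in the
 * intersection entry.
 */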
3428
3429 static enum brw_rt_ray_flags
3430 anv_pipeline_get_pipeline_ray_flags(VkPipelineCreateFlags2KHR flags)
3431 {
3432 uint32_t ray_flags = 0;
3433
3434 const bool rt_skip_triangles =
3435 flags & VK_PIPELINE_CREATE_2_RAY_TRACING_SKIP_TRIANGLES_BIT_KHR;
3436 const bool rt_skip_aabbs =
3437 flags & VK_PIPELINE_CREATE_2_RAY_TRACING_SKIP_AABBS_BIT_KHR;
3438 assert(!(rt_skip_triangles && rt_skip_aabbs));
3439
3440 if (rt_skip_triangles)
3441 ray_flags |= BRW_RT_RAY_FLAG_SKIP_TRIANGLES;
3442 else if (rt_skip_aabbs)
3443 ray_flags |= BRW_RT_RAY_FLAG_SKIP_AABBS;
3444
3445 return ray_flags;
3446 }
3447
3448 static struct anv_pipeline_stage *
3449 anv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline,
3450 const VkRayTracingPipelineCreateInfoKHR *info,
3451 void *tmp_pipeline_ctx)
3452 {
3453 struct anv_device *device = pipeline->base.device;
3454 /* Create enough stage entries for all shader modules plus potential
3455 * combinations in the groups.
3456 */
3457 struct anv_pipeline_stage *stages =
3458 rzalloc_array(tmp_pipeline_ctx, struct anv_pipeline_stage, info->stageCount);
3459
3460 enum brw_rt_ray_flags ray_flags =
3461 anv_pipeline_get_pipeline_ray_flags(pipeline->base.flags);
3462
3463 for (uint32_t i = 0; i < info->stageCount; i++) {
3464 const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
3465 if (vk_pipeline_shader_stage_is_null(sinfo))
3466 continue;
3467
3468 int64_t stage_start = os_time_get_nano();
3469
3470 stages[i] = (struct anv_pipeline_stage) {
3471 .stage = vk_to_mesa_shader_stage(sinfo->stage),
3472 .pipeline_flags = pipeline->base.flags,
3473 .pipeline_pNext = info->pNext,
3474 .info = sinfo,
3475 .cache_key = {
3476 .stage = vk_to_mesa_shader_stage(sinfo->stage),
3477 },
3478 .feedback = {
3479 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
3480 },
3481 };
3482
3483 anv_stage_allocate_bind_map_tables(&pipeline->base, &stages[i],
3484 tmp_pipeline_ctx);
3485
3486 pipeline->base.active_stages |= sinfo->stage;
3487
3488 anv_stage_write_shader_hash(&stages[i], device);
3489
3490 populate_bs_prog_key(&stages[i],
3491 pipeline->base.device,
3492 ray_flags);
3493
3494 if (stages[i].stage != MESA_SHADER_INTERSECTION) {
3495 anv_pipeline_hash_ray_tracing_shader(pipeline, &stages[i],
3496 stages[i].cache_key.sha1);
3497 }
3498
3499 stages[i].feedback.duration += os_time_get_nano() - stage_start;
3500 }
3501
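/* For procedural hit groups, the intersection shader's cache key is a hash of
 * both the intersection and the any-hit shader, because the two are later
 * lowered and compiled into a single binary (see the PROCEDURAL_HIT_GROUP
 * handling in anv_pipeline_compile_ray_tracing below).
 */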
3502 for (uint32_t i = 0; i < info->groupCount; i++) {
3503 const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];
3504
3505 if (ginfo->type != VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR)
3506 continue;
3507
3508 int64_t stage_start = os_time_get_nano();
3509
3510 uint32_t intersection_idx = ginfo->intersectionShader;
3511 assert(intersection_idx < info->stageCount);
3512
3513 uint32_t any_hit_idx = ginfo->anyHitShader;
3514 if (any_hit_idx != VK_SHADER_UNUSED_KHR) {
3515 assert(any_hit_idx < info->stageCount);
3516 anv_pipeline_hash_ray_tracing_combined_shader(pipeline,
3517 &stages[intersection_idx],
3518 &stages[any_hit_idx],
3519 stages[intersection_idx].cache_key.sha1);
3520 } else {
3521 anv_pipeline_hash_ray_tracing_shader(pipeline,
3522 &stages[intersection_idx],
3523 stages[intersection_idx].cache_key.sha1);
3524 }
3525
3526 stages[intersection_idx].feedback.duration += os_time_get_nano() - stage_start;
3527 }
3528
3529 return stages;
3530 }
3531
3532 static bool
3533 anv_ray_tracing_pipeline_load_cached_shaders(struct anv_ray_tracing_pipeline *pipeline,
3534 struct vk_pipeline_cache *cache,
3535 const VkRayTracingPipelineCreateInfoKHR *info,
3536 struct anv_pipeline_stage *stages,
3537 VkPipelineCreationFeedback *pipeline_feedback)
3538 {
3539 uint32_t shaders = 0, found = 0, cache_hits = 0;
3540 for (uint32_t i = 0; i < info->stageCount; i++) {
3541 if (stages[i].info == NULL)
3542 continue;
3543
3544 shaders++;
3545
3546 int64_t stage_start = os_time_get_nano();
3547
3548 bool cache_hit;
3549 stages[i].bin = anv_device_search_for_kernel(pipeline->base.device, cache,
3550 &stages[i].cache_key,
3551 sizeof(stages[i].cache_key),
3552 &cache_hit);
3553 if (cache_hit) {
3554 cache_hits++;
3555 stages[i].feedback.flags |=
3556 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
3557 }
3558
3559 if (stages[i].bin != NULL) {
3560 found++;
3561 anv_pipeline_add_executables(&pipeline->base, &stages[i]);
3562 }
3563
3564 stages[i].feedback.duration += os_time_get_nano() - stage_start;
3565 }
3566
3567 if (cache_hits == shaders) {
3568 pipeline_feedback->flags |=
3569 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
3570 }
3571
3572 return found == shaders;
3573 }
3574
3575 static VkResult
3576 anv_pipeline_compile_ray_tracing(struct anv_ray_tracing_pipeline *pipeline,
3577 void *tmp_pipeline_ctx,
3578 struct anv_pipeline_stage *stages,
3579 struct vk_pipeline_cache *cache,
3580 const VkRayTracingPipelineCreateInfoKHR *info)
3581 {
3582 const struct intel_device_info *devinfo = pipeline->base.device->info;
3583 VkResult result;
3584
3585 VkPipelineCreationFeedback pipeline_feedback = {
3586 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
3587 };
3588 int64_t pipeline_start = os_time_get_nano();
3589
3590 const bool skip_cache_lookup =
3591 (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
3592
3593 if (!skip_cache_lookup &&
3594 anv_ray_tracing_pipeline_load_cached_shaders(pipeline, cache, info, stages,
3595 &pipeline_feedback)) {
3596 goto done;
3597 }
3598
3599 if (pipeline->base.flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR)
3600 return VK_PIPELINE_COMPILE_REQUIRED;
3601
3602 for (uint32_t i = 0; i < info->stageCount; i++) {
3603 if (stages[i].info == NULL)
3604 continue;
3605
3606 /* Intersection and any-hit shaders always need to fetch the NIR,
3607 * so that they can be handled correctly below in the group section.
3608 * For the other stages, if we found them in the cache, skip this part.
3609 */
3610 if (!(stages[i].stage == MESA_SHADER_INTERSECTION ||
3611 stages[i].stage == MESA_SHADER_ANY_HIT) &&
3612 stages[i].bin != NULL)
3613 continue;
3614
3615 int64_t stage_start = os_time_get_nano();
3616
3617 VkResult result = anv_pipeline_stage_get_nir(&pipeline->base, cache,
3618 tmp_pipeline_ctx,
3619 &stages[i]);
3620 if (result != VK_SUCCESS)
3621 return result;
3622
3623 anv_pipeline_nir_preprocess(&pipeline->base, &stages[i]);
3624
3625 anv_pipeline_lower_nir(&pipeline->base, tmp_pipeline_ctx, &stages[i],
3626 &pipeline->base.layout, 0 /* view_mask */,
3627 false /* use_primitive_replication */);
3628
3629 stages[i].feedback.duration += os_time_get_nano() - stage_start;
3630 }
3631
3632 for (uint32_t i = 0; i < info->stageCount; i++) {
3633 if (stages[i].info == NULL)
3634 continue;
3635
3636 /* Shader found in cache already. */
3637 if (stages[i].bin != NULL)
3638 continue;
3639
3640 /* We handle intersection shaders as part of the group */
3641 if (stages[i].stage == MESA_SHADER_INTERSECTION)
3642 continue;
3643
3644 int64_t stage_start = os_time_get_nano();
3645
3646 void *tmp_stage_ctx = ralloc_context(tmp_pipeline_ctx);
3647
3648 nir_shader *nir = nir_shader_clone(tmp_stage_ctx, stages[i].nir);
3649 switch (stages[i].stage) {
3650 case MESA_SHADER_RAYGEN:
3651 brw_nir_lower_raygen(nir);
3652 break;
3653
3654 case MESA_SHADER_ANY_HIT:
3655 brw_nir_lower_any_hit(nir, devinfo);
3656 break;
3657
3658 case MESA_SHADER_CLOSEST_HIT:
3659 brw_nir_lower_closest_hit(nir);
3660 break;
3661
3662 case MESA_SHADER_MISS:
3663 brw_nir_lower_miss(nir);
3664 break;
3665
3666 case MESA_SHADER_INTERSECTION:
3667 unreachable("These are handled later");
3668
3669 case MESA_SHADER_CALLABLE:
3670 brw_nir_lower_callable(nir);
3671 break;
3672
3673 default:
3674 unreachable("Invalid ray-tracing shader stage");
3675 }
3676
3677 result = compile_upload_rt_shader(pipeline, cache, nir, &stages[i],
3678 tmp_stage_ctx);
3679 if (result != VK_SUCCESS) {
3680 ralloc_free(tmp_stage_ctx);
3681 return result;
3682 }
3683
3684 ralloc_free(tmp_stage_ctx);
3685
3686 stages[i].feedback.duration += os_time_get_nano() - stage_start;
3687 }
3688
3689 done:
3690 for (uint32_t i = 0; i < info->groupCount; i++) {
3691 const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];
3692 struct anv_rt_shader_group *group = &pipeline->groups[i];
3693 group->type = ginfo->type;
3694 switch (ginfo->type) {
3695 case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
3696 assert(ginfo->generalShader < info->stageCount);
3697 group->general = stages[ginfo->generalShader].bin;
3698 break;
3699
3700 case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
3701 if (ginfo->anyHitShader < info->stageCount)
3702 group->any_hit = stages[ginfo->anyHitShader].bin;
3703
3704 if (ginfo->closestHitShader < info->stageCount)
3705 group->closest_hit = stages[ginfo->closestHitShader].bin;
3706 break;
3707
3708 case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: {
3709 if (ginfo->closestHitShader < info->stageCount)
3710 group->closest_hit = stages[ginfo->closestHitShader].bin;
3711
3712 uint32_t intersection_idx = info->pGroups[i].intersectionShader;
3713 assert(intersection_idx < info->stageCount);
3714
3715 /* Only compile this stage if not already found in the cache. */
3716 if (stages[intersection_idx].bin == NULL) {
3717 /* The any-hit and intersection shader have to be combined */
3718 uint32_t any_hit_idx = info->pGroups[i].anyHitShader;
3719 const nir_shader *any_hit = NULL;
3720 if (any_hit_idx < info->stageCount)
3721 any_hit = stages[any_hit_idx].nir;
3722
3723 void *tmp_group_ctx = ralloc_context(tmp_pipeline_ctx);
3724 nir_shader *intersection =
3725 nir_shader_clone(tmp_group_ctx, stages[intersection_idx].nir);
3726
3727 brw_nir_lower_combined_intersection_any_hit(intersection, any_hit,
3728 devinfo);
3729
3730 result = compile_upload_rt_shader(pipeline, cache,
3731 intersection,
3732 &stages[intersection_idx],
3733 tmp_group_ctx);
3734 ralloc_free(tmp_group_ctx);
3735 if (result != VK_SUCCESS)
3736 return result;
3737 }
3738
3739 group->intersection = stages[intersection_idx].bin;
3740 break;
3741 }
3742
3743 default:
3744 unreachable("Invalid ray tracing shader group type");
3745 }
3746 }
3747
3748 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
3749
3750 const VkPipelineCreationFeedbackCreateInfo *create_feedback =
3751 vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
3752 if (create_feedback) {
3753 *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
3754
3755 uint32_t stage_count = create_feedback->pipelineStageCreationFeedbackCount;
3756 assert(stage_count == 0 || info->stageCount == stage_count);
3757 for (uint32_t i = 0; i < stage_count; i++) {
3758 gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
3759 create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
3760 }
3761 }
3762
3763 return VK_SUCCESS;
3764 }
3765
3766 VkResult
3767 anv_device_init_rt_shaders(struct anv_device *device)
3768 {
3769 device->bvh_build_method = ANV_BVH_BUILD_METHOD_NEW_SAH;
3770
3771 if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
3772 return VK_SUCCESS;
3773
3774 bool cache_hit;
3775
3776 struct anv_push_descriptor_info empty_push_desc_info = {};
3777 struct anv_pipeline_bind_map empty_bind_map = {};
3778 struct brw_rt_trampoline {
3779 char name[16];
3780 struct brw_cs_prog_key key;
3781 } trampoline_key = {
3782 .name = "rt-trampoline",
3783 };
3784 device->rt_trampoline =
3785 anv_device_search_for_kernel(device, device->internal_cache,
3786 &trampoline_key, sizeof(trampoline_key),
3787 &cache_hit);
3788 if (device->rt_trampoline == NULL) {
3789
3790 void *tmp_ctx = ralloc_context(NULL);
3791 nir_shader *trampoline_nir =
3792 brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx);
3793
3794 if (device->info->ver >= 20)
3795 trampoline_nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_16;
3796 else
3797 trampoline_nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_8;
3798
3799 struct brw_cs_prog_data trampoline_prog_data = {
3800 .uses_btd_stack_ids = true,
3801 };
3802 struct brw_compile_cs_params params = {
3803 .base = {
3804 .nir = trampoline_nir,
3805 .log_data = device,
3806 .mem_ctx = tmp_ctx,
3807 },
3808 .key = &trampoline_key.key,
3809 .prog_data = &trampoline_prog_data,
3810 };
3811 const unsigned *tramp_data =
3812 brw_compile_cs(device->physical->compiler, &params);
3813
3814 struct anv_shader_upload_params upload_params = {
3815 .stage = MESA_SHADER_COMPUTE,
3816 .key_data = &trampoline_key,
3817 .key_size = sizeof(trampoline_key),
3818 .kernel_data = tramp_data,
3819 .kernel_size = trampoline_prog_data.base.program_size,
3820 .prog_data = &trampoline_prog_data.base,
3821 .prog_data_size = sizeof(trampoline_prog_data),
3822 .bind_map = &empty_bind_map,
3823 .push_desc_info = &empty_push_desc_info,
3824 };
3825
3826 device->rt_trampoline =
3827 anv_device_upload_kernel(device, device->internal_cache,
3828 &upload_params);
3829
3830 ralloc_free(tmp_ctx);
3831
3832 if (device->rt_trampoline == NULL)
3833 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3834 }
3835
3836 /* The cache already has a reference and it's not going anywhere so there
3837 * is no need to hold a second reference.
3838 */
3839 anv_shader_bin_unref(device, device->rt_trampoline);
3840
3841 struct brw_rt_trivial_return {
3842 char name[16];
3843 struct brw_bs_prog_key key;
3844 } return_key = {
3845 .name = "rt-trivial-ret",
3846 };
3847 device->rt_trivial_return =
3848 anv_device_search_for_kernel(device, device->internal_cache,
3849 &return_key, sizeof(return_key),
3850 &cache_hit);
3851 if (device->rt_trivial_return == NULL) {
3852 void *tmp_ctx = ralloc_context(NULL);
3853 nir_shader *trivial_return_nir =
3854 brw_nir_create_trivial_return_shader(device->physical->compiler, tmp_ctx);
3855
3856 NIR_PASS_V(trivial_return_nir, brw_nir_lower_rt_intrinsics, device->info);
3857
3858 struct brw_bs_prog_data return_prog_data = { 0, };
3859 struct brw_compile_bs_params params = {
3860 .base = {
3861 .nir = trivial_return_nir,
3862 .log_data = device,
3863 .mem_ctx = tmp_ctx,
3864 },
3865 .key = &return_key.key,
3866 .prog_data = &return_prog_data,
3867 };
3868 const unsigned *return_data =
3869 brw_compile_bs(device->physical->compiler, &params);
3870
3871 struct anv_shader_upload_params upload_params = {
3872 .stage = MESA_SHADER_CALLABLE,
3873 .key_data = &return_key,
3874 .key_size = sizeof(return_key),
3875 .kernel_data = return_data,
3876 .kernel_size = return_prog_data.base.program_size,
3877 .prog_data = &return_prog_data.base,
3878 .prog_data_size = sizeof(return_prog_data),
3879 .bind_map = &empty_bind_map,
3880 .push_desc_info = &empty_push_desc_info,
3881 };
3882
3883 device->rt_trivial_return =
3884 anv_device_upload_kernel(device, device->internal_cache,
3885 &upload_params);
3886
3887 ralloc_free(tmp_ctx);
3888
3889 if (device->rt_trivial_return == NULL)
3890 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3891 }
3892
3893 /* The cache already has a reference and it's not going anywhere so there
3894 * is no need to hold a second reference.
3895 */
3896 anv_shader_bin_unref(device, device->rt_trivial_return);
3897
3898 return VK_SUCCESS;
3899 }
3900
3901 void
3902 anv_device_finish_rt_shaders(struct anv_device *device)
3903 {
3904 if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
3905 return;
3906 }
3907
3908 static void
3909 anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline,
3910 struct anv_device *device,
3911 struct vk_pipeline_cache *cache,
3912 const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
3913 const VkAllocationCallbacks *alloc)
3914 {
3915 util_dynarray_init(&pipeline->shaders, pipeline->base.mem_ctx);
3916
3917 ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
3918 anv_pipeline_init_layout(&pipeline->base, pipeline_layout);
3919
3920 anv_pipeline_setup_l3_config(&pipeline->base, /* needs_slm */ false);
3921 }
3922
3923 static void
3924 assert_rt_stage_index_valid(const VkRayTracingPipelineCreateInfoKHR* pCreateInfo,
3925 uint32_t stage_idx,
3926 VkShaderStageFlags valid_stages)
3927 {
3928 if (stage_idx == VK_SHADER_UNUSED_KHR)
3929 return;
3930
3931 assert(stage_idx < pCreateInfo->stageCount);
3932 assert(util_bitcount(pCreateInfo->pStages[stage_idx].stage) == 1);
3933 assert(pCreateInfo->pStages[stage_idx].stage & valid_stages);
3934 }
3935
3936 static VkResult
3937 anv_ray_tracing_pipeline_create(
3938 VkDevice _device,
3939 struct vk_pipeline_cache * cache,
3940 const VkRayTracingPipelineCreateInfoKHR* pCreateInfo,
3941 const VkAllocationCallbacks* pAllocator,
3942 VkPipeline* pPipeline)
3943 {
3944 ANV_FROM_HANDLE(anv_device, device, _device);
3945 VkResult result;
3946
3947 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR);
3948
3949 uint32_t group_count = pCreateInfo->groupCount;
3950 if (pCreateInfo->pLibraryInfo) {
3951 for (uint32_t l = 0; l < pCreateInfo->pLibraryInfo->libraryCount; l++) {
3952 ANV_FROM_HANDLE(anv_pipeline, library,
3953 pCreateInfo->pLibraryInfo->pLibraries[l]);
3954 struct anv_ray_tracing_pipeline *rt_library =
3955 anv_pipeline_to_ray_tracing(library);
3956 group_count += rt_library->group_count;
3957 }
3958 }
3959
3960 VK_MULTIALLOC(ma);
3961 VK_MULTIALLOC_DECL(&ma, struct anv_ray_tracing_pipeline, pipeline, 1);
3962 VK_MULTIALLOC_DECL(&ma, struct anv_rt_shader_group, groups, group_count);
3963 if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator,
3964 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
3965 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3966
3967 result = anv_pipeline_init(&pipeline->base, device,
3968 ANV_PIPELINE_RAY_TRACING,
3969 vk_rt_pipeline_create_flags(pCreateInfo),
3970 pAllocator);
3971 if (result != VK_SUCCESS) {
3972 vk_free2(&device->vk.alloc, pAllocator, pipeline);
3973 return result;
3974 }
3975
3976 pipeline->group_count = group_count;
3977 pipeline->groups = groups;
3978
3979 ASSERTED const VkShaderStageFlags ray_tracing_stages =
3980 VK_SHADER_STAGE_RAYGEN_BIT_KHR |
3981 VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
3982 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
3983 VK_SHADER_STAGE_MISS_BIT_KHR |
3984 VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
3985 VK_SHADER_STAGE_CALLABLE_BIT_KHR;
3986
3987 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
3988 assert((pCreateInfo->pStages[i].stage & ~ray_tracing_stages) == 0);
3989
3990 for (uint32_t i = 0; i < pCreateInfo->groupCount; i++) {
3991 const VkRayTracingShaderGroupCreateInfoKHR *ginfo =
3992 &pCreateInfo->pGroups[i];
3993 assert_rt_stage_index_valid(pCreateInfo, ginfo->generalShader,
3994 VK_SHADER_STAGE_RAYGEN_BIT_KHR |
3995 VK_SHADER_STAGE_MISS_BIT_KHR |
3996 VK_SHADER_STAGE_CALLABLE_BIT_KHR);
3997 assert_rt_stage_index_valid(pCreateInfo, ginfo->closestHitShader,
3998 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR);
3999 assert_rt_stage_index_valid(pCreateInfo, ginfo->anyHitShader,
4000 VK_SHADER_STAGE_ANY_HIT_BIT_KHR);
4001 assert_rt_stage_index_valid(pCreateInfo, ginfo->intersectionShader,
4002 VK_SHADER_STAGE_INTERSECTION_BIT_KHR);
4003 switch (ginfo->type) {
4004 case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
4005 assert(ginfo->generalShader < pCreateInfo->stageCount);
4006 assert(ginfo->anyHitShader == VK_SHADER_UNUSED_KHR);
4007 assert(ginfo->closestHitShader == VK_SHADER_UNUSED_KHR);
4008 assert(ginfo->intersectionShader == VK_SHADER_UNUSED_KHR);
4009 break;
4010
4011 case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
4012 assert(ginfo->generalShader == VK_SHADER_UNUSED_KHR);
4013 assert(ginfo->intersectionShader == VK_SHADER_UNUSED_KHR);
4014 break;
4015
4016 case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR:
4017 assert(ginfo->generalShader == VK_SHADER_UNUSED_KHR);
4018 break;
4019
4020 default:
4021 unreachable("Invalid ray-tracing shader group type");
4022 }
4023 }
4024
4025 anv_ray_tracing_pipeline_init(pipeline, device, cache,
4026 pCreateInfo, pAllocator);
4027
4028 void *tmp_ctx = ralloc_context(NULL);
4029
4030 struct anv_pipeline_stage *stages =
4031 anv_pipeline_init_ray_tracing_stages(pipeline, pCreateInfo, tmp_ctx);
4032
4033 result = anv_pipeline_compile_ray_tracing(pipeline, tmp_ctx, stages,
4034 cache, pCreateInfo);
4035 if (result != VK_SUCCESS) {
4036 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
4037 if (stages[i].bin != NULL)
4038 anv_shader_bin_unref(device, stages[i].bin);
4039 }
4040 ralloc_free(tmp_ctx);
4041 anv_pipeline_finish(&pipeline->base, device);
4042 vk_free2(&device->vk.alloc, pAllocator, pipeline);
4043 return result;
4044 }
4045
4046 /* Compute the size of the scratch BO (for register spilling) by taking the
4047 * max of all the shaders in the pipeline. Also add the shaders to the list
4048 * of executables.
4049 */
4050 uint32_t stack_max[MESA_VULKAN_SHADER_STAGES] = {};
4051 for (uint32_t s = 0; s < pCreateInfo->stageCount; s++) {
4052 util_dynarray_append(&pipeline->shaders,
4053 struct anv_shader_bin *,
4054 stages[s].bin);
4055
4056 uint32_t stack_size =
4057 brw_bs_prog_data_const(stages[s].bin->prog_data)->max_stack_size;
4058 stack_max[stages[s].stage] = MAX2(stack_max[stages[s].stage], stack_size);
4059
4060 anv_pipeline_account_shader(&pipeline->base, stages[s].bin);
4061 }
4062
4063 anv_pipeline_compute_ray_tracing_stacks(pipeline, pCreateInfo, stack_max);
4064
4065 if (pCreateInfo->pLibraryInfo) {
4066 uint32_t g = pCreateInfo->groupCount;
4067 for (uint32_t l = 0; l < pCreateInfo->pLibraryInfo->libraryCount; l++) {
4068 ANV_FROM_HANDLE(anv_pipeline, library,
4069 pCreateInfo->pLibraryInfo->pLibraries[l]);
4070 struct anv_ray_tracing_pipeline *rt_library =
4071 anv_pipeline_to_ray_tracing(library);
4072 for (uint32_t lg = 0; lg < rt_library->group_count; lg++) {
4073 pipeline->groups[g] = rt_library->groups[lg];
4074 pipeline->groups[g].imported = true;
4075 g++;
4076 }
4077
4078 /* Account for shaders in the library. */
4079 util_dynarray_foreach(&rt_library->shaders,
4080 struct anv_shader_bin *, shader) {
4081 util_dynarray_append(&pipeline->shaders,
4082 struct anv_shader_bin *,
4083 anv_shader_bin_ref(*shader));
4084 anv_pipeline_account_shader(&pipeline->base, *shader);
4085 }
4086
4087 /* Add the library shaders to this pipeline's executables. */
4088 util_dynarray_foreach(&rt_library->base.executables,
4089 struct anv_pipeline_executable, exe) {
4090 util_dynarray_append(&pipeline->base.executables,
4091 struct anv_pipeline_executable, *exe);
4092 }
4093
4094 pipeline->base.active_stages |= rt_library->base.active_stages;
4095 }
4096 }
4097
4098 anv_genX(device->info, ray_tracing_pipeline_emit)(pipeline);
4099
4100 ralloc_free(tmp_ctx);
4101
4102 ANV_RMV(rt_pipeline_create, device, pipeline, false);
4103
4104 *pPipeline = anv_pipeline_to_handle(&pipeline->base);
4105
4106 return pipeline->base.batch.status;
4107 }
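
/* Note on group indexing when libraries are imported above: groups declared
 * directly in pCreateInfo occupy indices [0, pCreateInfo->groupCount) and the
 * libraries' groups are appended after them in library order. For example,
 * with groupCount == 3 and a single library exposing 2 groups, the imported
 * groups end up at indices 3 and 4 and are returned under those indices by
 * anv_GetRayTracingShaderGroupHandlesKHR().
 */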
4108
4109 VkResult
4110 anv_CreateRayTracingPipelinesKHR(
4111 VkDevice _device,
4112 VkDeferredOperationKHR deferredOperation,
4113 VkPipelineCache pipelineCache,
4114 uint32_t createInfoCount,
4115 const VkRayTracingPipelineCreateInfoKHR* pCreateInfos,
4116 const VkAllocationCallbacks* pAllocator,
4117 VkPipeline* pPipelines)
4118 {
4119 ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache);
4120
4121 VkResult result = VK_SUCCESS;
4122
4123 unsigned i;
4124 for (i = 0; i < createInfoCount; i++) {
4125 const VkPipelineCreateFlags2KHR flags =
4126 vk_rt_pipeline_create_flags(&pCreateInfos[i]);
4127 VkResult res = anv_ray_tracing_pipeline_create(_device, pipeline_cache,
4128 &pCreateInfos[i],
4129 pAllocator, &pPipelines[i]);
4130
4131 if (res != VK_SUCCESS) {
4132 result = res;
4133 if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
4134 break;
4135 pPipelines[i] = VK_NULL_HANDLE;
4136 }
4137 }
4138
4139 for (; i < createInfoCount; i++)
4140 pPipelines[i] = VK_NULL_HANDLE;
4141
4142 return result;
4143 }
4144
4145 #define WRITE_STR(field, ...) ({ \
4146 memset(field, 0, sizeof(field)); \
4147 UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
4148 assert(i > 0 && i < sizeof(field)); \
4149 })
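
/* Usage sketch for WRITE_STR() (illustrative only): the destination must be a
 * fixed-size char array so that sizeof(field) gives its capacity, and the
 * formatted string plus its terminating NUL must fit, otherwise the assert
 * fires in debug builds.
 *
 *    VkPipelineExecutablePropertiesKHR props;
 *    WRITE_STR(props.name, "SIMD%d %s", 16, "fragment");
 *    // props.name now holds "SIMD16 fragment", zero-padded to sizeof(props.name).
 */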
4150
4151 VkResult anv_GetPipelineExecutablePropertiesKHR(
4152 VkDevice device,
4153 const VkPipelineInfoKHR* pPipelineInfo,
4154 uint32_t* pExecutableCount,
4155 VkPipelineExecutablePropertiesKHR* pProperties)
4156 {
4157 ANV_FROM_HANDLE(anv_pipeline, pipeline, pPipelineInfo->pipeline);
4158 VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out,
4159 pProperties, pExecutableCount);
4160
4161 util_dynarray_foreach (&pipeline->executables, struct anv_pipeline_executable, exe) {
4162 vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
4163 gl_shader_stage stage = exe->stage;
4164 props->stages = mesa_to_vk_shader_stage(stage);
4165
4166 unsigned simd_width = exe->stats.dispatch_width;
4167 if (stage == MESA_SHADER_FRAGMENT) {
4168 if (exe->stats.max_polygons > 1)
4169 WRITE_STR(props->name, "SIMD%dx%d %s",
4170 exe->stats.max_polygons,
4171 simd_width / exe->stats.max_polygons,
4172 _mesa_shader_stage_to_string(stage));
4173 else
4174 WRITE_STR(props->name, "%s%d %s",
4175 simd_width ? "SIMD" : "vec",
4176 simd_width ? simd_width : 4,
4177 _mesa_shader_stage_to_string(stage));
4178 } else {
4179 WRITE_STR(props->name, "%s", _mesa_shader_stage_to_string(stage));
4180 }
4181 WRITE_STR(props->description, "%s%d %s shader",
4182 simd_width ? "SIMD" : "vec",
4183 simd_width ? simd_width : 4,
4184 _mesa_shader_stage_to_string(stage));
4185
4186 /* The compiler gives us a dispatch width of 0 for vec4 but Vulkan
4187 * wants a subgroup size of 1.
4188 */
4189 props->subgroupSize = MAX2(simd_width, 1);
4190 }
4191 }
4192
4193 return vk_outarray_status(&out);
4194 }
4195
4196 static const struct anv_pipeline_executable *
4197 anv_pipeline_get_executable(struct anv_pipeline *pipeline, uint32_t index)
4198 {
4199 assert(index < util_dynarray_num_elements(&pipeline->executables,
4200 struct anv_pipeline_executable));
4201 return util_dynarray_element(
4202 &pipeline->executables, struct anv_pipeline_executable, index);
4203 }
4204
4205 VkResult anv_GetPipelineExecutableStatisticsKHR(
4206 VkDevice device,
4207 const VkPipelineExecutableInfoKHR* pExecutableInfo,
4208 uint32_t* pStatisticCount,
4209 VkPipelineExecutableStatisticKHR* pStatistics)
4210 {
4211 ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
4212 VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
4213 pStatistics, pStatisticCount);
4214
4215 const struct anv_pipeline_executable *exe =
4216 anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
4217
4218 const struct brw_stage_prog_data *prog_data;
4219 switch (pipeline->type) {
4220 case ANV_PIPELINE_GRAPHICS:
4221 case ANV_PIPELINE_GRAPHICS_LIB: {
4222 prog_data = anv_pipeline_to_graphics_base(pipeline)->shaders[exe->stage]->prog_data;
4223 break;
4224 }
4225 case ANV_PIPELINE_COMPUTE: {
4226 prog_data = anv_pipeline_to_compute(pipeline)->cs->prog_data;
4227 break;
4228 }
4229 case ANV_PIPELINE_RAY_TRACING: {
4230 struct anv_shader_bin **shader =
4231 util_dynarray_element(&anv_pipeline_to_ray_tracing(pipeline)->shaders,
4232 struct anv_shader_bin *,
4233 pExecutableInfo->executableIndex);
4234 prog_data = (*shader)->prog_data;
4235 break;
4236 }
4237 default:
4238 unreachable("invalid pipeline type");
4239 }
4240
4241 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
4242 WRITE_STR(stat->name, "Instruction Count");
4243 WRITE_STR(stat->description,
4244 "Number of GEN instructions in the final generated "
4245 "shader executable.");
4246 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
4247 stat->value.u64 = exe->stats.instructions;
4248 }
4249
4250 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
4251 WRITE_STR(stat->name, "SEND Count");
4252 WRITE_STR(stat->description,
4253 "Number of instructions in the final generated shader "
4254 "executable which access external units such as the "
4255 "constant cache or the sampler.");
4256 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
4257 stat->value.u64 = exe->stats.sends;
4258 }
4259
4260 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
4261 WRITE_STR(stat->name, "Loop Count");
4262 WRITE_STR(stat->description,
4263 "Number of loops (not unrolled) in the final generated "
4264 "shader executable.");
4265 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
4266 stat->value.u64 = exe->stats.loops;
4267 }
4268
4269 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
4270 WRITE_STR(stat->name, "Cycle Count");
4271 WRITE_STR(stat->description,
4272 "Estimate of the number of EU cycles required to execute "
4273 "the final generated executable. This is an estimate only "
4274 "and may vary greatly from actual run-time performance.");
4275 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
4276 stat->value.u64 = exe->stats.cycles;
4277 }
4278
4279 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
4280 WRITE_STR(stat->name, "Spill Count");
4281 WRITE_STR(stat->description,
4282 "Number of scratch spill operations. This gives a rough "
4283 "estimate of the cost incurred due to spilling temporary "
4284 "values to memory. If this is non-zero, you may want to "
4285 "adjust your shader to reduce register pressure.");
4286 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
4287 stat->value.u64 = exe->stats.spills;
4288 }
4289
4290 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
4291 WRITE_STR(stat->name, "Fill Count");
4292 WRITE_STR(stat->description,
4293 "Number of scratch fill operations. This gives a rough "
4294 "estimate of the cost incurred due to spilling temporary "
4295 "values to memory. If this is non-zero, you may want to "
4296 "adjust your shader to reduce register pressure.");
4297 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
4298 stat->value.u64 = exe->stats.fills;
4299 }
4300
4301 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
4302 WRITE_STR(stat->name, "Scratch Memory Size");
4303 WRITE_STR(stat->description,
4304 "Number of bytes of scratch memory required by the "
4305 "generated shader executable. If this is non-zero, you "
4306 "may want to adjust your shader to reduce register "
4307 "pressure.");
4308 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
4309 stat->value.u64 = prog_data->total_scratch;
4310 }
4311
4312 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
4313 WRITE_STR(stat->name, "Max dispatch width");
4314 WRITE_STR(stat->description,
4315 "Largest SIMD dispatch width.");
4316 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
4317 /* Report the max dispatch width only on the smallest SIMD variant */
4318 if (exe->stage != MESA_SHADER_FRAGMENT || exe->stats.dispatch_width == 8)
4319 stat->value.u64 = exe->stats.max_dispatch_width;
4320 else
4321 stat->value.u64 = 0;
4322 }
4323
4324 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
4325 WRITE_STR(stat->name, "Max live registers");
4326 WRITE_STR(stat->description,
4327 "Maximum number of registers used across the entire shader.");
4328 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
4329 stat->value.u64 = exe->stats.max_live_registers;
4330 }
4331
4332 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
4333 WRITE_STR(stat->name, "Workgroup Memory Size");
4334 WRITE_STR(stat->description,
4335 "Number of bytes of workgroup shared memory used by this "
4336 "shader including any padding.");
4337 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
4338 if (gl_shader_stage_uses_workgroup(exe->stage))
4339 stat->value.u64 = prog_data->total_shared;
4340 else
4341 stat->value.u64 = 0;
4342 }
4343
4344 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
4345 uint32_t hash = pipeline->type == ANV_PIPELINE_COMPUTE ?
4346 anv_pipeline_to_compute(pipeline)->source_hash :
4347 (pipeline->type == ANV_PIPELINE_GRAPHICS_LIB ||
4348 pipeline->type == ANV_PIPELINE_GRAPHICS) ?
4349 anv_pipeline_to_graphics_base(pipeline)->source_hashes[exe->stage] :
4350 0 /* No source hash for ray tracing */;
4351 WRITE_STR(stat->name, "Source hash");
4352 WRITE_STR(stat->description,
4353 "hash = 0x%08x. Hash generated from shader source.", hash);
4354 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
4355 stat->value.u64 = hash;
4356 }
4357
4358 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
4359 WRITE_STR(stat->name, "Non SSA regs after NIR");
4360 WRITE_STR(stat->description, "Non SSA regs after NIR translation to BRW.");
4361 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
4362 stat->value.u64 = exe->stats.non_ssa_registers_after_nir;
4363 }
4364
4365 return vk_outarray_status(&out);
4366 }
4367
4368 static bool
4369 write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
4370 const char *data)
4371 {
4372 ir->isText = VK_TRUE;
4373
4374 size_t data_len = strlen(data) + 1;
4375
4376 if (ir->pData == NULL) {
4377 ir->dataSize = data_len;
4378 return true;
4379 }
4380
4381 strncpy(ir->pData, data, ir->dataSize);
4382 if (ir->dataSize < data_len)
4383 return false;
4384
4385 ir->dataSize = data_len;
4386 return true;
4387 }
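
/* write_ir_text() implements the callee side of the usual Vulkan two-call
 * pattern for each representation. A caller sketch (illustrative only,
 * assuming a single representation is reported):
 *
 *    uint32_t count = 1;
 *    VkPipelineExecutableInternalRepresentationKHR ir = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INTERNAL_REPRESENTATION_KHR,
 *       .pData = NULL,
 *    };
 *    // First call: the driver only reports dataSize (strlen(text) + 1).
 *    vkGetPipelineExecutableInternalRepresentationsKHR(device, &exe_info, &count, &ir);
 *    ir.pData = malloc(ir.dataSize);
 *    // Second call: the text is copied; VK_INCOMPLETE signals truncation.
 *    vkGetPipelineExecutableInternalRepresentationsKHR(device, &exe_info, &count, &ir);
 */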
4388
4389 VkResult anv_GetPipelineExecutableInternalRepresentationsKHR(
4390 VkDevice device,
4391 const VkPipelineExecutableInfoKHR* pExecutableInfo,
4392 uint32_t* pInternalRepresentationCount,
4393 VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
4394 {
4395 ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
4396 VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out,
4397 pInternalRepresentations, pInternalRepresentationCount);
4398 bool incomplete_text = false;
4399
4400 const struct anv_pipeline_executable *exe =
4401 anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
4402
4403 if (exe->nir) {
4404 vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) {
4405 WRITE_STR(ir->name, "Final NIR");
4406 WRITE_STR(ir->description,
4407 "Final NIR before going into the back-end compiler");
4408
4409 if (!write_ir_text(ir, exe->nir))
4410 incomplete_text = true;
4411 }
4412 }
4413
4414 if (exe->disasm) {
4415 vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) {
4416 WRITE_STR(ir->name, "GEN Assembly");
4417 WRITE_STR(ir->description,
4418 "Final GEN assembly for the generated shader binary");
4419
4420 if (!write_ir_text(ir, exe->disasm))
4421 incomplete_text = true;
4422 }
4423 }
4424
4425 return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
4426 }
4427
4428 VkResult
4429 anv_GetRayTracingShaderGroupHandlesKHR(
4430 VkDevice _device,
4431 VkPipeline _pipeline,
4432 uint32_t firstGroup,
4433 uint32_t groupCount,
4434 size_t dataSize,
4435 void* pData)
4436 {
4437 ANV_FROM_HANDLE(anv_device, device, _device);
4438 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
4439
4440 if (pipeline->type != ANV_PIPELINE_RAY_TRACING)
4441 return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
4442
4443 struct anv_ray_tracing_pipeline *rt_pipeline =
4444 anv_pipeline_to_ray_tracing(pipeline);
4445
4446 assert(firstGroup + groupCount <= rt_pipeline->group_count);
4447 for (uint32_t i = 0; i < groupCount; i++) {
4448 struct anv_rt_shader_group *group = &rt_pipeline->groups[firstGroup + i];
4449 memcpy(pData, group->handle, sizeof(group->handle));
4450 pData += sizeof(group->handle);
4451 }
4452
4453 return VK_SUCCESS;
4454 }
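
/* Illustration only: how an application typically consumes the handles
 * returned above when filling a shader binding table. Sizes come from
 * VkPhysicalDeviceRayTracingPipelinePropertiesKHR; the variable names are
 * hypothetical.
 *
 *    uint32_t handle_size = rt_props.shaderGroupHandleSize;
 *    uint8_t *handles = malloc((size_t)handle_size * group_count);
 *    vkGetRayTracingShaderGroupHandlesKHR(device, pipeline, 0, group_count,
 *                                         (size_t)handle_size * group_count,
 *                                         handles);
 *    // Each handle is then copied into the SBT buffer at offsets aligned to
 *    // shaderGroupHandleAlignment (shaderGroupBaseAlignment for region starts).
 */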
4455
4456 VkResult
4457 anv_GetRayTracingCaptureReplayShaderGroupHandlesKHR(
4458 VkDevice _device,
4459 VkPipeline pipeline,
4460 uint32_t firstGroup,
4461 uint32_t groupCount,
4462 size_t dataSize,
4463 void* pData)
4464 {
4465 ANV_FROM_HANDLE(anv_device, device, _device);
4466 unreachable("Unimplemented");
4467 return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
4468 }
4469
4470 VkDeviceSize
4471 anv_GetRayTracingShaderGroupStackSizeKHR(
4472 VkDevice device,
4473 VkPipeline _pipeline,
4474 uint32_t group,
4475 VkShaderGroupShaderKHR groupShader)
4476 {
4477 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
4478 assert(pipeline->type == ANV_PIPELINE_RAY_TRACING);
4479
4480 struct anv_ray_tracing_pipeline *rt_pipeline =
4481 anv_pipeline_to_ray_tracing(pipeline);
4482
4483 assert(group < rt_pipeline->group_count);
4484
4485 struct anv_shader_bin *bin;
4486 switch (groupShader) {
4487 case VK_SHADER_GROUP_SHADER_GENERAL_KHR:
4488 bin = rt_pipeline->groups[group].general;
4489 break;
4490
4491 case VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR:
4492 bin = rt_pipeline->groups[group].closest_hit;
4493 break;
4494
4495 case VK_SHADER_GROUP_SHADER_ANY_HIT_KHR:
4496 bin = rt_pipeline->groups[group].any_hit;
4497 break;
4498
4499 case VK_SHADER_GROUP_SHADER_INTERSECTION_KHR:
4500 bin = rt_pipeline->groups[group].intersection;
4501 break;
4502
4503 default:
4504 unreachable("Invalid VkShaderGroupShader enum");
4505 }
4506
4507 if (bin == NULL)
4508 return 0;
4509
4510 return brw_bs_prog_data_const(bin->prog_data)->max_stack_size;
4511 }
4512