1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "util/mesa-sha1.h"
31 #include "util/os_time.h"
32 #include "common/intel_l3_config.h"
33 #include "common/intel_disasm.h"
34 #include "common/intel_sample_positions.h"
35 #include "anv_private.h"
36 #include "compiler/brw_nir.h"
37 #include "compiler/brw_nir_rt.h"
38 #include "anv_nir.h"
39 #include "nir/nir_xfb_info.h"
40 #include "spirv/nir_spirv.h"
41 #include "vk_util.h"
42
43 /* Needed for SWIZZLE macros */
44 #include "program/prog_instruction.h"
45
46 // Shader functions
47 #define SPIR_V_MAGIC_NUMBER 0x07230203
48
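/* Private data handed to the SPIR-V -> NIR debug callback below so that
 * messages can be logged against the right device and shader module.
 */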
49 struct anv_spirv_debug_data {
50 struct anv_device *device;
51 const struct vk_shader_module *module;
52 };
53
54 static void anv_spirv_nir_debug(void *private_data,
55 enum nir_spirv_debug_level level,
56 size_t spirv_offset,
57 const char *message)
58 {
59 struct anv_spirv_debug_data *debug_data = private_data;
60
61 switch (level) {
62 case NIR_SPIRV_DEBUG_LEVEL_INFO:
63 vk_logi(VK_LOG_OBJS(&debug_data->module->base),
64 "SPIR-V offset %lu: %s",
65 (unsigned long) spirv_offset, message);
66 break;
67 case NIR_SPIRV_DEBUG_LEVEL_WARNING:
68 vk_logw(VK_LOG_OBJS(&debug_data->module->base),
69 "SPIR-V offset %lu: %s",
70 (unsigned long) spirv_offset, message);
71 break;
72 case NIR_SPIRV_DEBUG_LEVEL_ERROR:
73 vk_loge(VK_LOG_OBJS(&debug_data->module->base),
74 "SPIR-V offset %lu: %s",
75 (unsigned long) spirv_offset, message);
76 break;
77 default:
78 break;
79 }
80 }
81
82 /* Eventually, this will become part of anv_CreateShader. Unfortunately,
83 * we can't do that yet because we don't have the ability to copy nir.
84 */
85 static nir_shader *
86 anv_shader_compile_to_nir(struct anv_device *device,
87 void *mem_ctx,
88 const struct vk_shader_module *module,
89 const char *entrypoint_name,
90 gl_shader_stage stage,
91 const VkSpecializationInfo *spec_info)
92 {
93 const struct anv_physical_device *pdevice = device->physical;
94 const struct brw_compiler *compiler = pdevice->compiler;
95 const nir_shader_compiler_options *nir_options =
96 compiler->glsl_compiler_options[stage].NirOptions;
97
98 uint32_t *spirv = (uint32_t *) module->data;
99 assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
100 assert(module->size % 4 == 0);
101
102 uint32_t num_spec_entries = 0;
103 struct nir_spirv_specialization *spec_entries =
104 vk_spec_info_to_nir_spirv(spec_info, &num_spec_entries);
105
106 struct anv_spirv_debug_data spirv_debug_data = {
107 .device = device,
108 .module = module,
109 };
110 struct spirv_to_nir_options spirv_options = {
111 .caps = {
112 .demote_to_helper_invocation = true,
113 .derivative_group = true,
114 .descriptor_array_dynamic_indexing = true,
115 .descriptor_array_non_uniform_indexing = true,
116 .descriptor_indexing = true,
117 .device_group = true,
118 .draw_parameters = true,
119 .float16 = pdevice->info.ver >= 8,
120 .float32_atomic_add = pdevice->info.has_lsc,
121 .float32_atomic_min_max = pdevice->info.ver >= 9,
122 .float64 = pdevice->info.ver >= 8,
123 .float64_atomic_min_max = pdevice->info.has_lsc,
124 .fragment_shader_sample_interlock = pdevice->info.ver >= 9,
125 .fragment_shader_pixel_interlock = pdevice->info.ver >= 9,
126 .geometry_streams = true,
127 /* When KHR_format_feature_flags2 is enabled, the read/write without
128 * format is per format, so just report true. It's up to the
129 * application to check.
130 */
131 .image_read_without_format = device->vk.enabled_extensions.KHR_format_feature_flags2,
132 .image_write_without_format = true,
133 .int8 = pdevice->info.ver >= 8,
134 .int16 = pdevice->info.ver >= 8,
135 .int64 = pdevice->info.ver >= 8,
136 .int64_atomics = pdevice->info.ver >= 9 && pdevice->use_softpin,
137 .integer_functions2 = pdevice->info.ver >= 8,
138 .min_lod = true,
139 .multiview = true,
140 .physical_storage_buffer_address = pdevice->has_a64_buffer_access,
141 .post_depth_coverage = pdevice->info.ver >= 9,
142 .runtime_descriptor_array = true,
143 .float_controls = pdevice->info.ver >= 8,
144 .ray_tracing = pdevice->info.has_ray_tracing,
145 .shader_clock = true,
146 .shader_viewport_index_layer = true,
147 .stencil_export = pdevice->info.ver >= 9,
148 .storage_8bit = pdevice->info.ver >= 8,
149 .storage_16bit = pdevice->info.ver >= 8,
150 .subgroup_arithmetic = true,
151 .subgroup_basic = true,
152 .subgroup_ballot = true,
153 .subgroup_dispatch = true,
154 .subgroup_quad = true,
155 .subgroup_uniform_control_flow = true,
156 .subgroup_shuffle = true,
157 .subgroup_vote = true,
158 .tessellation = true,
159 .transform_feedback = pdevice->info.ver >= 8,
160 .variable_pointers = true,
161 .vk_memory_model = true,
162 .vk_memory_model_device_scope = true,
163 .workgroup_memory_explicit_layout = true,
164 .fragment_shading_rate = pdevice->info.ver >= 11,
165 },
166 .ubo_addr_format =
167 anv_nir_ubo_addr_format(pdevice, device->robust_buffer_access),
168 .ssbo_addr_format =
169 anv_nir_ssbo_addr_format(pdevice, device->robust_buffer_access),
170 .phys_ssbo_addr_format = nir_address_format_64bit_global,
171 .push_const_addr_format = nir_address_format_logical,
172
173      /* TODO: Consider changing this to an address format where the NULL
174       * pointer equals 0. That might be a better format to play nicely
175       * with certain code / code generators.
176 */
177 .shared_addr_format = nir_address_format_32bit_offset,
178 .debug = {
179 .func = anv_spirv_nir_debug,
180 .private_data = &spirv_debug_data,
181 },
182 };
183
184
185 nir_shader *nir =
186 spirv_to_nir(spirv, module->size / 4,
187 spec_entries, num_spec_entries,
188 stage, entrypoint_name, &spirv_options, nir_options);
189 if (!nir) {
190 free(spec_entries);
191 return NULL;
192 }
193
194 assert(nir->info.stage == stage);
195 nir_validate_shader(nir, "after spirv_to_nir");
196 nir_validate_ssa_dominance(nir, "after spirv_to_nir");
197 ralloc_steal(mem_ctx, nir);
198
199 free(spec_entries);
200
201 const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
202 .point_coord = true,
203 };
204 NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
205
206 if (INTEL_DEBUG(intel_debug_flag_for_shader_stage(stage))) {
207 fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n",
208 gl_shader_stage_name(stage));
209 nir_print_shader(nir, stderr);
210 }
211
212 /* We have to lower away local constant initializers right before we
213 * inline functions. That way they get properly initialized at the top
214 * of the function and not at the top of its caller.
215 */
216 NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
217 NIR_PASS_V(nir, nir_lower_returns);
218 NIR_PASS_V(nir, nir_inline_functions);
219 NIR_PASS_V(nir, nir_copy_prop);
220 NIR_PASS_V(nir, nir_opt_deref);
221
222 /* Pick off the single entrypoint that we want */
223 foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
224 if (!func->is_entrypoint)
225 exec_node_remove(&func->node);
226 }
227 assert(exec_list_length(&nir->functions) == 1);
228
229 /* Now that we've deleted all but the main function, we can go ahead and
230 * lower the rest of the constant initializers. We do this here so that
231 * nir_remove_dead_variables and split_per_member_structs below see the
232 * corresponding stores.
233 */
234 NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
235
236 /* Split member structs. We do this before lower_io_to_temporaries so that
237 * it doesn't lower system values to temporaries by accident.
238 */
239 NIR_PASS_V(nir, nir_split_var_copies);
240 NIR_PASS_V(nir, nir_split_per_member_structs);
241
242 NIR_PASS_V(nir, nir_remove_dead_variables,
243 nir_var_shader_in | nir_var_shader_out | nir_var_system_value |
244 nir_var_shader_call_data | nir_var_ray_hit_attrib,
245 NULL);
246
247 NIR_PASS_V(nir, nir_propagate_invariant, false);
248 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
249 nir_shader_get_entrypoint(nir), true, false);
250
251 NIR_PASS_V(nir, nir_lower_frexp);
252
253 /* Vulkan uses the separate-shader linking model */
254 nir->info.separate_shader = true;
255
256 brw_preprocess_nir(compiler, nir, NULL);
257
258 return nir;
259 }
260
261 VkResult
262 anv_pipeline_init(struct anv_pipeline *pipeline,
263 struct anv_device *device,
264 enum anv_pipeline_type type,
265 VkPipelineCreateFlags flags,
266 const VkAllocationCallbacks *pAllocator)
267 {
268 VkResult result;
269
270 memset(pipeline, 0, sizeof(*pipeline));
271
272 vk_object_base_init(&device->vk, &pipeline->base,
273 VK_OBJECT_TYPE_PIPELINE);
274 pipeline->device = device;
275
276 /* It's the job of the child class to provide actual backing storage for
277 * the batch by setting batch.start, batch.next, and batch.end.
278 */
279 pipeline->batch.alloc = pAllocator ? pAllocator : &device->vk.alloc;
280 pipeline->batch.relocs = &pipeline->batch_relocs;
281 pipeline->batch.status = VK_SUCCESS;
282
283 result = anv_reloc_list_init(&pipeline->batch_relocs,
284 pipeline->batch.alloc);
285 if (result != VK_SUCCESS)
286 return result;
287
288 pipeline->mem_ctx = ralloc_context(NULL);
289
290 pipeline->type = type;
291 pipeline->flags = flags;
292
293 util_dynarray_init(&pipeline->executables, pipeline->mem_ctx);
294
295 return VK_SUCCESS;
296 }
297
298 void
299 anv_pipeline_finish(struct anv_pipeline *pipeline,
300 struct anv_device *device,
301 const VkAllocationCallbacks *pAllocator)
302 {
303 anv_reloc_list_finish(&pipeline->batch_relocs,
304 pAllocator ? pAllocator : &device->vk.alloc);
305 ralloc_free(pipeline->mem_ctx);
306 vk_object_base_finish(&pipeline->base);
307 }
308
309 void anv_DestroyPipeline(
310 VkDevice _device,
311 VkPipeline _pipeline,
312 const VkAllocationCallbacks* pAllocator)
313 {
314 ANV_FROM_HANDLE(anv_device, device, _device);
315 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
316
317 if (!pipeline)
318 return;
319
320 switch (pipeline->type) {
321 case ANV_PIPELINE_GRAPHICS: {
322 struct anv_graphics_pipeline *gfx_pipeline =
323 anv_pipeline_to_graphics(pipeline);
324
325 if (gfx_pipeline->blend_state.map)
326 anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->blend_state);
327 if (gfx_pipeline->cps_state.map)
328 anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->cps_state);
329
330 for (unsigned s = 0; s < ARRAY_SIZE(gfx_pipeline->shaders); s++) {
331 if (gfx_pipeline->shaders[s])
332 anv_shader_bin_unref(device, gfx_pipeline->shaders[s]);
333 }
334 break;
335 }
336
337 case ANV_PIPELINE_COMPUTE: {
338 struct anv_compute_pipeline *compute_pipeline =
339 anv_pipeline_to_compute(pipeline);
340
341 if (compute_pipeline->cs)
342 anv_shader_bin_unref(device, compute_pipeline->cs);
343
344 break;
345 }
346
347 case ANV_PIPELINE_RAY_TRACING: {
348 struct anv_ray_tracing_pipeline *rt_pipeline =
349 anv_pipeline_to_ray_tracing(pipeline);
350
351 util_dynarray_foreach(&rt_pipeline->shaders,
352 struct anv_shader_bin *, shader) {
353 anv_shader_bin_unref(device, *shader);
354 }
355 break;
356 }
357
358 default:
359 unreachable("invalid pipeline type");
360 }
361
362 anv_pipeline_finish(pipeline, device, pAllocator);
363 vk_free2(&device->vk.alloc, pAllocator, pipeline);
364 }
365
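/* Translation table from Vulkan primitive topologies to the hardware
 * 3DPRIM_* topology values.
 */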
366 static const uint32_t vk_to_intel_primitive_type[] = {
367 [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST,
368 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST,
369 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP,
370 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST,
371 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
372 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
373 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
374 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
375 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
376 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
377 };
378
379 static void
380 populate_sampler_prog_key(const struct intel_device_info *devinfo,
381 struct brw_sampler_prog_key_data *key)
382 {
383 /* Almost all multisampled textures are compressed. The only time when we
384 * don't compress a multisampled texture is for 16x MSAA with a surface
385 * width greater than 8k which is a bit of an edge case. Since the sampler
386 * just ignores the MCS parameter to ld2ms when MCS is disabled, it's safe
387 * to tell the compiler to always assume compression.
388 */
389 key->compressed_multisample_layout_mask = ~0;
390
391 /* SkyLake added support for 16x MSAA. With this came a new message for
392 * reading from a 16x MSAA surface with compression. The new message was
393 * needed because now the MCS data is 64 bits instead of 32 or lower as is
394 * the case for 8x, 4x, and 2x. The key->msaa_16 bit-field controls which
395 * message we use. Fortunately, the 16x message works for 8x, 4x, and 2x
396 * so we can just use it unconditionally. This may not be quite as
397 * efficient but it saves us from recompiling.
398 */
399 if (devinfo->ver >= 9)
400 key->msaa_16 = ~0;
401
402 /* XXX: Handle texture swizzle on HSW- */
403 for (int i = 0; i < MAX_SAMPLERS; i++) {
404 /* Assume color sampler, no swizzling. (Works for BDW+) */
405 key->swizzles[i] = SWIZZLE_XYZW;
406 }
407 }
408
409 static void
410 populate_base_prog_key(const struct intel_device_info *devinfo,
411 enum brw_subgroup_size_type subgroup_size_type,
412 bool robust_buffer_acccess,
413 struct brw_base_prog_key *key)
414 {
415 key->subgroup_size_type = subgroup_size_type;
416 key->robust_buffer_access = robust_buffer_acccess;
417
418 populate_sampler_prog_key(devinfo, &key->tex);
419 }
420
421 static void
422 populate_vs_prog_key(const struct intel_device_info *devinfo,
423 enum brw_subgroup_size_type subgroup_size_type,
424 bool robust_buffer_acccess,
425 struct brw_vs_prog_key *key)
426 {
427 memset(key, 0, sizeof(*key));
428
429 populate_base_prog_key(devinfo, subgroup_size_type,
430 robust_buffer_acccess, &key->base);
431
432 /* XXX: Handle vertex input work-arounds */
433
434 /* XXX: Handle sampler_prog_key */
435 }
436
437 static void
438 populate_tcs_prog_key(const struct intel_device_info *devinfo,
439 enum brw_subgroup_size_type subgroup_size_type,
440 bool robust_buffer_acccess,
441 unsigned input_vertices,
442 struct brw_tcs_prog_key *key)
443 {
444 memset(key, 0, sizeof(*key));
445
446 populate_base_prog_key(devinfo, subgroup_size_type,
447 robust_buffer_acccess, &key->base);
448
449 key->input_vertices = input_vertices;
450 }
451
452 static void
453 populate_tes_prog_key(const struct intel_device_info *devinfo,
454 enum brw_subgroup_size_type subgroup_size_type,
455 bool robust_buffer_acccess,
456 struct brw_tes_prog_key *key)
457 {
458 memset(key, 0, sizeof(*key));
459
460 populate_base_prog_key(devinfo, subgroup_size_type,
461 robust_buffer_acccess, &key->base);
462 }
463
464 static void
465 populate_gs_prog_key(const struct intel_device_info *devinfo,
466 enum brw_subgroup_size_type subgroup_size_type,
467 bool robust_buffer_acccess,
468 struct brw_gs_prog_key *key)
469 {
470 memset(key, 0, sizeof(*key));
471
472 populate_base_prog_key(devinfo, subgroup_size_type,
473 robust_buffer_acccess, &key->base);
474 }
475
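/* Decide whether this pipeline may use coarse pixel (fragment shading rate)
 * shading. It cannot when sample shading is enabled, and it is not needed
 * when the shading rate is statically 1x1 with KEEP combiners and cannot be
 * changed dynamically.
 */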
476 static bool
477 pipeline_has_coarse_pixel(const struct anv_graphics_pipeline *pipeline,
478 const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_info)
479 {
480 if (pipeline->sample_shading_enable)
481 return false;
482
483 /* Not dynamic & not specified for the pipeline. */
484 if ((pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) == 0 && !fsr_info)
485 return false;
486
487 /* Not dynamic & pipeline has a 1x1 fragment shading rate with no
488     * possibility for any element of the pipeline to change the value.
489 */
490 if ((pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) == 0 &&
491 fsr_info->fragmentSize.width <= 1 &&
492 fsr_info->fragmentSize.height <= 1 &&
493 fsr_info->combinerOps[0] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR &&
494 fsr_info->combinerOps[1] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR)
495 return false;
496
497 return true;
498 }
499
500 static bool
501 is_sample_shading(const VkPipelineMultisampleStateCreateInfo *ms_info)
502 {
503 return ms_info->sampleShadingEnable &&
504 (ms_info->minSampleShading * ms_info->rasterizationSamples) > 1;
505 }
506
507 static void
508 populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
509 VkPipelineShaderStageCreateFlags flags,
510 bool robust_buffer_acccess,
511 const struct anv_subpass *subpass,
512 const VkPipelineMultisampleStateCreateInfo *ms_info,
513 const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_info,
514 struct brw_wm_prog_key *key)
515 {
516 const struct anv_device *device = pipeline->base.device;
517 const struct intel_device_info *devinfo = &device->info;
518
519 memset(key, 0, sizeof(*key));
520
521 populate_base_prog_key(devinfo, flags, robust_buffer_acccess, &key->base);
522
523 /* We set this to 0 here and set to the actual value before we call
524 * brw_compile_fs.
525 */
526 key->input_slots_valid = 0;
527
528 /* Vulkan doesn't specify a default */
529 key->high_quality_derivatives = false;
530
531 /* XXX Vulkan doesn't appear to specify */
532 key->clamp_fragment_color = false;
533
534 key->ignore_sample_mask_out = false;
535
536 assert(subpass->color_count <= MAX_RTS);
537 for (uint32_t i = 0; i < subpass->color_count; i++) {
538 if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
539 key->color_outputs_valid |= (1 << i);
540 }
541
542 key->nr_color_regions = subpass->color_count;
543
544    /* To reduce possible shader recompilations we would need to know if
545     * there is a SampleMask output variable, so we could decide whether to
546     * emit code to work around the hardware disabling alpha to coverage
547     * when there is a SampleMask output.
548 */
549 key->alpha_to_coverage = ms_info && ms_info->alphaToCoverageEnable;
550
551 /* Vulkan doesn't support fixed-function alpha test */
552 key->alpha_test_replicate_alpha = false;
553
554 if (ms_info) {
555 key->persample_interp = is_sample_shading(ms_info);
556 key->multisample_fbo = ms_info->rasterizationSamples > 1;
557 key->frag_coord_adds_sample_pos = key->persample_interp;
558 }
559
560 key->coarse_pixel =
561 device->vk.enabled_extensions.KHR_fragment_shading_rate &&
562 pipeline_has_coarse_pixel(pipeline, fsr_info);
563 }
564
565 static void
566 populate_cs_prog_key(const struct intel_device_info *devinfo,
567 enum brw_subgroup_size_type subgroup_size_type,
568 bool robust_buffer_acccess,
569 struct brw_cs_prog_key *key)
570 {
571 memset(key, 0, sizeof(*key));
572
573 populate_base_prog_key(devinfo, subgroup_size_type,
574 robust_buffer_acccess, &key->base);
575 }
576
577 static void
578 populate_bs_prog_key(const struct intel_device_info *devinfo,
579 VkPipelineShaderStageCreateFlags flags,
580 bool robust_buffer_access,
581 struct brw_bs_prog_key *key)
582 {
583 memset(key, 0, sizeof(*key));
584
585 populate_base_prog_key(devinfo, flags, robust_buffer_access, &key->base);
586 }
587
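/* Per-stage scratch state used while compiling a pipeline: source module and
 * entrypoint, hashed keys for cache lookups, the lowered NIR, the bind map,
 * the compiled prog_data/code, and creation feedback.
 */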
588 struct anv_pipeline_stage {
589 gl_shader_stage stage;
590
591 const struct vk_shader_module *module;
592 const char *entrypoint;
593 const VkSpecializationInfo *spec_info;
594
595 unsigned char shader_sha1[20];
596
597 union brw_any_prog_key key;
598
599 struct {
600 gl_shader_stage stage;
601 unsigned char sha1[20];
602 } cache_key;
603
604 nir_shader *nir;
605
606 struct anv_pipeline_binding surface_to_descriptor[256];
607 struct anv_pipeline_binding sampler_to_descriptor[256];
608 struct anv_pipeline_bind_map bind_map;
609
610 union brw_any_prog_data prog_data;
611
612 uint32_t num_stats;
613 struct brw_compile_stats stats[3];
614 char *disasm[3];
615
616 VkPipelineCreationFeedbackEXT feedback;
617
618 const unsigned *code;
619
620 struct anv_shader_bin *bin;
621 };
622
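/* Hash everything that uniquely identifies a shader stage's input: the module
 * SHA1, the entrypoint name, the stage, and the specialization constants.
 */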
623 static void
624 anv_pipeline_hash_shader(const struct vk_shader_module *module,
625 const char *entrypoint,
626 gl_shader_stage stage,
627 const VkSpecializationInfo *spec_info,
628 unsigned char *sha1_out)
629 {
630 struct mesa_sha1 ctx;
631 _mesa_sha1_init(&ctx);
632
633 _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
634 _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
635 _mesa_sha1_update(&ctx, &stage, sizeof(stage));
636 if (spec_info) {
637 _mesa_sha1_update(&ctx, spec_info->pMapEntries,
638 spec_info->mapEntryCount *
639 sizeof(*spec_info->pMapEntries));
640 _mesa_sha1_update(&ctx, spec_info->pData,
641 spec_info->dataSize);
642 }
643
644 _mesa_sha1_final(&ctx, sha1_out);
645 }
646
647 static void
648 anv_pipeline_hash_graphics(struct anv_graphics_pipeline *pipeline,
649 struct anv_pipeline_layout *layout,
650 struct anv_pipeline_stage *stages,
651 unsigned char *sha1_out)
652 {
653 struct mesa_sha1 ctx;
654 _mesa_sha1_init(&ctx);
655
656 _mesa_sha1_update(&ctx, &pipeline->subpass->view_mask,
657 sizeof(pipeline->subpass->view_mask));
658
659 if (layout)
660 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
661
662 const bool rba = pipeline->base.device->robust_buffer_access;
663 _mesa_sha1_update(&ctx, &rba, sizeof(rba));
664
665 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
666 if (stages[s].entrypoint) {
667 _mesa_sha1_update(&ctx, stages[s].shader_sha1,
668 sizeof(stages[s].shader_sha1));
669 _mesa_sha1_update(&ctx, &stages[s].key, brw_prog_key_size(s));
670 }
671 }
672
673 _mesa_sha1_final(&ctx, sha1_out);
674 }
675
676 static void
677 anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
678 struct anv_pipeline_layout *layout,
679 struct anv_pipeline_stage *stage,
680 unsigned char *sha1_out)
681 {
682 struct mesa_sha1 ctx;
683 _mesa_sha1_init(&ctx);
684
685 if (layout)
686 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
687
688 const bool rba = pipeline->base.device->robust_buffer_access;
689 _mesa_sha1_update(&ctx, &rba, sizeof(rba));
690
691 _mesa_sha1_update(&ctx, stage->shader_sha1,
692 sizeof(stage->shader_sha1));
693 _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));
694
695 _mesa_sha1_final(&ctx, sha1_out);
696 }
697
698 static void
699 anv_pipeline_hash_ray_tracing_shader(struct anv_ray_tracing_pipeline *pipeline,
700 struct anv_pipeline_layout *layout,
701 struct anv_pipeline_stage *stage,
702 unsigned char *sha1_out)
703 {
704 struct mesa_sha1 ctx;
705 _mesa_sha1_init(&ctx);
706
707 if (layout != NULL)
708 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
709
710 const bool rba = pipeline->base.device->robust_buffer_access;
711 _mesa_sha1_update(&ctx, &rba, sizeof(rba));
712
713 _mesa_sha1_update(&ctx, stage->shader_sha1, sizeof(stage->shader_sha1));
714 _mesa_sha1_update(&ctx, &stage->key, sizeof(stage->key.bs));
715
716 _mesa_sha1_final(&ctx, sha1_out);
717 }
718
719 static void
720 anv_pipeline_hash_ray_tracing_combined_shader(struct anv_ray_tracing_pipeline *pipeline,
721 struct anv_pipeline_layout *layout,
722 struct anv_pipeline_stage *intersection,
723 struct anv_pipeline_stage *any_hit,
724 unsigned char *sha1_out)
725 {
726 struct mesa_sha1 ctx;
727 _mesa_sha1_init(&ctx);
728
729 if (layout != NULL)
730 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
731
732 const bool rba = pipeline->base.device->robust_buffer_access;
733 _mesa_sha1_update(&ctx, &rba, sizeof(rba));
734
735 _mesa_sha1_update(&ctx, intersection->shader_sha1, sizeof(intersection->shader_sha1));
736 _mesa_sha1_update(&ctx, &intersection->key, sizeof(intersection->key.bs));
737 _mesa_sha1_update(&ctx, any_hit->shader_sha1, sizeof(any_hit->shader_sha1));
738 _mesa_sha1_update(&ctx, &any_hit->key, sizeof(any_hit->key.bs));
739
740 _mesa_sha1_final(&ctx, sha1_out);
741 }
742
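/* Return the stage's NIR, either from the pipeline cache or by compiling the
 * SPIR-V and uploading the result to the cache.
 */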
743 static nir_shader *
744 anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
745 struct anv_pipeline_cache *cache,
746 void *mem_ctx,
747 struct anv_pipeline_stage *stage)
748 {
749 const struct brw_compiler *compiler =
750 pipeline->device->physical->compiler;
751 const nir_shader_compiler_options *nir_options =
752 compiler->glsl_compiler_options[stage->stage].NirOptions;
753 nir_shader *nir;
754
755 nir = anv_device_search_for_nir(pipeline->device, cache,
756 nir_options,
757 stage->shader_sha1,
758 mem_ctx);
759 if (nir) {
760 assert(nir->info.stage == stage->stage);
761 return nir;
762 }
763
764 nir = anv_shader_compile_to_nir(pipeline->device,
765 mem_ctx,
766 stage->module,
767 stage->entrypoint,
768 stage->stage,
769 stage->spec_info);
770 if (nir) {
771 anv_device_upload_nir(pipeline->device, cache, nir, stage->shader_sha1);
772 return nir;
773 }
774
775 return NULL;
776 }
777
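/* Size/alignment callback for shared variables: booleans take 4 bytes and
 * vec3s are aligned like vec4s.
 */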
778 static void
779 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
780 {
781 assert(glsl_type_is_vector_or_scalar(type));
782
783 uint32_t comp_size = glsl_type_is_boolean(type)
784 ? 4 : glsl_get_bit_size(type) / 8;
785 unsigned length = glsl_get_vector_elements(type);
786 *size = comp_size * length,
787 *align = comp_size * (length == 3 ? 4 : length);
788 }
789
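/* Apply the anv-specific lowering passes: input attachments and multiview for
 * graphics, Y'CbCr textures, the pipeline layout (descriptors), UBO/SSBO and
 * shared-memory explicit I/O, and the push constant layout.
 */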
790 static void
791 anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
792 void *mem_ctx,
793 struct anv_pipeline_stage *stage,
794 struct anv_pipeline_layout *layout)
795 {
796 const struct anv_physical_device *pdevice = pipeline->device->physical;
797 const struct brw_compiler *compiler = pdevice->compiler;
798
799 struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
800 nir_shader *nir = stage->nir;
801
802 if (nir->info.stage == MESA_SHADER_FRAGMENT) {
803 /* Check if sample shading is enabled in the shader and toggle
804       * it on for the pipeline, regardless of whether sampleShadingEnable is set.
805 */
806 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
807 if (nir->info.fs.uses_sample_shading)
808 anv_pipeline_to_graphics(pipeline)->sample_shading_enable = true;
809
810 NIR_PASS_V(nir, nir_lower_wpos_center,
811 anv_pipeline_to_graphics(pipeline)->sample_shading_enable);
812 NIR_PASS_V(nir, nir_lower_input_attachments,
813 &(nir_input_attachment_options) {
814 .use_fragcoord_sysval = true,
815 .use_layer_id_sysval = true,
816 });
817 }
818
819 NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout);
820
821 if (pipeline->type == ANV_PIPELINE_GRAPHICS) {
822 NIR_PASS_V(nir, anv_nir_lower_multiview,
823 anv_pipeline_to_graphics(pipeline));
824 }
825
826 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
827
828 NIR_PASS_V(nir, brw_nir_lower_storage_image, compiler->devinfo);
829
830 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_global,
831 nir_address_format_64bit_global);
832 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,
833 nir_address_format_32bit_offset);
834
835 /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
836 anv_nir_apply_pipeline_layout(pdevice,
837 pipeline->device->robust_buffer_access,
838 layout, nir, &stage->bind_map);
839
840 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
841 anv_nir_ubo_addr_format(pdevice,
842 pipeline->device->robust_buffer_access));
843 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
844 anv_nir_ssbo_addr_format(pdevice,
845 pipeline->device->robust_buffer_access));
846
847 /* First run copy-prop to get rid of all of the vec() that address
848 * calculations often create and then constant-fold so that, when we
849 * get to anv_nir_lower_ubo_loads, we can detect constant offsets.
850 */
851 NIR_PASS_V(nir, nir_copy_prop);
852 NIR_PASS_V(nir, nir_opt_constant_folding);
853
854 NIR_PASS_V(nir, anv_nir_lower_ubo_loads);
855
856 /* We don't support non-uniform UBOs and non-uniform SSBO access is
857 * handled naturally by falling back to A64 messages.
858 */
859 NIR_PASS_V(nir, nir_lower_non_uniform_access,
860 &(nir_lower_non_uniform_access_options) {
861 .types = nir_lower_non_uniform_texture_access |
862 nir_lower_non_uniform_image_access,
863 .callback = NULL,
864 });
865
866 anv_nir_compute_push_layout(pdevice, pipeline->device->robust_buffer_access,
867 nir, prog_data, &stage->bind_map, mem_ctx);
868
869 if (gl_shader_stage_uses_workgroup(nir->info.stage)) {
870 if (!nir->info.shared_memory_explicit_layout) {
871 NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
872 nir_var_mem_shared, shared_type_info);
873 }
874
875 NIR_PASS_V(nir, nir_lower_explicit_io,
876 nir_var_mem_shared, nir_address_format_32bit_offset);
877
878 if (nir->info.zero_initialize_shared_memory &&
879 nir->info.shared_size > 0) {
880 /* The effective Shared Local Memory size is at least 1024 bytes and
881 * is always rounded to a power of two, so it is OK to align the size
882 * used by the shader to chunk_size -- which does simplify the logic.
883 */
884 const unsigned chunk_size = 16;
885 const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
886 assert(shared_size <=
887 intel_calculate_slm_size(compiler->devinfo->ver, nir->info.shared_size));
888
889 NIR_PASS_V(nir, nir_zero_initialize_shared_memory,
890 shared_size, chunk_size);
891 }
892 }
893
894 stage->nir = nir;
895 }
896
897 static void
898 anv_pipeline_link_vs(const struct brw_compiler *compiler,
899 struct anv_pipeline_stage *vs_stage,
900 struct anv_pipeline_stage *next_stage)
901 {
902 if (next_stage)
903 brw_nir_link_shaders(compiler, vs_stage->nir, next_stage->nir);
904 }
905
906 static void
907 anv_pipeline_compile_vs(const struct brw_compiler *compiler,
908 void *mem_ctx,
909 struct anv_graphics_pipeline *pipeline,
910 struct anv_pipeline_stage *vs_stage)
911 {
912 /* When using Primitive Replication for multiview, each view gets its own
913 * position slot.
914 */
915 uint32_t pos_slots = pipeline->use_primitive_replication ?
916 anv_subpass_view_count(pipeline->subpass) : 1;
917
918 brw_compute_vue_map(compiler->devinfo,
919 &vs_stage->prog_data.vs.base.vue_map,
920 vs_stage->nir->info.outputs_written,
921 vs_stage->nir->info.separate_shader,
922 pos_slots);
923
924 vs_stage->num_stats = 1;
925
926 struct brw_compile_vs_params params = {
927 .nir = vs_stage->nir,
928 .key = &vs_stage->key.vs,
929 .prog_data = &vs_stage->prog_data.vs,
930 .stats = vs_stage->stats,
931 .log_data = pipeline->base.device,
932 };
933
934    vs_stage->code = brw_compile_vs(compiler, mem_ctx, &params);
935 }
936
937 static void
938 merge_tess_info(struct shader_info *tes_info,
939 const struct shader_info *tcs_info)
940 {
941 /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
942 *
943 * "PointMode. Controls generation of points rather than triangles
944 * or lines. This functionality defaults to disabled, and is
945 * enabled if either shader stage includes the execution mode.
946 *
947 * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
948 * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
949 * and OutputVertices, it says:
950 *
951 * "One mode must be set in at least one of the tessellation
952 * shader stages."
953 *
954 * So, the fields can be set in either the TCS or TES, but they must
955 * agree if set in both. Our backend looks at TES, so bitwise-or in
956 * the values from the TCS.
957 */
958 assert(tcs_info->tess.tcs_vertices_out == 0 ||
959 tes_info->tess.tcs_vertices_out == 0 ||
960 tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
961 tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
962
963 assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
964 tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
965 tcs_info->tess.spacing == tes_info->tess.spacing);
966 tes_info->tess.spacing |= tcs_info->tess.spacing;
967
968 assert(tcs_info->tess.primitive_mode == 0 ||
969 tes_info->tess.primitive_mode == 0 ||
970 tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode);
971 tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode;
972 tes_info->tess.ccw |= tcs_info->tess.ccw;
973 tes_info->tess.point_mode |= tcs_info->tess.point_mode;
974 }
975
976 static void
977 anv_pipeline_link_tcs(const struct brw_compiler *compiler,
978 struct anv_pipeline_stage *tcs_stage,
979 struct anv_pipeline_stage *tes_stage)
980 {
981 assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);
982
983 brw_nir_link_shaders(compiler, tcs_stage->nir, tes_stage->nir);
984
985 nir_lower_patch_vertices(tes_stage->nir,
986 tcs_stage->nir->info.tess.tcs_vertices_out,
987 NULL);
988
989 /* Copy TCS info into the TES info */
990 merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);
991
992 /* Whacking the key after cache lookup is a bit sketchy, but all of
993 * this comes from the SPIR-V, which is part of the hash used for the
994 * pipeline cache. So it should be safe.
995 */
996 tcs_stage->key.tcs.tes_primitive_mode =
997 tes_stage->nir->info.tess.primitive_mode;
998 tcs_stage->key.tcs.quads_workaround =
999 compiler->devinfo->ver < 9 &&
1000 tes_stage->nir->info.tess.primitive_mode == 7 /* GL_QUADS */ &&
1001 tes_stage->nir->info.tess.spacing == TESS_SPACING_EQUAL;
1002 }
1003
1004 static void
1005 anv_pipeline_compile_tcs(const struct brw_compiler *compiler,
1006 void *mem_ctx,
1007 struct anv_device *device,
1008 struct anv_pipeline_stage *tcs_stage,
1009 struct anv_pipeline_stage *prev_stage)
1010 {
1011 tcs_stage->key.tcs.outputs_written =
1012 tcs_stage->nir->info.outputs_written;
1013 tcs_stage->key.tcs.patch_outputs_written =
1014 tcs_stage->nir->info.patch_outputs_written;
1015
1016 tcs_stage->num_stats = 1;
1017 tcs_stage->code = brw_compile_tcs(compiler, device, mem_ctx,
1018 &tcs_stage->key.tcs,
1019 &tcs_stage->prog_data.tcs,
1020 tcs_stage->nir, -1,
1021 tcs_stage->stats, NULL);
1022 }
1023
1024 static void
1025 anv_pipeline_link_tes(const struct brw_compiler *compiler,
1026 struct anv_pipeline_stage *tes_stage,
1027 struct anv_pipeline_stage *next_stage)
1028 {
1029 if (next_stage)
1030 brw_nir_link_shaders(compiler, tes_stage->nir, next_stage->nir);
1031 }
1032
1033 static void
1034 anv_pipeline_compile_tes(const struct brw_compiler *compiler,
1035 void *mem_ctx,
1036 struct anv_device *device,
1037 struct anv_pipeline_stage *tes_stage,
1038 struct anv_pipeline_stage *tcs_stage)
1039 {
1040 tes_stage->key.tes.inputs_read =
1041 tcs_stage->nir->info.outputs_written;
1042 tes_stage->key.tes.patch_inputs_read =
1043 tcs_stage->nir->info.patch_outputs_written;
1044
1045 tes_stage->num_stats = 1;
1046 tes_stage->code = brw_compile_tes(compiler, device, mem_ctx,
1047 &tes_stage->key.tes,
1048 &tcs_stage->prog_data.tcs.base.vue_map,
1049 &tes_stage->prog_data.tes,
1050 tes_stage->nir, -1,
1051 tes_stage->stats, NULL);
1052 }
1053
1054 static void
1055 anv_pipeline_link_gs(const struct brw_compiler *compiler,
1056 struct anv_pipeline_stage *gs_stage,
1057 struct anv_pipeline_stage *next_stage)
1058 {
1059 if (next_stage)
1060 brw_nir_link_shaders(compiler, gs_stage->nir, next_stage->nir);
1061 }
1062
1063 static void
1064 anv_pipeline_compile_gs(const struct brw_compiler *compiler,
1065 void *mem_ctx,
1066 struct anv_device *device,
1067 struct anv_pipeline_stage *gs_stage,
1068 struct anv_pipeline_stage *prev_stage)
1069 {
1070 brw_compute_vue_map(compiler->devinfo,
1071 &gs_stage->prog_data.gs.base.vue_map,
1072 gs_stage->nir->info.outputs_written,
1073 gs_stage->nir->info.separate_shader, 1);
1074
1075 gs_stage->num_stats = 1;
1076 gs_stage->code = brw_compile_gs(compiler, device, mem_ctx,
1077 &gs_stage->key.gs,
1078 &gs_stage->prog_data.gs,
1079 gs_stage->nir, -1,
1080 gs_stage->stats, NULL);
1081 }
1082
1083 static void
1084 anv_pipeline_link_fs(const struct brw_compiler *compiler,
1085 struct anv_pipeline_stage *stage)
1086 {
1087 unsigned num_rt_bindings;
1088 struct anv_pipeline_binding rt_bindings[MAX_RTS];
1089 if (stage->key.wm.nr_color_regions > 0) {
1090 assert(stage->key.wm.nr_color_regions <= MAX_RTS);
1091 for (unsigned rt = 0; rt < stage->key.wm.nr_color_regions; rt++) {
1092 if (stage->key.wm.color_outputs_valid & BITFIELD_BIT(rt)) {
1093 rt_bindings[rt] = (struct anv_pipeline_binding) {
1094 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
1095 .index = rt,
1096 };
1097 } else {
1098 /* Setup a null render target */
1099 rt_bindings[rt] = (struct anv_pipeline_binding) {
1100 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
1101 .index = UINT32_MAX,
1102 };
1103 }
1104 }
1105 num_rt_bindings = stage->key.wm.nr_color_regions;
1106 } else {
1107 /* Setup a null render target */
1108 rt_bindings[0] = (struct anv_pipeline_binding) {
1109 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
1110 .index = UINT32_MAX,
1111 };
1112 num_rt_bindings = 1;
1113 }
1114
1115 assert(num_rt_bindings <= MAX_RTS);
1116 assert(stage->bind_map.surface_count == 0);
1117 typed_memcpy(stage->bind_map.surface_to_descriptor,
1118 rt_bindings, num_rt_bindings);
1119 stage->bind_map.surface_count += num_rt_bindings;
1120
1121 /* Now that we've set up the color attachments, we can go through and
1122 * eliminate any shader outputs that map to VK_ATTACHMENT_UNUSED in the
1123 * hopes that dead code can clean them up in this and any earlier shader
1124 * stages.
1125 */
1126 nir_function_impl *impl = nir_shader_get_entrypoint(stage->nir);
1127 bool deleted_output = false;
1128 nir_foreach_shader_out_variable_safe(var, stage->nir) {
1129 /* TODO: We don't delete depth/stencil writes. We probably could if the
1130 * subpass doesn't have a depth/stencil attachment.
1131 */
1132 if (var->data.location < FRAG_RESULT_DATA0)
1133 continue;
1134
1135 const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
1136
1137 /* If this is the RT at location 0 and we have alpha to coverage
1138 * enabled we still need that write because it will affect the coverage
1139 * mask even if it's never written to a color target.
1140 */
1141 if (rt == 0 && stage->key.wm.alpha_to_coverage)
1142 continue;
1143
1144 const unsigned array_len =
1145 glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
1146 assert(rt + array_len <= MAX_RTS);
1147
1148 if (rt >= MAX_RTS || !(stage->key.wm.color_outputs_valid &
1149 BITFIELD_RANGE(rt, array_len))) {
1150 deleted_output = true;
1151 var->data.mode = nir_var_function_temp;
1152 exec_node_remove(&var->node);
1153 exec_list_push_tail(&impl->locals, &var->node);
1154 }
1155 }
1156
1157 if (deleted_output)
1158 nir_fixup_deref_modes(stage->nir);
1159
1160 /* We stored the number of subpass color attachments in nr_color_regions
1161 * when calculating the key for caching. Now that we've computed the bind
1162 * map, we can reduce this to the actual max before we go into the back-end
1163 * compiler.
1164 */
1165 stage->key.wm.nr_color_regions =
1166 util_last_bit(stage->key.wm.color_outputs_valid);
1167 }
1168
1169 static void
1170 anv_pipeline_compile_fs(const struct brw_compiler *compiler,
1171 void *mem_ctx,
1172 struct anv_device *device,
1173 struct anv_pipeline_stage *fs_stage,
1174 struct anv_pipeline_stage *prev_stage)
1175 {
1176 /* TODO: we could set this to 0 based on the information in nir_shader, but
1177 * we need this before we call spirv_to_nir.
1178 */
1179 assert(prev_stage);
1180 fs_stage->key.wm.input_slots_valid =
1181 prev_stage->prog_data.vue.vue_map.slots_valid;
1182
1183 struct brw_compile_fs_params params = {
1184 .nir = fs_stage->nir,
1185 .key = &fs_stage->key.wm,
1186 .prog_data = &fs_stage->prog_data.wm,
1187
1188 .allow_spilling = true,
1189 .stats = fs_stage->stats,
1190 .log_data = device,
1191 };
1192
1193    fs_stage->code = brw_compile_fs(compiler, mem_ctx, &params);
1194
1195 fs_stage->num_stats = (uint32_t)fs_stage->prog_data.wm.dispatch_8 +
1196 (uint32_t)fs_stage->prog_data.wm.dispatch_16 +
1197 (uint32_t)fs_stage->prog_data.wm.dispatch_32;
1198
1199 if (fs_stage->key.wm.color_outputs_valid == 0 &&
1200 !fs_stage->prog_data.wm.has_side_effects &&
1201 !fs_stage->prog_data.wm.uses_omask &&
1202 !fs_stage->key.wm.alpha_to_coverage &&
1203 !fs_stage->prog_data.wm.uses_kill &&
1204 fs_stage->prog_data.wm.computed_depth_mode == BRW_PSCDEPTH_OFF &&
1205 !fs_stage->prog_data.wm.computed_stencil) {
1206 /* This fragment shader has no outputs and no side effects. Go ahead
1207 * and return the code pointer so we don't accidentally think the
1208       * compile failed, but zero out prog_data, which will set program_size to
1209 * zero and disable the stage.
1210 */
1211 memset(&fs_stage->prog_data, 0, sizeof(fs_stage->prog_data));
1212 }
1213 }
1214
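/* Record an executable for VK_KHR_pipeline_executable_properties, capturing
 * the NIR and assembly when the pipeline was created with
 * CAPTURE_INTERNAL_REPRESENTATIONS.
 */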
1215 static void
1216 anv_pipeline_add_executable(struct anv_pipeline *pipeline,
1217 struct anv_pipeline_stage *stage,
1218 struct brw_compile_stats *stats,
1219 uint32_t code_offset)
1220 {
1221 char *nir = NULL;
1222 if (stage->nir &&
1223 (pipeline->flags &
1224 VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
1225 nir = nir_shader_as_str(stage->nir, pipeline->mem_ctx);
1226 }
1227
1228 char *disasm = NULL;
1229 if (stage->code &&
1230 (pipeline->flags &
1231 VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
1232 char *stream_data = NULL;
1233 size_t stream_size = 0;
1234 FILE *stream = open_memstream(&stream_data, &stream_size);
1235
1236 uint32_t push_size = 0;
1237 for (unsigned i = 0; i < 4; i++)
1238 push_size += stage->bind_map.push_ranges[i].length;
1239 if (push_size > 0) {
1240 fprintf(stream, "Push constant ranges:\n");
1241 for (unsigned i = 0; i < 4; i++) {
1242 if (stage->bind_map.push_ranges[i].length == 0)
1243 continue;
1244
1245 fprintf(stream, " RANGE%d (%dB): ", i,
1246 stage->bind_map.push_ranges[i].length * 32);
1247
1248 switch (stage->bind_map.push_ranges[i].set) {
1249 case ANV_DESCRIPTOR_SET_NULL:
1250 fprintf(stream, "NULL");
1251 break;
1252
1253 case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
1254 fprintf(stream, "Vulkan push constants and API params");
1255 break;
1256
1257 case ANV_DESCRIPTOR_SET_DESCRIPTORS:
1258 fprintf(stream, "Descriptor buffer for set %d (start=%dB)",
1259 stage->bind_map.push_ranges[i].index,
1260 stage->bind_map.push_ranges[i].start * 32);
1261 break;
1262
1263 case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS:
1264 unreachable("gl_NumWorkgroups is never pushed");
1265
1266 case ANV_DESCRIPTOR_SET_SHADER_CONSTANTS:
1267 fprintf(stream, "Inline shader constant data (start=%dB)",
1268 stage->bind_map.push_ranges[i].start * 32);
1269 break;
1270
1271 case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
1272 unreachable("Color attachments can't be pushed");
1273
1274 default:
1275 fprintf(stream, "UBO (set=%d binding=%d start=%dB)",
1276 stage->bind_map.push_ranges[i].set,
1277 stage->bind_map.push_ranges[i].index,
1278 stage->bind_map.push_ranges[i].start * 32);
1279 break;
1280 }
1281 fprintf(stream, "\n");
1282 }
1283 fprintf(stream, "\n");
1284 }
1285
1286 /* Creating this is far cheaper than it looks. It's perfectly fine to
1287 * do it for every binary.
1288 */
1289 intel_disassemble(&pipeline->device->info,
1290 stage->code, code_offset, stream);
1291
1292 fclose(stream);
1293
1294 /* Copy it to a ralloc'd thing */
1295 disasm = ralloc_size(pipeline->mem_ctx, stream_size + 1);
1296 memcpy(disasm, stream_data, stream_size);
1297 disasm[stream_size] = 0;
1298
1299 free(stream_data);
1300 }
1301
1302 const struct anv_pipeline_executable exe = {
1303 .stage = stage->stage,
1304 .stats = *stats,
1305 .nir = nir,
1306 .disasm = disasm,
1307 };
1308 util_dynarray_append(&pipeline->executables,
1309 struct anv_pipeline_executable, exe);
1310 }
1311
1312 static void
1313 anv_pipeline_add_executables(struct anv_pipeline *pipeline,
1314 struct anv_pipeline_stage *stage,
1315 struct anv_shader_bin *bin)
1316 {
1317 if (stage->stage == MESA_SHADER_FRAGMENT) {
1318 /* We pull the prog data and stats out of the anv_shader_bin because
1319 * the anv_pipeline_stage may not be fully populated if we successfully
1320 * looked up the shader in a cache.
1321 */
1322 const struct brw_wm_prog_data *wm_prog_data =
1323 (const struct brw_wm_prog_data *)bin->prog_data;
1324 struct brw_compile_stats *stats = bin->stats;
1325
1326 if (wm_prog_data->dispatch_8) {
1327 anv_pipeline_add_executable(pipeline, stage, stats++, 0);
1328 }
1329
1330 if (wm_prog_data->dispatch_16) {
1331 anv_pipeline_add_executable(pipeline, stage, stats++,
1332 wm_prog_data->prog_offset_16);
1333 }
1334
1335 if (wm_prog_data->dispatch_32) {
1336 anv_pipeline_add_executable(pipeline, stage, stats++,
1337 wm_prog_data->prog_offset_32);
1338 }
1339 } else {
1340 anv_pipeline_add_executable(pipeline, stage, bin->stats, 0);
1341 }
1342 }
1343
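/* Translate the Vulkan subgroup size controls (required size, varying size,
 * require full subgroups) into the backend's subgroup size type.
 */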
1344 static enum brw_subgroup_size_type
1345 anv_subgroup_size_type(gl_shader_stage stage,
1346 VkPipelineShaderStageCreateFlags flags,
1347 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info)
1348 {
1349 enum brw_subgroup_size_type subgroup_size_type;
1350
1351 if (rss_info) {
1352 assert(stage == MESA_SHADER_COMPUTE);
1353 /* These enum values are expressly chosen to be equal to the subgroup
1354 * size that they require.
1355 */
1356 assert(rss_info->requiredSubgroupSize == 8 ||
1357 rss_info->requiredSubgroupSize == 16 ||
1358 rss_info->requiredSubgroupSize == 32);
1359 subgroup_size_type = rss_info->requiredSubgroupSize;
1360 } else if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) {
1361 subgroup_size_type = BRW_SUBGROUP_SIZE_VARYING;
1362 } else if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) {
1363 assert(stage == MESA_SHADER_COMPUTE);
1364       /* If the client expressly requests full subgroups and neither
1365        * specifies a subgroup size nor allows varying subgroups, we need to
1366 * pick one. So we specify the API value of 32. Performance will
1367 * likely be terrible in this case but there's nothing we can do about
1368 * that. The client should have chosen a size.
1369 */
1370 subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_32;
1371 } else {
1372 subgroup_size_type = BRW_SUBGROUP_SIZE_API_CONSTANT;
1373 }
1374
1375 return subgroup_size_type;
1376 }
1377
1378 static void
1379 anv_pipeline_init_from_cached_graphics(struct anv_graphics_pipeline *pipeline)
1380 {
1381 /* TODO: Cache this pipeline-wide information. */
1382
1383 if (anv_pipeline_is_primitive(pipeline)) {
1384 /* Primitive replication depends on information from all the shaders.
1385 * Recover this bit from the fact that we have more than one position slot
1386 * in the vertex shader when using it.
1387 */
1388 assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
1389 int pos_slots = 0;
1390 const struct brw_vue_prog_data *vue_prog_data =
1391 (const void *) pipeline->shaders[MESA_SHADER_VERTEX]->prog_data;
1392 const struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
1393 for (int i = 0; i < vue_map->num_slots; i++) {
1394 if (vue_map->slot_to_varying[i] == VARYING_SLOT_POS)
1395 pos_slots++;
1396 }
1397 pipeline->use_primitive_replication = pos_slots > 1;
1398 }
1399 }
1400
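/* Compile (or fetch from the pipeline cache) all shader stages of a graphics
 * pipeline and record creation feedback.
 */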
1401 static VkResult
1402 anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
1403 struct anv_pipeline_cache *cache,
1404 const VkGraphicsPipelineCreateInfo *info)
1405 {
1406 VkPipelineCreationFeedbackEXT pipeline_feedback = {
1407 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1408 };
1409 int64_t pipeline_start = os_time_get_nano();
1410
1411 const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;
1412 struct anv_pipeline_stage stages[MESA_SHADER_STAGES] = {};
1413
1414 /* Information on which states are considered dynamic. */
1415 const VkPipelineDynamicStateCreateInfo *dyn_info =
1416 info->pDynamicState;
1417 uint32_t dynamic_states = 0;
1418 if (dyn_info) {
1419 for (unsigned i = 0; i < dyn_info->dynamicStateCount; i++)
1420 dynamic_states |=
1421 anv_cmd_dirty_bit_for_vk_dynamic_state(dyn_info->pDynamicStates[i]);
1422 }
1423
1424 VkResult result;
1425 for (uint32_t i = 0; i < info->stageCount; i++) {
1426 const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
1427 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
1428
1429 int64_t stage_start = os_time_get_nano();
1430
1431 stages[stage].stage = stage;
1432 stages[stage].module = vk_shader_module_from_handle(sinfo->module);
1433 stages[stage].entrypoint = sinfo->pName;
1434 stages[stage].spec_info = sinfo->pSpecializationInfo;
1435 anv_pipeline_hash_shader(stages[stage].module,
1436 stages[stage].entrypoint,
1437 stage,
1438 stages[stage].spec_info,
1439 stages[stage].shader_sha1);
1440
1441 enum brw_subgroup_size_type subgroup_size_type =
1442 anv_subgroup_size_type(stage, sinfo->flags, NULL);
1443
1444 const struct intel_device_info *devinfo = &pipeline->base.device->info;
1445 switch (stage) {
1446 case MESA_SHADER_VERTEX:
1447 populate_vs_prog_key(devinfo, subgroup_size_type,
1448 pipeline->base.device->robust_buffer_access,
1449 &stages[stage].key.vs);
1450 break;
1451 case MESA_SHADER_TESS_CTRL:
1452 populate_tcs_prog_key(devinfo, subgroup_size_type,
1453 pipeline->base.device->robust_buffer_access,
1454 info->pTessellationState->patchControlPoints,
1455 &stages[stage].key.tcs);
1456 break;
1457 case MESA_SHADER_TESS_EVAL:
1458 populate_tes_prog_key(devinfo, subgroup_size_type,
1459 pipeline->base.device->robust_buffer_access,
1460 &stages[stage].key.tes);
1461 break;
1462 case MESA_SHADER_GEOMETRY:
1463 populate_gs_prog_key(devinfo, subgroup_size_type,
1464 pipeline->base.device->robust_buffer_access,
1465 &stages[stage].key.gs);
1466 break;
1467 case MESA_SHADER_FRAGMENT: {
1468 const bool raster_enabled =
1469 !info->pRasterizationState->rasterizerDiscardEnable ||
1470 dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE;
1471 populate_wm_prog_key(pipeline, subgroup_size_type,
1472 pipeline->base.device->robust_buffer_access,
1473 pipeline->subpass,
1474 raster_enabled ? info->pMultisampleState : NULL,
1475 vk_find_struct_const(info->pNext,
1476 PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR),
1477 &stages[stage].key.wm);
1478 break;
1479 }
1480 default:
1481 unreachable("Invalid graphics shader stage");
1482 }
1483
1484 stages[stage].feedback.duration += os_time_get_nano() - stage_start;
1485 stages[stage].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
1486 }
1487
1488 assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
1489
1490 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1491
1492 unsigned char sha1[20];
1493 anv_pipeline_hash_graphics(pipeline, layout, stages, sha1);
1494
1495 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1496 if (!stages[s].entrypoint)
1497 continue;
1498
1499 stages[s].cache_key.stage = s;
1500 memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
1501 }
1502
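/* When the client asks us to capture the internal representations we must
 * skip the cache lookup: the NIR and assembly recorded for the pipeline
 * executables are only produced when we actually run the compiler.
 */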
1503 const bool skip_cache_lookup =
1504 (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
1505
1506 if (!skip_cache_lookup) {
1507 unsigned found = 0;
1508 unsigned cache_hits = 0;
1509 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1510 if (!stages[s].entrypoint)
1511 continue;
1512
1513 int64_t stage_start = os_time_get_nano();
1514
1515 bool cache_hit;
1516 struct anv_shader_bin *bin =
1517 anv_device_search_for_kernel(pipeline->base.device, cache,
1518 &stages[s].cache_key,
1519 sizeof(stages[s].cache_key), &cache_hit);
1520 if (bin) {
1521 found++;
1522 pipeline->shaders[s] = bin;
1523 }
1524
1525 if (cache_hit) {
1526 cache_hits++;
1527 stages[s].feedback.flags |=
1528 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1529 }
1530 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1531 }
1532
1533 if (found == __builtin_popcount(pipeline->active_stages)) {
1534 if (cache_hits == found) {
1535 pipeline_feedback.flags |=
1536 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1537 }
1538 /* We found all our shaders in the cache. We're done. */
1539 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1540 if (!stages[s].entrypoint)
1541 continue;
1542
1543 anv_pipeline_add_executables(&pipeline->base, &stages[s],
1544 pipeline->shaders[s]);
1545 }
1546 anv_pipeline_init_from_cached_graphics(pipeline);
1547 goto done;
1548 } else if (found > 0) {
1549 /* We found some but not all of our shaders. This shouldn't happen
1550 * most of the time but it can if we have a partially populated
1551 * pipeline cache.
1552 */
1553 assert(found < __builtin_popcount(pipeline->active_stages));
1554
1555 vk_perf(VK_LOG_OBJS(&cache->base),
1556 "Found a partial pipeline in the cache. This is "
1557 "most likely caused by an incomplete pipeline cache "
1558 "import or export");
1559
1560 /* We're going to have to recompile anyway, so just throw away our
1561 * references to the shaders in the cache. We'll get them out of the
1562 * cache again as part of the compilation process.
1563 */
1564 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1565 stages[s].feedback.flags = 0;
1566 if (pipeline->shaders[s]) {
1567 anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
1568 pipeline->shaders[s] = NULL;
1569 }
1570 }
1571 }
1572 }
1573
1574 if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT)
1575 return VK_PIPELINE_COMPILE_REQUIRED_EXT;
1576
1577 void *pipeline_ctx = ralloc_context(NULL);
1578
1579 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1580 if (!stages[s].entrypoint)
1581 continue;
1582
1583 int64_t stage_start = os_time_get_nano();
1584
1585 assert(stages[s].stage == s);
1586 assert(pipeline->shaders[s] == NULL);
1587
1588 stages[s].bind_map = (struct anv_pipeline_bind_map) {
1589 .surface_to_descriptor = stages[s].surface_to_descriptor,
1590 .sampler_to_descriptor = stages[s].sampler_to_descriptor
1591 };
1592
1593 stages[s].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
1594 pipeline_ctx,
1595 &stages[s]);
1596 if (stages[s].nir == NULL) {
1597 result = vk_error(pipeline, VK_ERROR_UNKNOWN);
1598 goto fail;
1599 }
1600
1601 /* This is rather ugly.
1602 *
1603 * Any variable annotated as interpolated by sample essentially disables
1604 * coarse pixel shading. Unfortunately the CTS tests exercising this set
1605 * the varying value in the previous stage using a constant. Our NIR
1606 * infrastructure is clever enough to look up variables across stages and
1607 * constant fold, removing the variable. So in order to comply with the CTS
1608 * we have to check the variables here.
1609 */
1610 if (s == MESA_SHADER_FRAGMENT) {
1611 nir_foreach_variable_in_list(var, &stages[s].nir->variables) {
1612 if (var->data.sample) {
1613 stages[s].key.wm.coarse_pixel = false;
1614 break;
1615 }
1616 }
1617 }
1618
1619 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1620 }
1621
1622 /* Walk backwards to link */
1623 struct anv_pipeline_stage *next_stage = NULL;
1624 for (int s = ARRAY_SIZE(pipeline->shaders) - 1; s >= 0; s--) {
1625 if (!stages[s].entrypoint)
1626 continue;
1627
1628 switch (s) {
1629 case MESA_SHADER_VERTEX:
1630 anv_pipeline_link_vs(compiler, &stages[s], next_stage);
1631 break;
1632 case MESA_SHADER_TESS_CTRL:
1633 anv_pipeline_link_tcs(compiler, &stages[s], next_stage);
1634 break;
1635 case MESA_SHADER_TESS_EVAL:
1636 anv_pipeline_link_tes(compiler, &stages[s], next_stage);
1637 break;
1638 case MESA_SHADER_GEOMETRY:
1639 anv_pipeline_link_gs(compiler, &stages[s], next_stage);
1640 break;
1641 case MESA_SHADER_FRAGMENT:
1642 anv_pipeline_link_fs(compiler, &stages[s]);
1643 break;
1644 default:
1645 unreachable("Invalid graphics shader stage");
1646 }
1647
1648 next_stage = &stages[s];
1649 }
1650
1651 if (pipeline->base.device->info.ver >= 12 &&
1652 pipeline->subpass->view_mask != 0) {
1653 /* For some pipelines HW Primitive Replication can be used instead of
1654 * instancing to implement Multiview. This depends on how viewIndex is
1655 * used in all the active shaders, so this check can't be done per
1656 * individual shader.
1657 */
1658 nir_shader *shaders[MESA_SHADER_STAGES] = {};
1659 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++)
1660 shaders[s] = stages[s].nir;
1661
1662 pipeline->use_primitive_replication =
1663 anv_check_for_primitive_replication(shaders, pipeline);
1664 } else {
1665 pipeline->use_primitive_replication = false;
1666 }
1667
1668 struct anv_pipeline_stage *prev_stage = NULL;
1669 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1670 if (!stages[s].entrypoint)
1671 continue;
1672
1673 int64_t stage_start = os_time_get_nano();
1674
1675 void *stage_ctx = ralloc_context(NULL);
1676
1677 anv_pipeline_lower_nir(&pipeline->base, stage_ctx, &stages[s], layout);
1678
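/* When the backend requests unified interfaces between stages, make the
 * producer's written outputs and the consumer's read inputs agree by
 * OR-ing them together. Tessellation levels are excluded because they are
 * stored in the patch header rather than in regular varying slots.
 */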
1679 if (prev_stage && compiler->glsl_compiler_options[s].NirOptions->unify_interfaces) {
1680 prev_stage->nir->info.outputs_written |= stages[s].nir->info.inputs_read &
1681 ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
1682 stages[s].nir->info.inputs_read |= prev_stage->nir->info.outputs_written &
1683 ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
1684 prev_stage->nir->info.patch_outputs_written |= stages[s].nir->info.patch_inputs_read;
1685 stages[s].nir->info.patch_inputs_read |= prev_stage->nir->info.patch_outputs_written;
1686 }
1687
1688 ralloc_free(stage_ctx);
1689
1690 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1691
1692 prev_stage = &stages[s];
1693 }
1694
1695 prev_stage = NULL;
1696 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1697 if (!stages[s].entrypoint)
1698 continue;
1699
1700 int64_t stage_start = os_time_get_nano();
1701
1702 void *stage_ctx = ralloc_context(NULL);
1703
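/* Only the last pre-rasterization stage can write transform feedback, and
 * that is always one of VS, TES or GS, so those are the only stages worth
 * gathering XFB info for.
 */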
1704 nir_xfb_info *xfb_info = NULL;
1705 if (s == MESA_SHADER_VERTEX ||
1706 s == MESA_SHADER_TESS_EVAL ||
1707 s == MESA_SHADER_GEOMETRY)
1708 xfb_info = nir_gather_xfb_info(stages[s].nir, stage_ctx);
1709
1710 switch (s) {
1711 case MESA_SHADER_VERTEX:
1712 anv_pipeline_compile_vs(compiler, stage_ctx, pipeline,
1713 &stages[s]);
1714 break;
1715 case MESA_SHADER_TESS_CTRL:
1716 anv_pipeline_compile_tcs(compiler, stage_ctx, pipeline->base.device,
1717 &stages[s], prev_stage);
1718 break;
1719 case MESA_SHADER_TESS_EVAL:
1720 anv_pipeline_compile_tes(compiler, stage_ctx, pipeline->base.device,
1721 &stages[s], prev_stage);
1722 break;
1723 case MESA_SHADER_GEOMETRY:
1724 anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->base.device,
1725 &stages[s], prev_stage);
1726 break;
1727 case MESA_SHADER_FRAGMENT:
1728 anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->base.device,
1729 &stages[s], prev_stage);
1730 break;
1731 default:
1732 unreachable("Invalid graphics shader stage");
1733 }
1734 if (stages[s].code == NULL) {
1735 ralloc_free(stage_ctx);
1736 result = vk_error(pipeline->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
1737 goto fail;
1738 }
1739
1740 anv_nir_validate_push_layout(&stages[s].prog_data.base,
1741 &stages[s].bind_map);
1742
1743 struct anv_shader_bin *bin =
1744 anv_device_upload_kernel(pipeline->base.device, cache, s,
1745 &stages[s].cache_key,
1746 sizeof(stages[s].cache_key),
1747 stages[s].code,
1748 stages[s].prog_data.base.program_size,
1749 &stages[s].prog_data.base,
1750 brw_prog_data_size(s),
1751 stages[s].stats, stages[s].num_stats,
1752 xfb_info, &stages[s].bind_map);
1753 if (!bin) {
1754 ralloc_free(stage_ctx);
1755 result = vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1756 goto fail;
1757 }
1758
1759 anv_pipeline_add_executables(&pipeline->base, &stages[s], bin);
1760
1761 pipeline->shaders[s] = bin;
1762 ralloc_free(stage_ctx);
1763
1764 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1765
1766 prev_stage = &stages[s];
1767 }
1768
1769 ralloc_free(pipeline_ctx);
1770
1771 done:
1772
1773 if (pipeline->shaders[MESA_SHADER_FRAGMENT] &&
1774 pipeline->shaders[MESA_SHADER_FRAGMENT]->prog_data->program_size == 0) {
1775 /* This can happen if we decided to implicitly disable the fragment
1776 * shader. See anv_pipeline_compile_fs().
1777 */
1778 anv_shader_bin_unref(pipeline->base.device,
1779 pipeline->shaders[MESA_SHADER_FRAGMENT]);
1780 pipeline->shaders[MESA_SHADER_FRAGMENT] = NULL;
1781 pipeline->active_stages &= ~VK_SHADER_STAGE_FRAGMENT_BIT;
1782 }
1783
1784 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1785
1786 const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
1787 vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
1788 if (create_feedback) {
1789 *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1790
1791 assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount);
1792 for (uint32_t i = 0; i < info->stageCount; i++) {
1793 gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
1794 create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
1795 }
1796 }
1797
1798 return VK_SUCCESS;
1799
1800 fail:
1801 ralloc_free(pipeline_ctx);
1802
1803 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1804 if (pipeline->shaders[s])
1805 anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
1806 }
1807
1808 return result;
1809 }
1810
1811 VkResult
1812 anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
1813 struct anv_pipeline_cache *cache,
1814 const VkComputePipelineCreateInfo *info,
1815 const struct vk_shader_module *module,
1816 const char *entrypoint,
1817 const VkSpecializationInfo *spec_info)
1818 {
1819 VkPipelineCreationFeedbackEXT pipeline_feedback = {
1820 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1821 };
1822 int64_t pipeline_start = os_time_get_nano();
1823
1824 const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;
1825
1826 struct anv_pipeline_stage stage = {
1827 .stage = MESA_SHADER_COMPUTE,
1828 .module = module,
1829 .entrypoint = entrypoint,
1830 .spec_info = spec_info,
1831 .cache_key = {
1832 .stage = MESA_SHADER_COMPUTE,
1833 },
1834 .feedback = {
1835 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1836 },
1837 };
1838 anv_pipeline_hash_shader(stage.module,
1839 stage.entrypoint,
1840 MESA_SHADER_COMPUTE,
1841 stage.spec_info,
1842 stage.shader_sha1);
1843
1844 struct anv_shader_bin *bin = NULL;
1845
1846 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info =
1847 vk_find_struct_const(info->stage.pNext,
1848 PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
1849
1850 const enum brw_subgroup_size_type subgroup_size_type =
1851 anv_subgroup_size_type(MESA_SHADER_COMPUTE, info->stage.flags, rss_info);
1852
1853 populate_cs_prog_key(&pipeline->base.device->info, subgroup_size_type,
1854 pipeline->base.device->robust_buffer_access,
1855 &stage.key.cs);
1856
1857 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1858
1859 const bool skip_cache_lookup =
1860 (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
1861
1862 anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1);
1863
1864 bool cache_hit = false;
1865 if (!skip_cache_lookup) {
1866 bin = anv_device_search_for_kernel(pipeline->base.device, cache,
1867 &stage.cache_key,
1868 sizeof(stage.cache_key),
1869 &cache_hit);
1870 }
1871
1872 if (bin == NULL &&
1873 (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT))
1874 return VK_PIPELINE_COMPILE_REQUIRED_EXT;
1875
1876 void *mem_ctx = ralloc_context(NULL);
1877 if (bin == NULL) {
1878 int64_t stage_start = os_time_get_nano();
1879
1880 stage.bind_map = (struct anv_pipeline_bind_map) {
1881 .surface_to_descriptor = stage.surface_to_descriptor,
1882 .sampler_to_descriptor = stage.sampler_to_descriptor
1883 };
1884
1885 /* Set up a binding for the gl_NumWorkGroups */
1886 stage.bind_map.surface_count = 1;
1887 stage.bind_map.surface_to_descriptor[0] = (struct anv_pipeline_binding) {
1888 .set = ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS,
1889 };
1890
1891 stage.nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, mem_ctx, &stage);
1892 if (stage.nir == NULL) {
1893 ralloc_free(mem_ctx);
1894 return vk_error(pipeline, VK_ERROR_UNKNOWN);
1895 }
1896
1897 NIR_PASS_V(stage.nir, anv_nir_add_base_work_group_id);
1898
1899 anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout);
1900
1901 NIR_PASS_V(stage.nir, brw_nir_lower_cs_intrinsics);
1902
1903 stage.num_stats = 1;
1904
1905 struct brw_compile_cs_params params = {
1906 .nir = stage.nir,
1907 .key = &stage.key.cs,
1908 .prog_data = &stage.prog_data.cs,
1909 .stats = stage.stats,
1910 .log_data = pipeline->base.device,
1911 };
1912
1913 stage.code = brw_compile_cs(compiler, mem_ctx, &params);
1914 if (stage.code == NULL) {
1915 ralloc_free(mem_ctx);
1916 return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1917 }
1918
1919 anv_nir_validate_push_layout(&stage.prog_data.base, &stage.bind_map);
1920
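/* If the compiled shader never reads gl_NumWorkGroups, turn the binding we
 * reserved for it above into a null binding.
 */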
1921 if (!stage.prog_data.cs.uses_num_work_groups) {
1922 assert(stage.bind_map.surface_to_descriptor[0].set ==
1923 ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS);
1924 stage.bind_map.surface_to_descriptor[0].set = ANV_DESCRIPTOR_SET_NULL;
1925 }
1926
1927 const unsigned code_size = stage.prog_data.base.program_size;
1928 bin = anv_device_upload_kernel(pipeline->base.device, cache,
1929 MESA_SHADER_COMPUTE,
1930 &stage.cache_key, sizeof(stage.cache_key),
1931 stage.code, code_size,
1932 &stage.prog_data.base,
1933 sizeof(stage.prog_data.cs),
1934 stage.stats, stage.num_stats,
1935 NULL, &stage.bind_map);
1936 if (!bin) {
1937 ralloc_free(mem_ctx);
1938 return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1939 }
1940
1941 stage.feedback.duration = os_time_get_nano() - stage_start;
1942 }
1943
1944 anv_pipeline_add_executables(&pipeline->base, &stage, bin);
1945
1946 ralloc_free(mem_ctx);
1947
1948 if (cache_hit) {
1949 stage.feedback.flags |=
1950 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1951 pipeline_feedback.flags |=
1952 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1953 }
1954 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1955
1956 const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
1957 vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
1958 if (create_feedback) {
1959 *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1960
1961 assert(create_feedback->pipelineStageCreationFeedbackCount == 1);
1962 create_feedback->pPipelineStageCreationFeedbacks[0] = stage.feedback;
1963 }
1964
1965 pipeline->cs = bin;
1966
1967 return VK_SUCCESS;
1968 }
1969
1970 /**
1971 * Copy pipeline state not marked as dynamic.
1972 * Dynamic state is pipeline state which hasn't been provided at pipeline
1973 * creation time, but is dynamically provided afterwards using various
1974 * vkCmdSet* functions.
1975 *
1976 * The set of state considered "non_dynamic" is determined by the pieces of
1977 * state that have their corresponding VkDynamicState enums omitted from
1978 * VkPipelineDynamicStateCreateInfo::pDynamicStates.
1979 *
1980 * @param[out] pipeline Destination non_dynamic state.
1981 * @param[in] pCreateInfo Source of non_dynamic state to be copied.
1982 */
1983 static void
1984 copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
1985 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1986 {
1987 anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL;
1988 struct anv_subpass *subpass = pipeline->subpass;
1989
1990 pipeline->dynamic_state = default_dynamic_state;
1991
1992 states &= ~pipeline->dynamic_states;
1993
1994 struct anv_dynamic_state *dynamic = &pipeline->dynamic_state;
1995
1996 bool raster_discard =
1997 pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
1998 !(pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE);
1999
2000 /* Section 9.2 of the Vulkan 1.0.15 spec says:
2001 *
2002 * pViewportState is [...] NULL if the pipeline
2003 * has rasterization disabled.
2004 */
2005 if (!raster_discard) {
2006 assert(pCreateInfo->pViewportState);
2007
2008 dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
2009 if (states & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) {
2010 typed_memcpy(dynamic->viewport.viewports,
2011 pCreateInfo->pViewportState->pViewports,
2012 pCreateInfo->pViewportState->viewportCount);
2013 }
2014
2015 dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
2016 if (states & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) {
2017 typed_memcpy(dynamic->scissor.scissors,
2018 pCreateInfo->pViewportState->pScissors,
2019 pCreateInfo->pViewportState->scissorCount);
2020 }
2021 }
2022
2023 if (states & ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH) {
2024 assert(pCreateInfo->pRasterizationState);
2025 dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth;
2026 }
2027
2028 if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS) {
2029 assert(pCreateInfo->pRasterizationState);
2030 dynamic->depth_bias.bias =
2031 pCreateInfo->pRasterizationState->depthBiasConstantFactor;
2032 dynamic->depth_bias.clamp =
2033 pCreateInfo->pRasterizationState->depthBiasClamp;
2034 dynamic->depth_bias.slope =
2035 pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
2036 }
2037
2038 if (states & ANV_CMD_DIRTY_DYNAMIC_CULL_MODE) {
2039 assert(pCreateInfo->pRasterizationState);
2040 dynamic->cull_mode =
2041 pCreateInfo->pRasterizationState->cullMode;
2042 }
2043
2044 if (states & ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE) {
2045 assert(pCreateInfo->pRasterizationState);
2046 dynamic->front_face =
2047 pCreateInfo->pRasterizationState->frontFace;
2048 }
2049
2050 if ((states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) &&
2051 (pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) {
2052 assert(pCreateInfo->pInputAssemblyState);
2053 dynamic->primitive_topology = pCreateInfo->pInputAssemblyState->topology;
2054 }
2055
2056 if (states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE) {
2057 assert(pCreateInfo->pRasterizationState);
2058 dynamic->raster_discard =
2059 pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
2060 }
2061
2062 if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE) {
2063 assert(pCreateInfo->pRasterizationState);
2064 dynamic->depth_bias_enable =
2065 pCreateInfo->pRasterizationState->depthBiasEnable;
2066 }
2067
2068 if ((states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE) &&
2069 (pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) {
2070 assert(pCreateInfo->pInputAssemblyState);
2071 dynamic->primitive_restart_enable =
2072 pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
2073 }
2074
2075 /* Section 9.2 of the Vulkan 1.0.15 spec says:
2076 *
2077 * pColorBlendState is [...] NULL if the pipeline has rasterization
2078 * disabled or if the subpass of the render pass the pipeline is
2079 * created against does not use any color attachments.
2080 */
2081 bool uses_color_att = false;
2082 for (unsigned i = 0; i < subpass->color_count; ++i) {
2083 if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) {
2084 uses_color_att = true;
2085 break;
2086 }
2087 }
2088
2089 if (uses_color_att && !raster_discard) {
2090 assert(pCreateInfo->pColorBlendState);
2091
2092 if (states & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS)
2093 typed_memcpy(dynamic->blend_constants,
2094 pCreateInfo->pColorBlendState->blendConstants, 4);
2095 }
2096
2097 /* If there is no depthstencil attachment, then don't read
2098 * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
2099 * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
2100 * no need to override the depthstencil defaults in
2101 * anv_pipeline::dynamic_state when there is no depthstencil attachment.
2102 *
2103 * Section 9.2 of the Vulkan 1.0.15 spec says:
2104 *
2105 * pDepthStencilState is [...] NULL if the pipeline has rasterization
2106 * disabled or if the subpass of the render pass the pipeline is created
2107 * against does not use a depth/stencil attachment.
2108 */
2109 if (!raster_discard && subpass->depth_stencil_attachment) {
2110 assert(pCreateInfo->pDepthStencilState);
2111
2112 if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS) {
2113 dynamic->depth_bounds.min =
2114 pCreateInfo->pDepthStencilState->minDepthBounds;
2115 dynamic->depth_bounds.max =
2116 pCreateInfo->pDepthStencilState->maxDepthBounds;
2117 }
2118
2119 if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK) {
2120 dynamic->stencil_compare_mask.front =
2121 pCreateInfo->pDepthStencilState->front.compareMask;
2122 dynamic->stencil_compare_mask.back =
2123 pCreateInfo->pDepthStencilState->back.compareMask;
2124 }
2125
2126 if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK) {
2127 dynamic->stencil_write_mask.front =
2128 pCreateInfo->pDepthStencilState->front.writeMask;
2129 dynamic->stencil_write_mask.back =
2130 pCreateInfo->pDepthStencilState->back.writeMask;
2131 }
2132
2133 if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE) {
2134 dynamic->stencil_reference.front =
2135 pCreateInfo->pDepthStencilState->front.reference;
2136 dynamic->stencil_reference.back =
2137 pCreateInfo->pDepthStencilState->back.reference;
2138 }
2139
2140 if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE) {
2141 dynamic->depth_test_enable =
2142 pCreateInfo->pDepthStencilState->depthTestEnable;
2143 }
2144
2145 if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE) {
2146 dynamic->depth_write_enable =
2147 pCreateInfo->pDepthStencilState->depthWriteEnable;
2148 }
2149
2150 if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP) {
2151 dynamic->depth_compare_op =
2152 pCreateInfo->pDepthStencilState->depthCompareOp;
2153 }
2154
2155 if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
2156 dynamic->depth_bounds_test_enable =
2157 pCreateInfo->pDepthStencilState->depthBoundsTestEnable;
2158 }
2159
2160 if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE) {
2161 dynamic->stencil_test_enable =
2162 pCreateInfo->pDepthStencilState->stencilTestEnable;
2163 }
2164
2165 if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP) {
2166 const VkPipelineDepthStencilStateCreateInfo *info =
2167 pCreateInfo->pDepthStencilState;
2168 memcpy(&dynamic->stencil_op.front, &info->front,
2169 sizeof(dynamic->stencil_op.front));
2170 memcpy(&dynamic->stencil_op.back, &info->back,
2171 sizeof(dynamic->stencil_op.back));
2172 }
2173 }
2174
2175 const VkPipelineRasterizationLineStateCreateInfoEXT *line_state =
2176 vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
2177 PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
2178 if (!raster_discard && line_state && line_state->stippledLineEnable) {
2179 if (states & ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE) {
2180 dynamic->line_stipple.factor = line_state->lineStippleFactor;
2181 dynamic->line_stipple.pattern = line_state->lineStipplePattern;
2182 }
2183 }
2184
2185 const VkPipelineMultisampleStateCreateInfo *ms_info =
2186 pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? NULL :
2187 pCreateInfo->pMultisampleState;
2188 if (states & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) {
2189 const VkPipelineSampleLocationsStateCreateInfoEXT *sl_info = ms_info ?
2190 vk_find_struct_const(ms_info, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT) : NULL;
2191
2192 if (sl_info) {
2193 dynamic->sample_locations.samples =
2194 sl_info->sampleLocationsInfo.sampleLocationsCount;
2195 const VkSampleLocationEXT *positions =
2196 sl_info->sampleLocationsInfo.pSampleLocations;
2197 for (uint32_t i = 0; i < dynamic->sample_locations.samples; i++) {
2198 dynamic->sample_locations.locations[i].x = positions[i].x;
2199 dynamic->sample_locations.locations[i].y = positions[i].y;
2200 }
2201 }
2202 }
2203 /* Ensure we always have valid values for sample_locations. */
2204 if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations &&
2205 dynamic->sample_locations.samples == 0) {
2206 dynamic->sample_locations.samples =
2207 ms_info ? ms_info->rasterizationSamples : 1;
2208 const struct intel_sample_position *positions =
2209 intel_get_sample_positions(dynamic->sample_locations.samples);
2210 for (uint32_t i = 0; i < dynamic->sample_locations.samples; i++) {
2211 dynamic->sample_locations.locations[i].x = positions[i].x;
2212 dynamic->sample_locations.locations[i].y = positions[i].y;
2213 }
2214 }
2215
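/* Bake VK_EXT_color_write_enable into a bitmask with one bit per color
 * attachment.
 */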
2216 if (states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) {
2217 if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
2218 uses_color_att) {
2219 assert(pCreateInfo->pColorBlendState);
2220 const VkPipelineColorWriteCreateInfoEXT *color_write_info =
2221 vk_find_struct_const(pCreateInfo->pColorBlendState->pNext,
2222 PIPELINE_COLOR_WRITE_CREATE_INFO_EXT);
2223
2224 if (color_write_info) {
2225 dynamic->color_writes = 0;
2226 for (uint32_t i = 0; i < color_write_info->attachmentCount; i++) {
2227 dynamic->color_writes |=
2228 color_write_info->pColorWriteEnables[i] ? (1u << i) : 0;
2229 }
2230 }
2231 }
2232 }
2233
2234 const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_state =
2235 vk_find_struct_const(pCreateInfo->pNext,
2236 PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR);
2237 if (fsr_state) {
2238 if (states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE)
2239 dynamic->fragment_shading_rate = fsr_state->fragmentSize;
2240 }
2241
2242 pipeline->dynamic_state_mask = states;
2243
2244 /* Mark states that can either be dynamic or fully baked into the pipeline.
2245 */
2246 pipeline->static_state_mask = states &
2247 (ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS |
2248 ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |
2249 ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE |
2250 ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE |
2251 ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP |
2252 ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY);
2253 }
2254
2255 static void
2256 anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info)
2257 {
2258 #ifdef DEBUG
2259 struct anv_render_pass *renderpass = NULL;
2260 struct anv_subpass *subpass = NULL;
2261
2262 /* Assert that all required members of VkGraphicsPipelineCreateInfo are
2263 * present. See the Vulkan 1.0.28 spec, Section 9.2 Graphics Pipelines.
2264 */
2265 assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
2266
2267 renderpass = anv_render_pass_from_handle(info->renderPass);
2268 assert(renderpass);
2269
2270 assert(info->subpass < renderpass->subpass_count);
2271 subpass = &renderpass->subpasses[info->subpass];
2272
2273 assert(info->stageCount >= 1);
2274 assert(info->pRasterizationState);
2275 if (!info->pRasterizationState->rasterizerDiscardEnable) {
2276 assert(info->pViewportState);
2277 assert(info->pMultisampleState);
2278
2279 if (subpass && subpass->depth_stencil_attachment)
2280 assert(info->pDepthStencilState);
2281
2282 if (subpass && subpass->color_count > 0) {
2283 bool all_color_unused = true;
2284 for (int i = 0; i < subpass->color_count; i++) {
2285 if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
2286 all_color_unused = false;
2287 }
2288 /* pColorBlendState is ignored if the pipeline has rasterization
2289 * disabled or if the subpass of the render pass the pipeline is
2290 * created against does not use any color attachments.
2291 */
2292 assert(info->pColorBlendState || all_color_unused);
2293 }
2294 }
2295
2296 for (uint32_t i = 0; i < info->stageCount; ++i) {
2297 switch (info->pStages[i].stage) {
2298 case VK_SHADER_STAGE_VERTEX_BIT:
2299 assert(info->pVertexInputState);
2300 assert(info->pInputAssemblyState);
2301 break;
2302 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
2303 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
2304 assert(info->pTessellationState);
2305 break;
2306 default:
2307 break;
2308 }
2309 }
2310 #endif
2311 }
2312
2313 /**
2314 * Calculate the desired L3 partitioning based on the current state of the
2315 * pipeline. For now this simply returns the conservative defaults calculated
2316 * by get_default_l3_weights(), but we could probably do better by gathering
2317 * more statistics from the pipeline state (e.g. guess of expected URB usage
2318 * and bound surfaces), or by using feedback from performance counters.
2319 */
2320 void
2321 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
2322 {
2323 const struct intel_device_info *devinfo = &pipeline->device->info;
2324
2325 const struct intel_l3_weights w =
2326 intel_get_default_l3_weights(devinfo, true, needs_slm);
2327
2328 pipeline->l3_config = intel_get_l3_config(devinfo, w);
2329 }
2330
2331 static VkLineRasterizationModeEXT
2332 vk_line_rasterization_mode(const VkPipelineRasterizationLineStateCreateInfoEXT *line_info,
2333 const VkPipelineMultisampleStateCreateInfo *ms_info)
2334 {
2335 VkLineRasterizationModeEXT line_mode =
2336 line_info ? line_info->lineRasterizationMode :
2337 VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT;
2338
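/* Resolve DEFAULT based on multisampling: rectangular lines when
 * rasterizationSamples > 1, Bresenham lines otherwise.
 */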
2339 if (line_mode == VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT) {
2340 if (ms_info && ms_info->rasterizationSamples > 1) {
2341 return VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT;
2342 } else {
2343 return VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT;
2344 }
2345 }
2346
2347 return line_mode;
2348 }
2349
2350 VkResult
2351 anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
2352 struct anv_device *device,
2353 struct anv_pipeline_cache *cache,
2354 const VkGraphicsPipelineCreateInfo *pCreateInfo,
2355 const VkAllocationCallbacks *alloc)
2356 {
2357 VkResult result;
2358
2359 anv_pipeline_validate_create_info(pCreateInfo);
2360
2361 result = anv_pipeline_init(&pipeline->base, device,
2362 ANV_PIPELINE_GRAPHICS, pCreateInfo->flags,
2363 alloc);
2364 if (result != VK_SUCCESS)
2365 return result;
2366
2367 anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS,
2368 pipeline->batch_data, sizeof(pipeline->batch_data));
2369
2370 ANV_FROM_HANDLE(anv_render_pass, render_pass, pCreateInfo->renderPass);
2371 assert(pCreateInfo->subpass < render_pass->subpass_count);
2372 pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
2373
2374 assert(pCreateInfo->pRasterizationState);
2375
2376 if (pCreateInfo->pDynamicState) {
2377 /* Gather all of the states that are marked as dynamic */
2378 uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
2379 for (uint32_t s = 0; s < count; s++) {
2380 pipeline->dynamic_states |= anv_cmd_dirty_bit_for_vk_dynamic_state(
2381 pCreateInfo->pDynamicState->pDynamicStates[s]);
2382 }
2383 }
2384
2385 pipeline->active_stages = 0;
2386 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
2387 pipeline->active_stages |= pCreateInfo->pStages[i].stage;
2388
2389 if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
2390 pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
2391
2392 copy_non_dynamic_state(pipeline, pCreateInfo);
2393
2394 pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState->depthClampEnable;
2395
2396 /* Previously we enabled depth clipping when !depthClampEnable.
2397 * DepthClipStateCreateInfo now makes depth clipping explicit so if the
2398 * clipping info is available, use its enable value to determine clipping,
2399 * otherwise fall back to the previous !depthClampEnable logic.
2400 */
2401 const VkPipelineRasterizationDepthClipStateCreateInfoEXT *clip_info =
2402 vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
2403 PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT);
2404 pipeline->depth_clip_enable = clip_info ? clip_info->depthClipEnable : !pipeline->depth_clamp_enable;
2405
2406 /* If rasterization is not enabled, ms_info must be ignored. */
2407 const bool raster_enabled =
2408 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable ||
2409 (pipeline->dynamic_states &
2410 ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE);
2411
2412 const VkPipelineMultisampleStateCreateInfo *ms_info =
2413 raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2414
2415 pipeline->sample_shading_enable = ms_info && is_sample_shading(ms_info);
2416
2417 result = anv_pipeline_compile_graphics(pipeline, cache, pCreateInfo);
2418 if (result != VK_SUCCESS) {
2419 anv_pipeline_finish(&pipeline->base, device, alloc);
2420 return result;
2421 }
2422
2423 anv_pipeline_setup_l3_config(&pipeline->base, false);
2424
2425 if (anv_pipeline_is_primitive(pipeline)) {
2426 const VkPipelineVertexInputStateCreateInfo *vi_info =
2427 pCreateInfo->pVertexInputState;
2428
2429 const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;
2430
2431 for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
2432 const VkVertexInputAttributeDescription *desc =
2433 &vi_info->pVertexAttributeDescriptions[i];
2434
2435 if (inputs_read & (1ull << (VERT_ATTRIB_GENERIC0 + desc->location)))
2436 pipeline->vb_used |= 1 << desc->binding;
2437 }
2438
2439 for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
2440 const VkVertexInputBindingDescription *desc =
2441 &vi_info->pVertexBindingDescriptions[i];
2442
2443 pipeline->vb[desc->binding].stride = desc->stride;
2444
2445 /* Step rate is programmed per vertex element (attribute), not
2446 * binding. Set up a map of which bindings step per instance, for
2447 * reference by vertex element setup. */
2448 switch (desc->inputRate) {
2449 default:
2450 case VK_VERTEX_INPUT_RATE_VERTEX:
2451 pipeline->vb[desc->binding].instanced = false;
2452 break;
2453 case VK_VERTEX_INPUT_RATE_INSTANCE:
2454 pipeline->vb[desc->binding].instanced = true;
2455 break;
2456 }
2457
2458 pipeline->vb[desc->binding].instance_divisor = 1;
2459 }
2460
2461 const VkPipelineVertexInputDivisorStateCreateInfoEXT *vi_div_state =
2462 vk_find_struct_const(vi_info->pNext,
2463 PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
2464 if (vi_div_state) {
2465 for (uint32_t i = 0; i < vi_div_state->vertexBindingDivisorCount; i++) {
2466 const VkVertexInputBindingDivisorDescriptionEXT *desc =
2467 &vi_div_state->pVertexBindingDivisors[i];
2468
2469 pipeline->vb[desc->binding].instance_divisor = desc->divisor;
2470 }
2471 }
2472
2473 /* Our implementation of VK_KHR_multiview uses instancing to draw the
2474 * different views. If the client asks for instancing, we need to multiply
2475 * the instance divisor by the number of views to ensure that we repeat the
2476 * client's per-instance data once for each view.
2477 */
2478 if (pipeline->subpass->view_mask && !pipeline->use_primitive_replication) {
2479 const uint32_t view_count = anv_subpass_view_count(pipeline->subpass);
2480 for (uint32_t vb = 0; vb < MAX_VBS; vb++) {
2481 if (pipeline->vb[vb].instanced)
2482 pipeline->vb[vb].instance_divisor *= view_count;
2483 }
2484 }
2485
2486 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
2487 pCreateInfo->pInputAssemblyState;
2488 const VkPipelineTessellationStateCreateInfo *tess_info =
2489 pCreateInfo->pTessellationState;
2490
2491 if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
2492 pipeline->topology = _3DPRIM_PATCHLIST(tess_info->patchControlPoints);
2493 else
2494 pipeline->topology = vk_to_intel_primitive_type[ia_info->topology];
2495 }
2496
2497 const VkPipelineRasterizationLineStateCreateInfoEXT *line_info =
2498 vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
2499 PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
2500
2501 /* Store the line mode, polygon mode and rasterization samples; these are
2502 * used for dynamic primitive topology.
2503 */
2504 pipeline->line_mode = vk_line_rasterization_mode(line_info, ms_info);
2505 pipeline->polygon_mode = pCreateInfo->pRasterizationState->polygonMode;
2506 pipeline->rasterization_samples =
2507 ms_info ? ms_info->rasterizationSamples : 1;
2508
2509 return VK_SUCCESS;
2510 }
2511
2512 static VkResult
2513 compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline,
2514 struct anv_pipeline_cache *cache,
2515 nir_shader *nir,
2516 struct anv_pipeline_stage *stage,
2517 struct anv_shader_bin **shader_out,
2518 void *mem_ctx)
2519 {
2520 const struct brw_compiler *compiler =
2521 pipeline->base.device->physical->compiler;
2522 const struct intel_device_info *devinfo = compiler->devinfo;
2523
2524 nir_shader **resume_shaders = NULL;
2525 uint32_t num_resume_shaders = 0;
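/* Split the shader at each shader call (traceRay, executeCallable, etc.):
 * nir_lower_shader_calls generates one resume shader per call site which
 * continues execution after the call returns.
 */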
2526 if (nir->info.stage != MESA_SHADER_COMPUTE) {
2527 NIR_PASS_V(nir, nir_lower_shader_calls,
2528 nir_address_format_64bit_global,
2529 BRW_BTD_STACK_ALIGN,
2530 &resume_shaders, &num_resume_shaders, mem_ctx);
2531 NIR_PASS_V(nir, brw_nir_lower_shader_calls);
2532 NIR_PASS_V(nir, brw_nir_lower_rt_intrinsics, devinfo);
2533 }
2534
2535 for (unsigned i = 0; i < num_resume_shaders; i++) {
2536 NIR_PASS_V(resume_shaders[i], brw_nir_lower_shader_calls);
2537 NIR_PASS_V(resume_shaders[i], brw_nir_lower_rt_intrinsics, devinfo);
2538 }
2539
2540 stage->code =
2541 brw_compile_bs(compiler, pipeline->base.device, mem_ctx,
2542 &stage->key.bs, &stage->prog_data.bs, nir,
2543 num_resume_shaders, resume_shaders, stage->stats, NULL);
2544 if (stage->code == NULL)
2545 return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
2546
2547 /* Ray-tracing shaders don't have a "real" bind map */
2548 struct anv_pipeline_bind_map empty_bind_map = {};
2549
2550 const unsigned code_size = stage->prog_data.base.program_size;
2551 struct anv_shader_bin *bin =
2552 anv_device_upload_kernel(pipeline->base.device,
2553 cache,
2554 stage->stage,
2555 &stage->cache_key, sizeof(stage->cache_key),
2556 stage->code, code_size,
2557 &stage->prog_data.base,
2558 sizeof(stage->prog_data.bs),
2559 stage->stats, 1,
2560 NULL, &empty_bind_map);
2561 if (bin == NULL)
2562 return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
2563
2564 /* TODO: Figure out executables for resume shaders */
2565 anv_pipeline_add_executables(&pipeline->base, stage, bin);
2566 util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, bin);
2567
2568 *shader_out = bin;
2569
2570 return VK_SUCCESS;
2571 }
2572
2573 static bool
2574 is_rt_stack_size_dynamic(const VkRayTracingPipelineCreateInfoKHR *info)
2575 {
2576 if (info->pDynamicState == NULL)
2577 return false;
2578
2579 for (unsigned i = 0; i < info->pDynamicState->dynamicStateCount; i++) {
2580 if (info->pDynamicState->pDynamicStates[i] ==
2581 VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR)
2582 return true;
2583 }
2584
2585 return false;
2586 }
2587
2588 static void
2589 anv_pipeline_compute_ray_tracing_stacks(struct anv_ray_tracing_pipeline *pipeline,
2590 const VkRayTracingPipelineCreateInfoKHR *info,
2591 uint32_t *stack_max)
2592 {
2593 if (is_rt_stack_size_dynamic(info)) {
2594 pipeline->stack_size = 0; /* 0 means dynamic */
2595 } else {
2596 /* From the Vulkan spec:
2597 *
2598 * "If the stack size is not set explicitly, the stack size for a
2599 * pipeline is:
2600 *
2601 * rayGenStackMax +
2602 * min(1, maxPipelineRayRecursionDepth) ×
2603 * max(closestHitStackMax, missStackMax,
2604 * intersectionStackMax + anyHitStackMax) +
2605 * max(0, maxPipelineRayRecursionDepth-1) ×
2606 * max(closestHitStackMax, missStackMax) +
2607 * 2 × callableStackMax"
2608 */
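/* For example, with illustrative per-stage maxima of rayGenStackMax = 64,
 * closestHitStackMax = 128, missStackMax = 96, intersectionStackMax = 80,
 * anyHitStackMax = 48, callableStackMax = 32 and a recursion depth of 2,
 * this works out to 64 + 1 * 128 + 1 * 128 + 2 * 32 = 384 bytes.
 */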
2609 pipeline->stack_size =
2610 stack_max[MESA_SHADER_RAYGEN] +
2611 MIN2(1, info->maxPipelineRayRecursionDepth) *
2612 MAX4(stack_max[MESA_SHADER_CLOSEST_HIT],
2613 stack_max[MESA_SHADER_MISS],
2614 stack_max[MESA_SHADER_INTERSECTION],
2615 stack_max[MESA_SHADER_ANY_HIT]) +
2616 MAX2(0, (int)info->maxPipelineRayRecursionDepth - 1) *
2617 MAX2(stack_max[MESA_SHADER_CLOSEST_HIT],
2618 stack_max[MESA_SHADER_MISS]) +
2619 2 * stack_max[MESA_SHADER_CALLABLE];
2620
2621 /* This is an extremely unlikely case but we need to set it to some
2622 * non-zero value so that we don't accidentally think it's dynamic.
2623 * Our minimum stack size is 2KB anyway so we could set it to any small
2624 * value we like.
2625 */
2626 if (pipeline->stack_size == 0)
2627 pipeline->stack_size = 1;
2628 }
2629 }
2630
2631 static struct anv_pipeline_stage *
2632 anv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline,
2633 const VkRayTracingPipelineCreateInfoKHR *info,
2634 void *pipeline_ctx)
2635 {
2636 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
2637
2638 /* Create enough stage entries for all shader modules plus potential
2639 * combinations in the groups.
2640 */
2641 struct anv_pipeline_stage *stages =
2642 rzalloc_array(pipeline_ctx, struct anv_pipeline_stage, info->stageCount);
2643
2644 for (uint32_t i = 0; i < info->stageCount; i++) {
2645 const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
2646 if (sinfo->module == VK_NULL_HANDLE)
2647 continue;
2648
2649 int64_t stage_start = os_time_get_nano();
2650
2651 stages[i] = (struct anv_pipeline_stage) {
2652 .stage = vk_to_mesa_shader_stage(sinfo->stage),
2653 .module = vk_shader_module_from_handle(sinfo->module),
2654 .entrypoint = sinfo->pName,
2655 .spec_info = sinfo->pSpecializationInfo,
2656 .cache_key = {
2657 .stage = vk_to_mesa_shader_stage(sinfo->stage),
2658 },
2659 .feedback = {
2660 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
2661 },
2662 };
2663
2664 populate_bs_prog_key(&pipeline->base.device->info, sinfo->flags,
2665 pipeline->base.device->robust_buffer_access,
2666 &stages[i].key.bs);
2667
2668 anv_pipeline_hash_shader(stages[i].module,
2669 stages[i].entrypoint,
2670 stages[i].stage,
2671 stages[i].spec_info,
2672 stages[i].shader_sha1);
2673
2674 if (stages[i].stage != MESA_SHADER_INTERSECTION) {
2675 anv_pipeline_hash_ray_tracing_shader(pipeline, layout, &stages[i],
2676 stages[i].cache_key.sha1);
2677 }
2678
2679 stages[i].feedback.duration += os_time_get_nano() - stage_start;
2680 }
2681
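/* Procedural hit groups pair an intersection shader with an optional
 * any-hit shader. Since the pair is compiled into a single kernel, it gets
 * its own combined cache key.
 */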
2682 for (uint32_t i = 0; i < info->groupCount; i++) {
2683 const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];
2684
2685 if (ginfo->type != VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR)
2686 continue;
2687
2688 int64_t stage_start = os_time_get_nano();
2689
2690 uint32_t intersection_idx = ginfo->intersectionShader;
2691 assert(intersection_idx < info->stageCount);
2692
2693 uint32_t any_hit_idx = ginfo->anyHitShader;
2694 if (any_hit_idx != VK_SHADER_UNUSED_KHR) {
2695 assert(any_hit_idx < info->stageCount);
2696 anv_pipeline_hash_ray_tracing_combined_shader(pipeline,
2697 layout,
2698 &stages[intersection_idx],
2699 &stages[any_hit_idx],
2700 stages[intersection_idx].cache_key.sha1);
2701 } else {
2702 anv_pipeline_hash_ray_tracing_shader(pipeline, layout,
2703 &stages[intersection_idx],
2704 stages[intersection_idx].cache_key.sha1);
2705 }
2706
2707 stages[intersection_idx].feedback.duration += os_time_get_nano() - stage_start;
2708 }
2709
2710 return stages;
2711 }
2712
2713 static bool
2714 anv_pipeline_load_cached_shaders(struct anv_ray_tracing_pipeline *pipeline,
2715 struct anv_pipeline_cache *cache,
2716 const VkRayTracingPipelineCreateInfoKHR *info,
2717 struct anv_pipeline_stage *stages,
2718 uint32_t *stack_max)
2719 {
2720 uint32_t shaders = 0, cache_hits = 0;
2721 for (uint32_t i = 0; i < info->stageCount; i++) {
2722 if (stages[i].entrypoint == NULL)
2723 continue;
2724
2725 shaders++;
2726
2727 int64_t stage_start = os_time_get_nano();
2728
2729 bool cache_hit;
2730 stages[i].bin = anv_device_search_for_kernel(pipeline->base.device, cache,
2731 &stages[i].cache_key,
2732 sizeof(stages[i].cache_key),
2733 &cache_hit);
2734 if (cache_hit) {
2735 cache_hits++;
2736 stages[i].feedback.flags |=
2737 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
2738 }
2739
2740 if (stages[i].bin != NULL) {
2741 anv_pipeline_add_executables(&pipeline->base, &stages[i], stages[i].bin);
2742 util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, stages[i].bin);
2743
2744 uint32_t stack_size =
2745 brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
2746 stack_max[stages[i].stage] =
2747 MAX2(stack_max[stages[i].stage], stack_size);
2748 }
2749
2750 stages[i].feedback.duration += os_time_get_nano() - stage_start;
2751 }
2752
2753 return cache_hits == shaders;
2754 }
2755
2756 static VkResult
2757 anv_pipeline_compile_ray_tracing(struct anv_ray_tracing_pipeline *pipeline,
2758 struct anv_pipeline_cache *cache,
2759 const VkRayTracingPipelineCreateInfoKHR *info)
2760 {
2761 const struct intel_device_info *devinfo = &pipeline->base.device->info;
2762 VkResult result;
2763
2764 VkPipelineCreationFeedbackEXT pipeline_feedback = {
2765 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
2766 };
2767 int64_t pipeline_start = os_time_get_nano();
2768
2769 void *pipeline_ctx = ralloc_context(NULL);
2770
2771 struct anv_pipeline_stage *stages =
2772 anv_pipeline_init_ray_tracing_stages(pipeline, info, pipeline_ctx);
2773
2774 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
2775
2776 const bool skip_cache_lookup =
2777 (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
2778
2779 uint32_t stack_max[MESA_VULKAN_SHADER_STAGES] = {};
2780
2781 if (!skip_cache_lookup &&
2782 anv_pipeline_load_cached_shaders(pipeline, cache, info, stages, stack_max)) {
2783 pipeline_feedback.flags |=
2784 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
2785 goto done;
2786 }
2787
2788 if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) {
2789 ralloc_free(pipeline_ctx);
2790 return VK_PIPELINE_COMPILE_REQUIRED_EXT;
2791 }
2792
2793 for (uint32_t i = 0; i < info->stageCount; i++) {
2794 if (stages[i].entrypoint == NULL)
2795 continue;
2796
2797 int64_t stage_start = os_time_get_nano();
2798
2799 stages[i].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
2800 pipeline_ctx, &stages[i]);
2801 if (stages[i].nir == NULL) {
2802 ralloc_free(pipeline_ctx);
2803 return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
2804 }
2805
2806 anv_pipeline_lower_nir(&pipeline->base, pipeline_ctx, &stages[i], layout);
2807
2808 stages[i].feedback.duration += os_time_get_nano() - stage_start;
2809 }
2810
2811 for (uint32_t i = 0; i < info->stageCount; i++) {
2812 if (stages[i].entrypoint == NULL)
2813 continue;
2814
2815 /* Shader found in cache already. */
2816 if (stages[i].bin != NULL)
2817 continue;
2818
2819 /* We handle intersection shaders as part of the group */
2820 if (stages[i].stage == MESA_SHADER_INTERSECTION)
2821 continue;
2822
2823 int64_t stage_start = os_time_get_nano();
2824
2825 void *stage_ctx = ralloc_context(pipeline_ctx);
2826
2827 nir_shader *nir = nir_shader_clone(stage_ctx, stages[i].nir);
2828 switch (stages[i].stage) {
2829 case MESA_SHADER_RAYGEN:
2830 brw_nir_lower_raygen(nir);
2831 break;
2832
2833 case MESA_SHADER_ANY_HIT:
2834 brw_nir_lower_any_hit(nir, devinfo);
2835 break;
2836
2837 case MESA_SHADER_CLOSEST_HIT:
2838 brw_nir_lower_closest_hit(nir);
2839 break;
2840
2841 case MESA_SHADER_MISS:
2842 brw_nir_lower_miss(nir);
2843 break;
2844
2845 case MESA_SHADER_INTERSECTION:
2846 unreachable("These are handled later");
2847
2848 case MESA_SHADER_CALLABLE:
2849 brw_nir_lower_callable(nir);
2850 break;
2851
2852 default:
2853 unreachable("Invalid ray-tracing shader stage");
2854 }
2855
2856 result = compile_upload_rt_shader(pipeline, cache, nir, &stages[i],
2857 &stages[i].bin, stage_ctx);
2858 if (result != VK_SUCCESS) {
2859 ralloc_free(pipeline_ctx);
2860 return result;
2861 }
2862
2863 uint32_t stack_size =
2864 brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
2865 stack_max[stages[i].stage] = MAX2(stack_max[stages[i].stage], stack_size);
2866
2867 ralloc_free(stage_ctx);
2868
2869 stages[i].feedback.duration += os_time_get_nano() - stage_start;
2870 }
2871
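/* With all stages compiled (or loaded from the cache), resolve each shader
 * group to the anv_shader_bin(s) it references. Procedural hit groups may
 * still need their combined intersection/any-hit kernel built here.
 */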
2872 for (uint32_t i = 0; i < info->groupCount; i++) {
2873 const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];
2874 struct anv_rt_shader_group *group = &pipeline->groups[i];
2875 group->type = ginfo->type;
2876 switch (ginfo->type) {
2877 case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
2878 assert(ginfo->generalShader < info->stageCount);
2879 group->general = stages[ginfo->generalShader].bin;
2880 break;
2881
2882 case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
2883 if (ginfo->anyHitShader < info->stageCount)
2884 group->any_hit = stages[ginfo->anyHitShader].bin;
2885
2886 if (ginfo->closestHitShader < info->stageCount)
2887 group->closest_hit = stages[ginfo->closestHitShader].bin;
2888 break;
2889
2890 case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: {
2891 if (ginfo->closestHitShader < info->stageCount)
2892 group->closest_hit = stages[ginfo->closestHitShader].bin;
2893
2894 uint32_t intersection_idx = info->pGroups[i].intersectionShader;
2895 assert(intersection_idx < info->stageCount);
2896
2897 /* Only compile this stage if not already found in the cache. */
2898 if (stages[intersection_idx].bin == NULL) {
2899 /* The any-hit and intersection shaders have to be combined */
2900 uint32_t any_hit_idx = info->pGroups[i].anyHitShader;
2901 const nir_shader *any_hit = NULL;
2902 if (any_hit_idx < info->stageCount)
2903 any_hit = stages[any_hit_idx].nir;
2904
2905 void *group_ctx = ralloc_context(pipeline_ctx);
2906 nir_shader *intersection =
2907 nir_shader_clone(group_ctx, stages[intersection_idx].nir);
2908
2909 brw_nir_lower_combined_intersection_any_hit(intersection, any_hit,
2910 devinfo);
2911
2912 result = compile_upload_rt_shader(pipeline, cache,
2913 intersection,
2914 &stages[intersection_idx],
2915 &group->intersection,
2916 group_ctx);
2917 ralloc_free(group_ctx);
2918 if (result != VK_SUCCESS)
2919 return result;
2920 } else {
2921 group->intersection = stages[intersection_idx].bin;
2922 }
2923
2924 uint32_t stack_size =
2925 brw_bs_prog_data_const(group->intersection->prog_data)->max_stack_size;
2926 stack_max[MESA_SHADER_INTERSECTION] =
2927 MAX2(stack_max[MESA_SHADER_INTERSECTION], stack_size);
2928
2929 break;
2930 }
2931
2932 default:
2933 unreachable("Invalid ray tracing shader group type");
2934 }
2935 }
2936
2937 done:
2938 ralloc_free(pipeline_ctx);
2939
2940 anv_pipeline_compute_ray_tracing_stacks(pipeline, info, stack_max);
2941
2942 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
2943
2944 const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
2945 vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
2946 if (create_feedback) {
2947 *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
2948
2949 assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount);
2950 for (uint32_t i = 0; i < info->stageCount; i++) {
2951 gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
2952 create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
2953 }
2954 }
2955
2956 return VK_SUCCESS;
2957 }
2958
2959 VkResult
2960 anv_device_init_rt_shaders(struct anv_device *device)
2961 {
2962 if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
2963 return VK_SUCCESS;
2964
2965 bool cache_hit;
2966
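/* The raygen trampoline is a small compute shader that bridges
 * vkCmdTraceRays*() to the bindless thread dispatcher: it is dispatched
 * like a regular compute shader and spawns the actual ray-generation
 * threads.
 */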
2967 struct brw_rt_trampoline {
2968 char name[16];
2969 struct brw_cs_prog_key key;
2970 } trampoline_key = {
2971 .name = "rt-trampoline",
2972 .key = {
2973 /* TODO: Other subgroup sizes? */
2974 .base.subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_8,
2975 },
2976 };
2977 device->rt_trampoline =
2978 anv_device_search_for_kernel(device, &device->default_pipeline_cache,
2979 &trampoline_key, sizeof(trampoline_key),
2980 &cache_hit);
2981 if (device->rt_trampoline == NULL) {
2982
2983 void *tmp_ctx = ralloc_context(NULL);
2984 nir_shader *trampoline_nir =
2985 brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx);
2986
2987 struct anv_pipeline_bind_map bind_map = {
2988 .surface_count = 0,
2989 .sampler_count = 0,
2990 };
2991 uint32_t dummy_params[4] = { 0, };
2992 struct brw_cs_prog_data trampoline_prog_data = {
2993 .base.nr_params = 4,
2994 .base.param = dummy_params,
2995 .uses_inline_data = true,
2996 .uses_btd_stack_ids = true,
2997 };
2998 struct brw_compile_cs_params params = {
2999 .nir = trampoline_nir,
3000 .key = &trampoline_key.key,
3001 .prog_data = &trampoline_prog_data,
3002 .log_data = device,
3003 };
3004 const unsigned *tramp_data =
3005 brw_compile_cs(device->physical->compiler, tmp_ctx, &params);
3006
3007 device->rt_trampoline =
3008 anv_device_upload_kernel(device, &device->default_pipeline_cache,
3009 MESA_SHADER_COMPUTE,
3010 &trampoline_key, sizeof(trampoline_key),
3011 tramp_data,
3012 trampoline_prog_data.base.program_size,
3013 &trampoline_prog_data.base,
3014 sizeof(trampoline_prog_data),
3015 NULL, 0, NULL, &bind_map);
3016
3017 ralloc_free(tmp_ctx);
3018
3019 if (device->rt_trampoline == NULL)
3020 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3021 }
3022
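/* A minimal bindless shader that immediately returns to its caller, kept
 * around for places that require a valid return/resume shader even when
 * there is nothing left to do.
 */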
3023 struct brw_rt_trivial_return {
3024 char name[16];
3025 struct brw_bs_prog_key key;
3026 } return_key = {
3027 .name = "rt-trivial-ret",
3028 };
3029 device->rt_trivial_return =
3030 anv_device_search_for_kernel(device, &device->default_pipeline_cache,
3031 &return_key, sizeof(return_key),
3032 &cache_hit);
3033 if (device->rt_trivial_return == NULL) {
3034 void *tmp_ctx = ralloc_context(NULL);
3035 nir_shader *trivial_return_nir =
3036 brw_nir_create_trivial_return_shader(device->physical->compiler, tmp_ctx);
3037
3038 NIR_PASS_V(trivial_return_nir, brw_nir_lower_rt_intrinsics, &device->info);
3039
3040 struct anv_pipeline_bind_map bind_map = {
3041 .surface_count = 0,
3042 .sampler_count = 0,
3043 };
3044 struct brw_bs_prog_data return_prog_data = { 0, };
3045 const unsigned *return_data =
3046 brw_compile_bs(device->physical->compiler, device, tmp_ctx,
3047 &return_key.key, &return_prog_data, trivial_return_nir,
3048 0, 0, NULL, NULL);
3049
3050 device->rt_trivial_return =
3051 anv_device_upload_kernel(device, &device->default_pipeline_cache,
3052 MESA_SHADER_CALLABLE,
3053 &return_key, sizeof(return_key),
3054 return_data, return_prog_data.base.program_size,
3055 &return_prog_data.base, sizeof(return_prog_data),
3056 NULL, 0, NULL, &bind_map);
3057
3058 ralloc_free(tmp_ctx);
3059
3060 if (device->rt_trivial_return == NULL) {
3061 anv_shader_bin_unref(device, device->rt_trampoline);
3062 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3063 }
3064 }
3065
3066 return VK_SUCCESS;
3067 }
3068
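/* Drop the device's reference on the ray-tracing trampoline acquired in
 * anv_device_init_rt_shaders().
 */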
3069 void
3070 anv_device_finish_rt_shaders(struct anv_device *device)
3071 {
3072 if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
3073 return;
3074
3075 anv_shader_bin_unref(device, device->rt_trampoline);
3076 }
3077
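/* Compile every shader group of a ray-tracing pipeline and set up the
 * pipeline's L3 configuration.  On failure, any shader binaries gathered so
 * far are unreferenced before the error is returned.
 */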
3078 VkResult
3079 anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline,
3080 struct anv_device *device,
3081 struct anv_pipeline_cache *cache,
3082 const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
3083 const VkAllocationCallbacks *alloc)
3084 {
3085 VkResult result;
3086
3087 util_dynarray_init(&pipeline->shaders, pipeline->base.mem_ctx);
3088
3089 result = anv_pipeline_compile_ray_tracing(pipeline, cache, pCreateInfo);
3090 if (result != VK_SUCCESS)
3091 goto fail;
3092
3093 anv_pipeline_setup_l3_config(&pipeline->base, /* needs_slm */ false);
3094
3095 return VK_SUCCESS;
3096
3097 fail:
3098 util_dynarray_foreach(&pipeline->shaders,
3099 struct anv_shader_bin *, shader) {
3100 anv_shader_bin_unref(device, *shader);
3101 }
3102 return result;
3103 }
3104
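/* Format a string into a fixed-size char-array field: the field is zeroed
 * first and the formatted text (including the terminating NUL) is asserted
 * to fit.
 */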
3105 #define WRITE_STR(field, ...) ({ \
3106 memset(field, 0, sizeof(field)); \
3107 UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
3108 assert(i > 0 && i < sizeof(field)); \
3109 })
3110
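/* VK_KHR_pipeline_executable_properties: enumerate the shader executables
 * that make up a pipeline.  The reported name and description encode the
 * SIMD width (or "vec4" when the backend reports a dispatch width of 0)
 * together with the shader stage.
 */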
3111 VkResult anv_GetPipelineExecutablePropertiesKHR(
3112 VkDevice device,
3113 const VkPipelineInfoKHR* pPipelineInfo,
3114 uint32_t* pExecutableCount,
3115 VkPipelineExecutablePropertiesKHR* pProperties)
3116 {
3117 ANV_FROM_HANDLE(anv_pipeline, pipeline, pPipelineInfo->pipeline);
3118 VK_OUTARRAY_MAKE(out, pProperties, pExecutableCount);
3119
3120 util_dynarray_foreach (&pipeline->executables, struct anv_pipeline_executable, exe) {
3121 vk_outarray_append(&out, props) {
3122 gl_shader_stage stage = exe->stage;
3123 props->stages = mesa_to_vk_shader_stage(stage);
3124
3125 unsigned simd_width = exe->stats.dispatch_width;
3126 if (stage == MESA_SHADER_FRAGMENT) {
3127 WRITE_STR(props->name, "%s%d %s",
3128 simd_width ? "SIMD" : "vec",
3129 simd_width ? simd_width : 4,
3130 _mesa_shader_stage_to_string(stage));
3131 } else {
3132 WRITE_STR(props->name, "%s", _mesa_shader_stage_to_string(stage));
3133 }
3134 WRITE_STR(props->description, "%s%d %s shader",
3135 simd_width ? "SIMD" : "vec",
3136 simd_width ? simd_width : 4,
3137 _mesa_shader_stage_to_string(stage));
3138
3139 /* The compiler gives us a dispatch width of 0 for vec4 but Vulkan
3140 * wants a subgroup size of 1.
3141 */
3142 props->subgroupSize = MAX2(simd_width, 1);
3143 }
3144 }
3145
3146 return vk_outarray_status(&out);
3147 }
3148
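/* Return the pipeline executable at the given index, asserting that the
 * index is in range.
 */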
3149 static const struct anv_pipeline_executable *
3150 anv_pipeline_get_executable(struct anv_pipeline *pipeline, uint32_t index)
3151 {
3152 assert(index < util_dynarray_num_elements(&pipeline->executables,
3153 struct anv_pipeline_executable));
3154 return util_dynarray_element(
3155 &pipeline->executables, struct anv_pipeline_executable, index);
3156 }
3157
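/* Report the compile-time statistics recorded for one pipeline executable:
 * instruction, SEND, loop, cycle, spill and fill counts plus scratch and
 * (for workgroup stages) shared-memory usage.
 */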
3158 VkResult anv_GetPipelineExecutableStatisticsKHR(
3159 VkDevice device,
3160 const VkPipelineExecutableInfoKHR* pExecutableInfo,
3161 uint32_t* pStatisticCount,
3162 VkPipelineExecutableStatisticKHR* pStatistics)
3163 {
3164 ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
3165 VK_OUTARRAY_MAKE(out, pStatistics, pStatisticCount);
3166
3167 const struct anv_pipeline_executable *exe =
3168 anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
3169
3170 const struct brw_stage_prog_data *prog_data;
3171 switch (pipeline->type) {
3172 case ANV_PIPELINE_GRAPHICS: {
3173 prog_data = anv_pipeline_to_graphics(pipeline)->shaders[exe->stage]->prog_data;
3174 break;
3175 }
3176 case ANV_PIPELINE_COMPUTE: {
3177 prog_data = anv_pipeline_to_compute(pipeline)->cs->prog_data;
3178 break;
3179 }
3180 default:
3181 unreachable("invalid pipeline type");
3182 }
3183
3184 vk_outarray_append(&out, stat) {
3185 WRITE_STR(stat->name, "Instruction Count");
3186 WRITE_STR(stat->description,
3187 "Number of GEN instructions in the final generated "
3188 "shader executable.");
3189 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3190 stat->value.u64 = exe->stats.instructions;
3191 }
3192
3193 vk_outarray_append(&out, stat) {
3194 WRITE_STR(stat->name, "SEND Count");
3195 WRITE_STR(stat->description,
3196 "Number of instructions in the final generated shader "
3197 "executable which access external units such as the "
3198 "constant cache or the sampler.");
3199 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3200 stat->value.u64 = exe->stats.sends;
3201 }
3202
3203 vk_outarray_append(&out, stat) {
3204 WRITE_STR(stat->name, "Loop Count");
3205 WRITE_STR(stat->description,
3206 "Number of loops (not unrolled) in the final generated "
3207 "shader executable.");
3208 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3209 stat->value.u64 = exe->stats.loops;
3210 }
3211
3212 vk_outarray_append(&out, stat) {
3213 WRITE_STR(stat->name, "Cycle Count");
3214 WRITE_STR(stat->description,
3215 "Estimate of the number of EU cycles required to execute "
3216 "the final generated executable. This is an estimate only "
3217 "and may vary greatly from actual run-time performance.");
3218 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3219 stat->value.u64 = exe->stats.cycles;
3220 }
3221
3222 vk_outarray_append(&out, stat) {
3223 WRITE_STR(stat->name, "Spill Count");
3224 WRITE_STR(stat->description,
3225 "Number of scratch spill operations. This gives a rough "
3226 "estimate of the cost incurred due to spilling temporary "
3227 "values to memory. If this is non-zero, you may want to "
3228 "adjust your shader to reduce register pressure.");
3229 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3230 stat->value.u64 = exe->stats.spills;
3231 }
3232
3233 vk_outarray_append(&out, stat) {
3234 WRITE_STR(stat->name, "Fill Count");
3235 WRITE_STR(stat->description,
3236 "Number of scratch fill operations. This gives a rough "
3237 "estimate of the cost incurred due to spilling temporary "
3238 "values to memory. If this is non-zero, you may want to "
3239 "adjust your shader to reduce register pressure.");
3240 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3241 stat->value.u64 = exe->stats.fills;
3242 }
3243
3244 vk_outarray_append(&out, stat) {
3245 WRITE_STR(stat->name, "Scratch Memory Size");
3246 WRITE_STR(stat->description,
3247 "Number of bytes of scratch memory required by the "
3248 "generated shader executable. If this is non-zero, you "
3249 "may want to adjust your shader to reduce register "
3250 "pressure.");
3251 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3252 stat->value.u64 = prog_data->total_scratch;
3253 }
3254
3255 if (gl_shader_stage_uses_workgroup(exe->stage)) {
3256 vk_outarray_append(&out, stat) {
3257 WRITE_STR(stat->name, "Workgroup Memory Size");
3258 WRITE_STR(stat->description,
3259 "Number of bytes of workgroup shared memory used by this "
3260 "shader including any padding.");
3261 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3262 stat->value.u64 = prog_data->total_shared;
3263 }
3264 }
3265
3266 return vk_outarray_status(&out);
3267 }
3268
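/* Copy a NUL-terminated IR string into an internal-representation struct
 * following the usual Vulkan two-call idiom: when pData is NULL only the
 * required dataSize is returned; otherwise the text is copied (truncated if
 * necessary) and false is returned when it did not fit.
 */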
3269 static bool
3270 write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
3271 const char *data)
3272 {
3273 ir->isText = VK_TRUE;
3274
3275 size_t data_len = strlen(data) + 1;
3276
3277 if (ir->pData == NULL) {
3278 ir->dataSize = data_len;
3279 return true;
3280 }
3281
3282 strncpy(ir->pData, data, ir->dataSize);
3283 if (ir->dataSize < data_len)
3284 return false;
3285
3286 ir->dataSize = data_len;
3287 return true;
3288 }
3289
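/* Expose the final NIR and the generated assembly (when they were kept at
 * compile time) as textual internal representations, returning VK_INCOMPLETE
 * if any string had to be truncated.  An application is expected to call
 * this twice per representation, roughly like the illustrative sketch below
 * (error handling and the count query omitted):
 *
 *    ir.pData = NULL;
 *    vkGetPipelineExecutableInternalRepresentationsKHR(dev, &info, &n, &ir);
 *    ir.pData = malloc(ir.dataSize);
 *    vkGetPipelineExecutableInternalRepresentationsKHR(dev, &info, &n, &ir);
 */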
3290 VkResult anv_GetPipelineExecutableInternalRepresentationsKHR(
3291 VkDevice device,
3292 const VkPipelineExecutableInfoKHR* pExecutableInfo,
3293 uint32_t* pInternalRepresentationCount,
3294 VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
3295 {
3296 ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
3297 VK_OUTARRAY_MAKE(out, pInternalRepresentations,
3298 pInternalRepresentationCount);
3299 bool incomplete_text = false;
3300
3301 const struct anv_pipeline_executable *exe =
3302 anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
3303
3304 if (exe->nir) {
3305 vk_outarray_append(&out, ir) {
3306 WRITE_STR(ir->name, "Final NIR");
3307 WRITE_STR(ir->description,
3308 "Final NIR before going into the back-end compiler");
3309
3310 if (!write_ir_text(ir, exe->nir))
3311 incomplete_text = true;
3312 }
3313 }
3314
3315 if (exe->disasm) {
3316 vk_outarray_append(&out, ir) {
3317 WRITE_STR(ir->name, "GEN Assembly");
3318 WRITE_STR(ir->description,
3319 "Final GEN assembly for the generated shader binary");
3320
3321 if (!write_ir_text(ir, exe->disasm))
3322 incomplete_text = true;
3323 }
3324 }
3325
3326 return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
3327 }
3328
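/* Copy the opaque handle of each requested shader group into pData, one
 * handle-sized chunk per group, in group order.
 */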
3329 VkResult
3330 anv_GetRayTracingShaderGroupHandlesKHR(
3331 VkDevice _device,
3332 VkPipeline _pipeline,
3333 uint32_t firstGroup,
3334 uint32_t groupCount,
3335 size_t dataSize,
3336 void* pData)
3337 {
3338 ANV_FROM_HANDLE(anv_device, device, _device);
3339 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
3340
3341 if (pipeline->type != ANV_PIPELINE_RAY_TRACING)
3342 return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
3343
3344 struct anv_ray_tracing_pipeline *rt_pipeline =
3345 anv_pipeline_to_ray_tracing(pipeline);
3346
3347 for (uint32_t i = 0; i < groupCount; i++) {
3348 struct anv_rt_shader_group *group = &rt_pipeline->groups[firstGroup + i];
3349 memcpy(pData, group->handle, sizeof(group->handle));
3350 pData += sizeof(group->handle);
3351 }
3352
3353 return VK_SUCCESS;
3354 }
3355
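/* Capture/replay shader group handles are not implemented yet. */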
3356 VkResult
3357 anv_GetRayTracingCaptureReplayShaderGroupHandlesKHR(
3358 VkDevice _device,
3359 VkPipeline pipeline,
3360 uint32_t firstGroup,
3361 uint32_t groupCount,
3362 size_t dataSize,
3363 void* pData)
3364 {
3365 ANV_FROM_HANDLE(anv_device, device, _device);
3366 unreachable("Unimplemented");
3367 return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
3368 }
3369
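/* Return the backend-reported max_stack_size for the selected shader of a
 * ray-tracing shader group, or 0 if the group has no shader of the requested
 * kind.
 */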
3370 VkDeviceSize
3371 anv_GetRayTracingShaderGroupStackSizeKHR(
3372 VkDevice device,
3373 VkPipeline _pipeline,
3374 uint32_t group,
3375 VkShaderGroupShaderKHR groupShader)
3376 {
3377 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
3378 assert(pipeline->type == ANV_PIPELINE_RAY_TRACING);
3379
3380 struct anv_ray_tracing_pipeline *rt_pipeline =
3381 anv_pipeline_to_ray_tracing(pipeline);
3382
3383 assert(group < rt_pipeline->group_count);
3384
3385 struct anv_shader_bin *bin;
3386 switch (groupShader) {
3387 case VK_SHADER_GROUP_SHADER_GENERAL_KHR:
3388 bin = rt_pipeline->groups[group].general;
3389 break;
3390
3391 case VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR:
3392 bin = rt_pipeline->groups[group].closest_hit;
3393 break;
3394
3395 case VK_SHADER_GROUP_SHADER_ANY_HIT_KHR:
3396 bin = rt_pipeline->groups[group].any_hit;
3397 break;
3398
3399 case VK_SHADER_GROUP_SHADER_INTERSECTION_KHR:
3400 bin = rt_pipeline->groups[group].intersection;
3401 break;
3402
3403 default:
3404 unreachable("Invalid VkShaderGroupShader enum");
3405 }
3406
3407 if (bin == NULL)
3408 return 0;
3409
3410 return brw_bs_prog_data_const(bin->prog_data)->max_stack_size;
3411 }
3412