1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "util/mesa-sha1.h"
31 #include "util/os_time.h"
32 #include "common/intel_l3_config.h"
33 #include "common/intel_sample_positions.h"
34 #include "compiler/elk/elk_disasm.h"
35 #include "anv_private.h"
36 #include "compiler/elk/elk_nir.h"
37 #include "compiler/intel_nir.h"
38 #include "anv_nir.h"
39 #include "nir/nir_xfb_info.h"
40 #include "spirv/nir_spirv.h"
41 #include "vk_pipeline.h"
42 #include "vk_render_pass.h"
43 #include "vk_util.h"
44
45 /* Eventually, this will become part of anv_CreateShader. Unfortunately,
46 * we can't do that yet because we don't have the ability to copy nir.
47 */
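/* Translate the SPIR-V of one VkPipelineShaderStageCreateInfo into NIR and
 * run the common ELK preprocessing on it.  The returned shader is allocated
 * on mem_ctx.
 */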
48 static nir_shader *
49 anv_shader_stage_to_nir(struct anv_device *device,
50 const VkPipelineShaderStageCreateInfo *stage_info,
51 enum elk_robustness_flags robust_flags,
52 void *mem_ctx)
53 {
54 const struct anv_physical_device *pdevice = device->physical;
55 const struct anv_instance *instance = pdevice->instance;
56 const struct elk_compiler *compiler = pdevice->compiler;
57 gl_shader_stage stage = vk_to_mesa_shader_stage(stage_info->stage);
58 const nir_shader_compiler_options *nir_options =
59 compiler->nir_options[stage];
60
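/* SPIR-V capabilities are advertised based on the hardware generation (the
 * ver >= 8 / ver >= 9 checks below) and a few device/instance-level features.
 */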
61 const struct spirv_to_nir_options spirv_options = {
62 .caps = {
63 .demote_to_helper_invocation = true,
64 .derivative_group = true,
65 .descriptor_array_dynamic_indexing = true,
66 .descriptor_array_non_uniform_indexing = true,
67 .descriptor_indexing = true,
68 .device_group = true,
69 .draw_parameters = true,
70 .float16 = pdevice->info.ver >= 8,
71 .float32_atomic_add = pdevice->info.has_lsc,
72 .float32_atomic_min_max = pdevice->info.ver >= 9,
73 .float64 = pdevice->info.ver >= 8,
74 .float64_atomic_min_max = pdevice->info.has_lsc,
75 .fragment_shader_sample_interlock = pdevice->info.ver >= 9,
76 .fragment_shader_pixel_interlock = pdevice->info.ver >= 9,
77 .geometry_streams = true,
78 /* With Vulkan 1.3, or when KHR_format_feature_flags2 is enabled, support
79 * for read/write without format is reported per format, so just report
80 * true here. It's up to the application to check.
81 */
82 .image_read_without_format = instance->vk.app_info.api_version >= VK_API_VERSION_1_3 || device->vk.enabled_extensions.KHR_format_feature_flags2,
83 .image_write_without_format = true,
84 .int8 = pdevice->info.ver >= 8,
85 .int16 = pdevice->info.ver >= 8,
86 .int64 = pdevice->info.ver >= 8,
87 .int64_atomics = pdevice->info.ver >= 9 && pdevice->use_softpin,
88 .integer_functions2 = pdevice->info.ver >= 8,
89 .min_lod = true,
90 .multiview = true,
91 .physical_storage_buffer_address = pdevice->has_a64_buffer_access,
92 .post_depth_coverage = pdevice->info.ver >= 9,
93 .runtime_descriptor_array = true,
94 .float_controls = true,
95 .shader_clock = true,
96 .shader_viewport_index_layer = true,
97 .stencil_export = pdevice->info.ver >= 9,
98 .storage_8bit = pdevice->info.ver >= 8,
99 .storage_16bit = pdevice->info.ver >= 8,
100 .subgroup_arithmetic = true,
101 .subgroup_basic = true,
102 .subgroup_ballot = true,
103 .subgroup_dispatch = true,
104 .subgroup_quad = true,
105 .subgroup_uniform_control_flow = true,
106 .subgroup_shuffle = true,
107 .subgroup_vote = true,
108 .tessellation = true,
109 .transform_feedback = true,
110 .variable_pointers = true,
111 .vk_memory_model = true,
112 .vk_memory_model_device_scope = true,
113 .workgroup_memory_explicit_layout = true,
114 },
115 .ubo_addr_format = anv_nir_ubo_addr_format(pdevice, robust_flags),
116 .ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_flags),
117 .phys_ssbo_addr_format = nir_address_format_64bit_global,
118 .push_const_addr_format = nir_address_format_logical,
119
120 /* TODO: Consider changing this to an address format where the NULL
121 * pointer equals 0. That might be a better format to play nicely
122 * with certain code / code generators.
123 */
124 .shared_addr_format = nir_address_format_32bit_offset,
125
126 .min_ubo_alignment = ANV_UBO_ALIGNMENT,
127 .min_ssbo_alignment = ANV_SSBO_ALIGNMENT,
128 };
129
130 nir_shader *nir;
131 VkResult result =
132 vk_pipeline_shader_stage_to_nir(&device->vk, stage_info,
133 &spirv_options, nir_options,
134 mem_ctx, &nir);
135 if (result != VK_SUCCESS)
136 return NULL;
137
138 if (INTEL_DEBUG(intel_debug_flag_for_shader_stage(stage))) {
139 fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n",
140 gl_shader_stage_name(stage));
141 nir_print_shader(nir, stderr);
142 }
143
144 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
145 nir_shader_get_entrypoint(nir), true, false);
146
147 const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
148 .point_coord = true,
149 };
150 NIR_PASS(_, nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
151
152 const nir_opt_access_options opt_access_options = {
153 .is_vulkan = true,
154 };
155 NIR_PASS(_, nir, nir_opt_access, &opt_access_options);
156
157 /* Vulkan uses the separate-shader linking model */
158 nir->info.separate_shader = true;
159
160 struct elk_nir_compiler_opts opts = {};
161
162 elk_preprocess_nir(compiler, nir, &opts);
163
164 return nir;
165 }
166
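/* Common initialization shared by all pipeline types.  A sketch of typical
 * usage, mirroring anv_compute_pipeline_create() later in this file (the
 * caller must still provide batch storage, as noted in the comment below):
 *
 *    result = anv_pipeline_init(&pipeline->base, device,
 *                               ANV_PIPELINE_COMPUTE, pCreateInfo->flags,
 *                               pAllocator);
 *    if (result == VK_SUCCESS)
 *       anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS,
 *                             pipeline->batch_data,
 *                             sizeof(pipeline->batch_data));
 */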
167 VkResult
168 anv_pipeline_init(struct anv_pipeline *pipeline,
169 struct anv_device *device,
170 enum anv_pipeline_type type,
171 VkPipelineCreateFlags flags,
172 const VkAllocationCallbacks *pAllocator)
173 {
174 VkResult result;
175
176 memset(pipeline, 0, sizeof(*pipeline));
177
178 vk_object_base_init(&device->vk, &pipeline->base,
179 VK_OBJECT_TYPE_PIPELINE);
180 pipeline->device = device;
181
182 /* It's the job of the child class to provide actual backing storage for
183 * the batch by setting batch.start, batch.next, and batch.end.
184 */
185 pipeline->batch.alloc = pAllocator ? pAllocator : &device->vk.alloc;
186 pipeline->batch.relocs = &pipeline->batch_relocs;
187 pipeline->batch.status = VK_SUCCESS;
188
189 result = anv_reloc_list_init(&pipeline->batch_relocs,
190 pipeline->batch.alloc);
191 if (result != VK_SUCCESS)
192 return result;
193
194 pipeline->mem_ctx = ralloc_context(NULL);
195
196 pipeline->type = type;
197 pipeline->flags = flags;
198
199 util_dynarray_init(&pipeline->executables, pipeline->mem_ctx);
200
201 return VK_SUCCESS;
202 }
203
204 void
205 anv_pipeline_finish(struct anv_pipeline *pipeline,
206 struct anv_device *device,
207 const VkAllocationCallbacks *pAllocator)
208 {
209 anv_reloc_list_finish(&pipeline->batch_relocs,
210 pAllocator ? pAllocator : &device->vk.alloc);
211 ralloc_free(pipeline->mem_ctx);
212 vk_object_base_finish(&pipeline->base);
213 }
214
215 void anv_DestroyPipeline(
216 VkDevice _device,
217 VkPipeline _pipeline,
218 const VkAllocationCallbacks* pAllocator)
219 {
220 ANV_FROM_HANDLE(anv_device, device, _device);
221 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
222
223 if (!pipeline)
224 return;
225
226 switch (pipeline->type) {
227 case ANV_PIPELINE_GRAPHICS: {
228 struct anv_graphics_pipeline *gfx_pipeline =
229 anv_pipeline_to_graphics(pipeline);
230
231 for (unsigned s = 0; s < ARRAY_SIZE(gfx_pipeline->shaders); s++) {
232 if (gfx_pipeline->shaders[s])
233 anv_shader_bin_unref(device, gfx_pipeline->shaders[s]);
234 }
235 break;
236 }
237
238 case ANV_PIPELINE_COMPUTE: {
239 struct anv_compute_pipeline *compute_pipeline =
240 anv_pipeline_to_compute(pipeline);
241
242 if (compute_pipeline->cs)
243 anv_shader_bin_unref(device, compute_pipeline->cs);
244
245 break;
246 }
247
248 default:
249 unreachable("invalid pipeline type");
250 }
251
252 anv_pipeline_finish(pipeline, device, pAllocator);
253 vk_free2(&device->vk.alloc, pAllocator, pipeline);
254 }
255
256 static void
257 populate_sampler_prog_key(const struct intel_device_info *devinfo,
258 struct elk_sampler_prog_key_data *key)
259 {
260 /* XXX: Handle texture swizzle Pre-HSW */
261 }
262
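/* The populate_*_prog_key() helpers below all start from the shared
 * elk_base_prog_key fields (robustness flags, trig-range limiting, sampler
 * key) and then add whatever is specific to their stage.
 */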
263 static void
264 populate_base_prog_key(const struct anv_device *device,
265 enum elk_robustness_flags robust_flags,
266 struct elk_base_prog_key *key)
267 {
268 key->robust_flags = robust_flags;
269 key->limit_trig_input_range =
270 device->physical->instance->limit_trig_input_range;
271
272 populate_sampler_prog_key(device->info, &key->tex);
273 }
274
275 static void
276 populate_vs_prog_key(const struct anv_device *device,
277 enum elk_robustness_flags robust_flags,
278 struct elk_vs_prog_key *key)
279 {
280 memset(key, 0, sizeof(*key));
281
282 populate_base_prog_key(device, robust_flags, &key->base);
283
284 /* XXX: Handle vertex input work-arounds */
285
286 /* XXX: Handle sampler_prog_key */
287 }
288
289 static void
290 populate_tcs_prog_key(const struct anv_device *device,
291 enum elk_robustness_flags robust_flags,
292 unsigned input_vertices,
293 struct elk_tcs_prog_key *key)
294 {
295 memset(key, 0, sizeof(*key));
296
297 populate_base_prog_key(device, robust_flags, &key->base);
298
299 key->input_vertices = input_vertices;
300 }
301
302 static void
303 populate_tes_prog_key(const struct anv_device *device,
304 enum elk_robustness_flags robust_flags,
305 struct elk_tes_prog_key *key)
306 {
307 memset(key, 0, sizeof(*key));
308
309 populate_base_prog_key(device, robust_flags, &key->base);
310 }
311
312 static void
313 populate_gs_prog_key(const struct anv_device *device,
314 enum elk_robustness_flags robust_flags,
315 struct elk_gs_prog_key *key)
316 {
317 memset(key, 0, sizeof(*key));
318
319 populate_base_prog_key(device, robust_flags, &key->base);
320 }
321
322 static void
323 populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
324 enum elk_robustness_flags robust_flags,
325 const BITSET_WORD *dynamic,
326 const struct vk_multisample_state *ms,
327 const struct vk_render_pass_state *rp,
328 struct elk_wm_prog_key *key)
329 {
330 const struct anv_device *device = pipeline->base.device;
331
332 memset(key, 0, sizeof(*key));
333
334 populate_base_prog_key(device, robust_flags, &key->base);
335
336 /* We set this to 0 here and set to the actual value before we call
337 * elk_compile_fs.
338 */
339 key->input_slots_valid = 0;
340
341 /* XXX Vulkan doesn't appear to specify */
342 key->clamp_fragment_color = false;
343
344 key->ignore_sample_mask_out = false;
345
346 assert(rp->color_attachment_count <= MAX_RTS);
347 /* Consider all color outputs as valid until we look at the NIR variables. */
348 key->color_outputs_valid = (1u << rp->color_attachment_count) - 1;
349 key->nr_color_regions = rp->color_attachment_count;
350
351 /* To reduce possible shader recompilations we would need to know if
352 * there is a SampleMask output variable, in order to compute whether we
353 * should emit code to work around the hardware disabling alpha-to-coverage
354 * when there is a SampleMask output.
355 */
356 key->alpha_to_coverage = ms != NULL && ms->alpha_to_coverage_enable ?
357 ELK_ALWAYS : ELK_NEVER;
358
359 /* Vulkan doesn't support fixed-function alpha test */
360 key->alpha_test_replicate_alpha = false;
361
362 if (ms != NULL) {
363 /* We should probably pull this out of the shader, but it's fairly
364 * harmless to compute it and then let dead-code take care of it.
365 */
366 if (ms->rasterization_samples > 1) {
367 key->persample_interp =
368 (ms->sample_shading_enable &&
369 (ms->min_sample_shading * ms->rasterization_samples) > 1) ?
370 ELK_ALWAYS : ELK_NEVER;
371 key->multisample_fbo = ELK_ALWAYS;
372 }
373
374 if (device->physical->instance->sample_mask_out_opengl_behaviour)
375 key->ignore_sample_mask_out = !key->multisample_fbo;
376 }
377 }
378
379 static void
380 populate_cs_prog_key(const struct anv_device *device,
381 enum elk_robustness_flags robust_flags,
382 struct elk_cs_prog_key *key)
383 {
384 memset(key, 0, sizeof(*key));
385
386 populate_base_prog_key(device, robust_flags, &key->base);
387 }
388
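/* Transient per-stage state used while building a pipeline: the SPIR-V hash,
 * prog key, NIR, bind map, compiled code and statistics all live here until
 * the result is uploaded as an anv_shader_bin.
 */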
389 struct anv_pipeline_stage {
390 gl_shader_stage stage;
391
392 const VkPipelineShaderStageCreateInfo *info;
393
394 unsigned char shader_sha1[20];
395
396 union elk_any_prog_key key;
397
398 struct {
399 gl_shader_stage stage;
400 unsigned char sha1[20];
401 } cache_key;
402
403 nir_shader *nir;
404
405 struct anv_pipeline_binding surface_to_descriptor[256];
406 struct anv_pipeline_binding sampler_to_descriptor[256];
407 struct anv_pipeline_bind_map bind_map;
408
409 union elk_any_prog_data prog_data;
410
411 uint32_t num_stats;
412 struct elk_compile_stats stats[3];
413 char *disasm[3];
414
415 VkPipelineCreationFeedback feedback;
416
417 const unsigned *code;
418
419 struct anv_shader_bin *bin;
420 };
421
422 static void
423 anv_pipeline_hash_graphics(struct anv_graphics_pipeline *pipeline,
424 struct anv_pipeline_layout *layout,
425 struct anv_pipeline_stage *stages,
426 unsigned char *sha1_out)
427 {
428 struct mesa_sha1 ctx;
429 _mesa_sha1_init(&ctx);
430
431 _mesa_sha1_update(&ctx, &pipeline->view_mask,
432 sizeof(pipeline->view_mask));
433
434 if (layout)
435 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
436
437 for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
438 if (stages[s].info) {
439 _mesa_sha1_update(&ctx, stages[s].shader_sha1,
440 sizeof(stages[s].shader_sha1));
441 _mesa_sha1_update(&ctx, &stages[s].key, elk_prog_key_size(s));
442 }
443 }
444
445 _mesa_sha1_final(&ctx, sha1_out);
446 }
447
448 static void
449 anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
450 struct anv_pipeline_layout *layout,
451 struct anv_pipeline_stage *stage,
452 unsigned char *sha1_out)
453 {
454 struct mesa_sha1 ctx;
455 _mesa_sha1_init(&ctx);
456
457 if (layout)
458 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
459
460 const struct anv_device *device = pipeline->base.device;
461
462 const bool rba = device->vk.enabled_features.robustBufferAccess;
463 _mesa_sha1_update(&ctx, &rba, sizeof(rba));
464
465 const uint8_t afs = device->physical->instance->assume_full_subgroups;
466 _mesa_sha1_update(&ctx, &afs, sizeof(afs));
467
468 _mesa_sha1_update(&ctx, stage->shader_sha1,
469 sizeof(stage->shader_sha1));
470 _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));
471
472 _mesa_sha1_final(&ctx, sha1_out);
473 }
474
475 static nir_shader *
476 anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
477 struct vk_pipeline_cache *cache,
478 void *mem_ctx,
479 struct anv_pipeline_stage *stage)
480 {
481 const struct elk_compiler *compiler =
482 pipeline->device->physical->compiler;
483 const nir_shader_compiler_options *nir_options =
484 compiler->nir_options[stage->stage];
485 nir_shader *nir;
486
487 nir = anv_device_search_for_nir(pipeline->device, cache,
488 nir_options,
489 stage->shader_sha1,
490 mem_ctx);
491 if (nir) {
492 assert(nir->info.stage == stage->stage);
493 return nir;
494 }
495
496 nir = anv_shader_stage_to_nir(pipeline->device, stage->info,
497 stage->key.base.robust_flags, mem_ctx);
498 if (nir) {
499 anv_device_upload_nir(pipeline->device, cache, nir, stage->shader_sha1);
500 return nir;
501 }
502
503 return NULL;
504 }
505
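/* Size/align callback for nir_lower_vars_to_explicit_types(): a vector takes
 * comp_size * length bytes and a vec3 is aligned like a vec4.
 */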
506 static void
507 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
508 {
509 assert(glsl_type_is_vector_or_scalar(type));
510
511 uint32_t comp_size = glsl_type_is_boolean(type)
512 ? 4 : glsl_get_bit_size(type) / 8;
513 unsigned length = glsl_get_vector_elements(type);
514 *size = comp_size * length,
515 *align = comp_size * (length == 3 ? 4 : length);
516 }
517
518 static void
519 anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
520 void *mem_ctx,
521 struct anv_pipeline_stage *stage,
522 struct anv_pipeline_layout *layout)
523 {
524 const struct anv_physical_device *pdevice = pipeline->device->physical;
525 const struct elk_compiler *compiler = pdevice->compiler;
526
527 struct elk_stage_prog_data *prog_data = &stage->prog_data.base;
528 nir_shader *nir = stage->nir;
529
530 if (nir->info.stage == MESA_SHADER_FRAGMENT) {
531 NIR_PASS(_, nir, nir_lower_wpos_center);
532 NIR_PASS(_, nir, nir_lower_input_attachments,
533 &(nir_input_attachment_options) {
534 .use_fragcoord_sysval = true,
535 .use_layer_id_sysval = true,
536 });
537 }
538
539 NIR_PASS(_, nir, anv_nir_lower_ycbcr_textures, layout);
540
541 if (pipeline->type == ANV_PIPELINE_GRAPHICS) {
542 struct anv_graphics_pipeline *gfx_pipeline =
543 anv_pipeline_to_graphics(pipeline);
544 NIR_PASS(_, nir, anv_nir_lower_multiview, gfx_pipeline->view_mask);
545 }
546
547 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
548
549 NIR_PASS(_, nir, elk_nir_lower_storage_image,
550 &(struct elk_nir_lower_storage_image_opts) {
551 .devinfo = compiler->devinfo,
552 .lower_loads = true,
553 .lower_stores = true,
554 .lower_atomics = true,
555 .lower_get_size = true,
556 });
557
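/* Lower global and push-constant access to explicit addresses/offsets here.
 * UBO/SSBO access is lowered further down, after the pipeline layout has been
 * applied, so the address format can depend on the robustness flags.
 */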
558 NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global,
559 nir_address_format_64bit_global);
560 NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const,
561 nir_address_format_32bit_offset);
562
563 /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
564 NIR_PASS_V(nir, anv_nir_apply_pipeline_layout,
565 pdevice, stage->key.base.robust_flags,
566 layout, &stage->bind_map);
567
568 NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ubo,
569 anv_nir_ubo_addr_format(pdevice, stage->key.base.robust_flags));
570 NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ssbo,
571 anv_nir_ssbo_addr_format(pdevice, stage->key.base.robust_flags));
572
573 /* First run copy-prop to get rid of all of the vec() that address
574 * calculations often create and then constant-fold so that, when we
575 * get to anv_nir_lower_ubo_loads, we can detect constant offsets.
576 */
577 NIR_PASS(_, nir, nir_copy_prop);
578 NIR_PASS(_, nir, nir_opt_constant_folding);
579
580 NIR_PASS(_, nir, anv_nir_lower_ubo_loads);
581
582 enum nir_lower_non_uniform_access_type lower_non_uniform_access_types =
583 nir_lower_non_uniform_texture_access | nir_lower_non_uniform_image_access;
584
585 /* In practice, most shaders do not have non-uniform-qualified
586 * accesses (see
587 * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17558#note_1475069)
588 * thus a cheaper check, which will usually fail, is run first.
589 */
590 if (nir_has_non_uniform_access(nir, lower_non_uniform_access_types)) {
591 NIR_PASS(_, nir, nir_opt_non_uniform_access);
592
593 /* We don't support non-uniform UBOs and non-uniform SSBO access is
594 * handled naturally by falling back to A64 messages.
595 */
596 NIR_PASS(_, nir, nir_lower_non_uniform_access,
597 &(nir_lower_non_uniform_access_options) {
598 .types = lower_non_uniform_access_types,
599 .callback = NULL,
600 });
601 }
602
603 NIR_PASS_V(nir, anv_nir_compute_push_layout,
604 pdevice, stage->key.base.robust_flags,
605 prog_data, &stage->bind_map, mem_ctx);
606
607 if (gl_shader_stage_uses_workgroup(nir->info.stage)) {
608 if (!nir->info.shared_memory_explicit_layout) {
609 NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
610 nir_var_mem_shared, shared_type_info);
611 }
612
613 NIR_PASS(_, nir, nir_lower_explicit_io,
614 nir_var_mem_shared, nir_address_format_32bit_offset);
615
616 if (nir->info.zero_initialize_shared_memory &&
617 nir->info.shared_size > 0) {
618 /* The effective Shared Local Memory size is at least 1024 bytes and
619 * is always rounded to a power of two, so it is OK to align the size
620 * used by the shader to chunk_size -- which does simplify the logic.
621 */
622 const unsigned chunk_size = 16;
623 const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
624 assert(shared_size <=
625 elk_calculate_slm_size(compiler->devinfo->ver, nir->info.shared_size));
626
627 NIR_PASS(_, nir, nir_zero_initialize_shared_memory,
628 shared_size, chunk_size);
629 }
630 }
631
632 if (gl_shader_stage_is_compute(nir->info.stage)) {
633 NIR_PASS(_, nir, elk_nir_lower_cs_intrinsics, compiler->devinfo,
634 &stage->prog_data.cs);
635 }
636
637 stage->nir = nir;
638 }
639
640 static void
641 anv_pipeline_link_vs(const struct elk_compiler *compiler,
642 struct anv_pipeline_stage *vs_stage,
643 struct anv_pipeline_stage *next_stage)
644 {
645 if (next_stage)
646 elk_nir_link_shaders(compiler, vs_stage->nir, next_stage->nir);
647 }
648
649 static void
650 anv_pipeline_compile_vs(const struct elk_compiler *compiler,
651 void *mem_ctx,
652 struct anv_graphics_pipeline *pipeline,
653 struct anv_pipeline_stage *vs_stage)
654 {
655 /* When using Primitive Replication for multiview, each view gets its own
656 * position slot.
657 */
658 uint32_t pos_slots =
659 (vs_stage->nir->info.per_view_outputs & VARYING_BIT_POS) ?
660 MAX2(1, util_bitcount(pipeline->view_mask)) : 1;
661
662 /* Only position is allowed to be per-view */
663 assert(!(vs_stage->nir->info.per_view_outputs & ~VARYING_BIT_POS));
664
665 elk_compute_vue_map(compiler->devinfo,
666 &vs_stage->prog_data.vs.base.vue_map,
667 vs_stage->nir->info.outputs_written,
668 vs_stage->nir->info.separate_shader,
669 pos_slots);
670
671 vs_stage->num_stats = 1;
672
673 struct elk_compile_vs_params params = {
674 .base = {
675 .nir = vs_stage->nir,
676 .stats = vs_stage->stats,
677 .log_data = pipeline->base.device,
678 .mem_ctx = mem_ctx,
679 },
680 .key = &vs_stage->key.vs,
681 .prog_data = &vs_stage->prog_data.vs,
682 };
683
684 vs_stage->code = elk_compile_vs(compiler, &params);
685 }
686
687 static void
688 merge_tess_info(struct shader_info *tes_info,
689 const struct shader_info *tcs_info)
690 {
691 /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
692 *
693 * "PointMode. Controls generation of points rather than triangles
694 * or lines. This functionality defaults to disabled, and is
695 * enabled if either shader stage includes the execution mode.
696 *
697 * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
698 * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
699 * and OutputVertices, it says:
700 *
701 * "One mode must be set in at least one of the tessellation
702 * shader stages."
703 *
704 * So, the fields can be set in either the TCS or TES, but they must
705 * agree if set in both. Our backend looks at TES, so bitwise-or in
706 * the values from the TCS.
707 */
708 assert(tcs_info->tess.tcs_vertices_out == 0 ||
709 tes_info->tess.tcs_vertices_out == 0 ||
710 tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
711 tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
712
713 assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
714 tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
715 tcs_info->tess.spacing == tes_info->tess.spacing);
716 tes_info->tess.spacing |= tcs_info->tess.spacing;
717
718 assert(tcs_info->tess._primitive_mode == 0 ||
719 tes_info->tess._primitive_mode == 0 ||
720 tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode);
721 tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode;
722 tes_info->tess.ccw |= tcs_info->tess.ccw;
723 tes_info->tess.point_mode |= tcs_info->tess.point_mode;
724 }
725
726 static void
727 anv_pipeline_link_tcs(const struct elk_compiler *compiler,
728 struct anv_pipeline_stage *tcs_stage,
729 struct anv_pipeline_stage *tes_stage)
730 {
731 assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);
732
733 elk_nir_link_shaders(compiler, tcs_stage->nir, tes_stage->nir);
734
735 nir_lower_patch_vertices(tes_stage->nir,
736 tcs_stage->nir->info.tess.tcs_vertices_out,
737 NULL);
738
739 /* Copy TCS info into the TES info */
740 merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);
741
742 /* Whacking the key after cache lookup is a bit sketchy, but all of
743 * this comes from the SPIR-V, which is part of the hash used for the
744 * pipeline cache. So it should be safe.
745 */
746 tcs_stage->key.tcs._tes_primitive_mode =
747 tes_stage->nir->info.tess._primitive_mode;
748 tcs_stage->key.tcs.quads_workaround =
749 compiler->devinfo->ver < 9 &&
750 tes_stage->nir->info.tess._primitive_mode == TESS_PRIMITIVE_QUADS &&
751 tes_stage->nir->info.tess.spacing == TESS_SPACING_EQUAL;
752 }
753
754 static void
755 anv_pipeline_compile_tcs(const struct elk_compiler *compiler,
756 void *mem_ctx,
757 struct anv_device *device,
758 struct anv_pipeline_stage *tcs_stage,
759 struct anv_pipeline_stage *prev_stage)
760 {
761 tcs_stage->key.tcs.outputs_written =
762 tcs_stage->nir->info.outputs_written;
763 tcs_stage->key.tcs.patch_outputs_written =
764 tcs_stage->nir->info.patch_outputs_written;
765
766 tcs_stage->num_stats = 1;
767
768 struct elk_compile_tcs_params params = {
769 .base = {
770 .nir = tcs_stage->nir,
771 .stats = tcs_stage->stats,
772 .log_data = device,
773 .mem_ctx = mem_ctx,
774 },
775 .key = &tcs_stage->key.tcs,
776 .prog_data = &tcs_stage->prog_data.tcs,
777 };
778
779 tcs_stage->code = elk_compile_tcs(compiler, &params);
780 }
781
782 static void
783 anv_pipeline_link_tes(const struct elk_compiler *compiler,
784 struct anv_pipeline_stage *tes_stage,
785 struct anv_pipeline_stage *next_stage)
786 {
787 if (next_stage)
788 elk_nir_link_shaders(compiler, tes_stage->nir, next_stage->nir);
789 }
790
791 static void
792 anv_pipeline_compile_tes(const struct elk_compiler *compiler,
793 void *mem_ctx,
794 struct anv_device *device,
795 struct anv_pipeline_stage *tes_stage,
796 struct anv_pipeline_stage *tcs_stage)
797 {
798 tes_stage->key.tes.inputs_read =
799 tcs_stage->nir->info.outputs_written;
800 tes_stage->key.tes.patch_inputs_read =
801 tcs_stage->nir->info.patch_outputs_written;
802
803 tes_stage->num_stats = 1;
804
805 struct elk_compile_tes_params params = {
806 .base = {
807 .nir = tes_stage->nir,
808 .stats = tes_stage->stats,
809 .log_data = device,
810 .mem_ctx = mem_ctx,
811 },
812 .key = &tes_stage->key.tes,
813 .prog_data = &tes_stage->prog_data.tes,
814 .input_vue_map = &tcs_stage->prog_data.tcs.base.vue_map,
815 };
816
817 tes_stage->code = elk_compile_tes(compiler, &params);
818 }
819
820 static void
821 anv_pipeline_link_gs(const struct elk_compiler *compiler,
822 struct anv_pipeline_stage *gs_stage,
823 struct anv_pipeline_stage *next_stage)
824 {
825 if (next_stage)
826 elk_nir_link_shaders(compiler, gs_stage->nir, next_stage->nir);
827 }
828
829 static void
830 anv_pipeline_compile_gs(const struct elk_compiler *compiler,
831 void *mem_ctx,
832 struct anv_device *device,
833 struct anv_pipeline_stage *gs_stage,
834 struct anv_pipeline_stage *prev_stage)
835 {
836 elk_compute_vue_map(compiler->devinfo,
837 &gs_stage->prog_data.gs.base.vue_map,
838 gs_stage->nir->info.outputs_written,
839 gs_stage->nir->info.separate_shader, 1);
840
841 gs_stage->num_stats = 1;
842
843 struct elk_compile_gs_params params = {
844 .base = {
845 .nir = gs_stage->nir,
846 .stats = gs_stage->stats,
847 .log_data = device,
848 .mem_ctx = mem_ctx,
849 },
850 .key = &gs_stage->key.gs,
851 .prog_data = &gs_stage->prog_data.gs,
852 };
853
854 gs_stage->code = elk_compile_gs(compiler, &params);
855 }
856
857 static void
858 anv_pipeline_link_fs(const struct elk_compiler *compiler,
859 struct anv_pipeline_stage *stage,
860 const struct vk_render_pass_state *rp)
861 {
862 /* Initially the valid outputs value is set to all possible render targets
863 * valid (see populate_wm_prog_key()), before we look at the shader
864 * variables. Here we look at the output variables of the shader and compute
865 * the correct number of render target outputs.
866 */
867 stage->key.wm.color_outputs_valid = 0;
868 nir_foreach_shader_out_variable_safe(var, stage->nir) {
869 if (var->data.location < FRAG_RESULT_DATA0)
870 continue;
871
872 const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
873 const unsigned array_len =
874 glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
875 assert(rt + array_len <= MAX_RTS);
876
877 stage->key.wm.color_outputs_valid |= BITFIELD_RANGE(rt, array_len);
878 }
879 stage->key.wm.color_outputs_valid &=
880 (1u << rp->color_attachment_count) - 1;
881 stage->key.wm.nr_color_regions =
882 util_last_bit(stage->key.wm.color_outputs_valid);
883
884 unsigned num_rt_bindings;
885 struct anv_pipeline_binding rt_bindings[MAX_RTS];
886 if (stage->key.wm.nr_color_regions > 0) {
887 assert(stage->key.wm.nr_color_regions <= MAX_RTS);
888 for (unsigned rt = 0; rt < stage->key.wm.nr_color_regions; rt++) {
889 if (stage->key.wm.color_outputs_valid & BITFIELD_BIT(rt)) {
890 rt_bindings[rt] = (struct anv_pipeline_binding) {
891 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
892 .index = rt,
893 };
894 } else {
895 /* Setup a null render target */
896 rt_bindings[rt] = (struct anv_pipeline_binding) {
897 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
898 .index = UINT32_MAX,
899 };
900 }
901 }
902 num_rt_bindings = stage->key.wm.nr_color_regions;
903 } else {
904 /* Setup a null render target */
905 rt_bindings[0] = (struct anv_pipeline_binding) {
906 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
907 .index = UINT32_MAX,
908 };
909 num_rt_bindings = 1;
910 }
911
912 assert(num_rt_bindings <= MAX_RTS);
913 assert(stage->bind_map.surface_count == 0);
914 typed_memcpy(stage->bind_map.surface_to_descriptor,
915 rt_bindings, num_rt_bindings);
916 stage->bind_map.surface_count += num_rt_bindings;
917 }
918
919 static void
920 anv_pipeline_compile_fs(const struct elk_compiler *compiler,
921 void *mem_ctx,
922 struct anv_device *device,
923 struct anv_pipeline_stage *fs_stage,
924 struct anv_pipeline_stage *prev_stage)
925 {
926 /* TODO: we could set this to 0 based on the information in nir_shader, but
927 * we need this before we call spirv_to_nir.
928 */
929 assert(prev_stage);
930
931 struct elk_compile_fs_params params = {
932 .base = {
933 .nir = fs_stage->nir,
934 .stats = fs_stage->stats,
935 .log_data = device,
936 .mem_ctx = mem_ctx,
937 },
938 .key = &fs_stage->key.wm,
939 .prog_data = &fs_stage->prog_data.wm,
940
941 .allow_spilling = true,
942 };
943
944 fs_stage->key.wm.input_slots_valid =
945 prev_stage->prog_data.vue.vue_map.slots_valid;
946
947 fs_stage->code = elk_compile_fs(compiler, &params);
948
949 fs_stage->num_stats = (uint32_t)fs_stage->prog_data.wm.dispatch_8 +
950 (uint32_t)fs_stage->prog_data.wm.dispatch_16 +
951 (uint32_t)fs_stage->prog_data.wm.dispatch_32;
952 }
953
954 static void
955 anv_pipeline_add_executable(struct anv_pipeline *pipeline,
956 struct anv_pipeline_stage *stage,
957 struct elk_compile_stats *stats,
958 uint32_t code_offset)
959 {
960 char *nir = NULL;
961 if (stage->nir &&
962 (pipeline->flags &
963 VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
964 nir = nir_shader_as_str(stage->nir, pipeline->mem_ctx);
965 }
966
967 char *disasm = NULL;
968 if (stage->code &&
969 (pipeline->flags &
970 VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
971 char *stream_data = NULL;
972 size_t stream_size = 0;
973 FILE *stream = open_memstream(&stream_data, &stream_size);
974
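/* Dump the push constant layout followed by the disassembly into an
 * in-memory stream; the resulting string is kept as the executable's disasm
 * for the internal-representation capture path.
 */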
975 uint32_t push_size = 0;
976 for (unsigned i = 0; i < 4; i++)
977 push_size += stage->bind_map.push_ranges[i].length;
978 if (push_size > 0) {
979 fprintf(stream, "Push constant ranges:\n");
980 for (unsigned i = 0; i < 4; i++) {
981 if (stage->bind_map.push_ranges[i].length == 0)
982 continue;
983
984 fprintf(stream, " RANGE%d (%dB): ", i,
985 stage->bind_map.push_ranges[i].length * 32);
986
987 switch (stage->bind_map.push_ranges[i].set) {
988 case ANV_DESCRIPTOR_SET_NULL:
989 fprintf(stream, "NULL");
990 break;
991
992 case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
993 fprintf(stream, "Vulkan push constants and API params");
994 break;
995
996 case ANV_DESCRIPTOR_SET_DESCRIPTORS:
997 fprintf(stream, "Descriptor buffer for set %d (start=%dB)",
998 stage->bind_map.push_ranges[i].index,
999 stage->bind_map.push_ranges[i].start * 32);
1000 break;
1001
1002 case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS:
1003 unreachable("gl_NumWorkgroups is never pushed");
1004
1005 case ANV_DESCRIPTOR_SET_SHADER_CONSTANTS:
1006 fprintf(stream, "Inline shader constant data (start=%dB)",
1007 stage->bind_map.push_ranges[i].start * 32);
1008 break;
1009
1010 case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
1011 unreachable("Color attachments can't be pushed");
1012
1013 default:
1014 fprintf(stream, "UBO (set=%d binding=%d start=%dB)",
1015 stage->bind_map.push_ranges[i].set,
1016 stage->bind_map.push_ranges[i].index,
1017 stage->bind_map.push_ranges[i].start * 32);
1018 break;
1019 }
1020 fprintf(stream, "\n");
1021 }
1022 fprintf(stream, "\n");
1023 }
1024
1025 /* Creating this is far cheaper than it looks. It's perfectly fine to
1026 * do it for every binary.
1027 */
1028 elk_disassemble_with_errors(&pipeline->device->physical->compiler->isa,
1029 stage->code, code_offset, stream);
1030
1031 fclose(stream);
1032
1033 /* Copy it to a ralloc'd thing */
1034 disasm = ralloc_size(pipeline->mem_ctx, stream_size + 1);
1035 memcpy(disasm, stream_data, stream_size);
1036 disasm[stream_size] = 0;
1037
1038 free(stream_data);
1039 }
1040
1041 const struct anv_pipeline_executable exe = {
1042 .stage = stage->stage,
1043 .stats = *stats,
1044 .nir = nir,
1045 .disasm = disasm,
1046 };
1047 util_dynarray_append(&pipeline->executables,
1048 struct anv_pipeline_executable, exe);
1049 }
1050
1051 static void
1052 anv_pipeline_add_executables(struct anv_pipeline *pipeline,
1053 struct anv_pipeline_stage *stage,
1054 struct anv_shader_bin *bin)
1055 {
1056 if (stage->stage == MESA_SHADER_FRAGMENT) {
1057 /* We pull the prog data and stats out of the anv_shader_bin because
1058 * the anv_pipeline_stage may not be fully populated if we successfully
1059 * looked up the shader in a cache.
1060 */
1061 const struct elk_wm_prog_data *wm_prog_data =
1062 (const struct elk_wm_prog_data *)bin->prog_data;
1063 struct elk_compile_stats *stats = bin->stats;
1064
1065 if (wm_prog_data->dispatch_8) {
1066 anv_pipeline_add_executable(pipeline, stage, stats++, 0);
1067 }
1068
1069 if (wm_prog_data->dispatch_16) {
1070 anv_pipeline_add_executable(pipeline, stage, stats++,
1071 wm_prog_data->prog_offset_16);
1072 }
1073
1074 if (wm_prog_data->dispatch_32) {
1075 anv_pipeline_add_executable(pipeline, stage, stats++,
1076 wm_prog_data->prog_offset_32);
1077 }
1078 } else {
1079 anv_pipeline_add_executable(pipeline, stage, bin->stats, 0);
1080 }
1081 }
1082
1083 static enum elk_robustness_flags
1084 anv_device_get_robust_flags(const struct anv_device *device)
1085 {
1086 return device->robust_buffer_access ?
1087 (ELK_ROBUSTNESS_UBO | ELK_ROBUSTNESS_SSBO) : 0;
1088 }
1089
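/* Hash each stage's SPIR-V and fill in the per-stage prog keys from device
 * and pipeline state.  The per-stage creation-feedback timers start
 * accumulating here.
 */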
1090 static void
1091 anv_graphics_pipeline_init_keys(struct anv_graphics_pipeline *pipeline,
1092 const struct vk_graphics_pipeline_state *state,
1093 struct anv_pipeline_stage *stages)
1094 {
1095 for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
1096 if (!stages[s].info)
1097 continue;
1098
1099 int64_t stage_start = os_time_get_nano();
1100
1101 vk_pipeline_hash_shader_stage(stages[s].info, NULL, stages[s].shader_sha1);
1102
1103 const struct anv_device *device = pipeline->base.device;
1104 enum elk_robustness_flags robust_flags = anv_device_get_robust_flags(device);
1105 switch (stages[s].stage) {
1106 case MESA_SHADER_VERTEX:
1107 populate_vs_prog_key(device,
1108 robust_flags,
1109 &stages[s].key.vs);
1110 break;
1111 case MESA_SHADER_TESS_CTRL:
1112 populate_tcs_prog_key(device,
1113 robust_flags,
1114 state->ts->patch_control_points,
1115 &stages[s].key.tcs);
1116 break;
1117 case MESA_SHADER_TESS_EVAL:
1118 populate_tes_prog_key(device,
1119 robust_flags,
1120 &stages[s].key.tes);
1121 break;
1122 case MESA_SHADER_GEOMETRY:
1123 populate_gs_prog_key(device,
1124 robust_flags,
1125 &stages[s].key.gs);
1126 break;
1127 case MESA_SHADER_FRAGMENT: {
1128 populate_wm_prog_key(pipeline,
1129 robust_flags,
1130 state->dynamic, state->ms, state->rp,
1131 &stages[s].key.wm);
1132 break;
1133 }
1134 default:
1135 unreachable("Invalid graphics shader stage");
1136 }
1137
1138 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1139 stages[s].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
1140 }
1141
1142 assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
1143 }
1144
1145 static bool
1146 anv_graphics_pipeline_load_cached_shaders(struct anv_graphics_pipeline *pipeline,
1147 struct vk_pipeline_cache *cache,
1148 struct anv_pipeline_stage *stages,
1149 VkPipelineCreationFeedback *pipeline_feedback)
1150 {
1151 unsigned found = 0;
1152 unsigned cache_hits = 0;
1153 for (unsigned s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
1154 if (!stages[s].info)
1155 continue;
1156
1157 int64_t stage_start = os_time_get_nano();
1158
1159 bool cache_hit;
1160 struct anv_shader_bin *bin =
1161 anv_device_search_for_kernel(pipeline->base.device, cache,
1162 &stages[s].cache_key,
1163 sizeof(stages[s].cache_key), &cache_hit);
1164 if (bin) {
1165 found++;
1166 pipeline->shaders[s] = bin;
1167 }
1168
1169 if (cache_hit) {
1170 cache_hits++;
1171 stages[s].feedback.flags |=
1172 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
1173 }
1174 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1175 }
1176
1177 if (found == __builtin_popcount(pipeline->active_stages)) {
1178 if (cache_hits == found) {
1179 pipeline_feedback->flags |=
1180 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
1181 }
1182 /* We found all our shaders in the cache. We're done. */
1183 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1184 if (!stages[s].info)
1185 continue;
1186
1187 anv_pipeline_add_executables(&pipeline->base, &stages[s],
1188 pipeline->shaders[s]);
1189 }
1190 return true;
1191 } else if (found > 0) {
1192 /* We found some but not all of our shaders. This shouldn't happen most
1193 * of the time but it can if we have a partially populated pipeline
1194 * cache.
1195 */
1196 assert(found < __builtin_popcount(pipeline->active_stages));
1197
1198 vk_perf(VK_LOG_OBJS(cache ? &cache->base :
1199 &pipeline->base.device->vk.base),
1200 "Found a partial pipeline in the cache. This is "
1201 "most likely caused by an incomplete pipeline cache "
1202 "import or export");
1203
1204 /* We're going to have to recompile anyway, so just throw away our
1205 * references to the shaders in the cache. We'll get them out of the
1206 * cache again as part of the compilation process.
1207 */
1208 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1209 stages[s].feedback.flags = 0;
1210 if (pipeline->shaders[s]) {
1211 anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
1212 pipeline->shaders[s] = NULL;
1213 }
1214 }
1215 }
1216
1217 return false;
1218 }
1219
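/* Stages in pipeline order.  Linking below walks this array backwards so each
 * stage can see its consumer; compilation walks it forwards so each stage can
 * see its producer.
 */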
1220 static const gl_shader_stage graphics_shader_order[] = {
1221 MESA_SHADER_VERTEX,
1222 MESA_SHADER_TESS_CTRL,
1223 MESA_SHADER_TESS_EVAL,
1224 MESA_SHADER_GEOMETRY,
1225
1226 MESA_SHADER_FRAGMENT,
1227 };
1228
1229 static VkResult
1230 anv_graphics_pipeline_load_nir(struct anv_graphics_pipeline *pipeline,
1231 struct vk_pipeline_cache *cache,
1232 struct anv_pipeline_stage *stages,
1233 void *pipeline_ctx)
1234 {
1235 for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
1236 gl_shader_stage s = graphics_shader_order[i];
1237 if (!stages[s].info)
1238 continue;
1239
1240 int64_t stage_start = os_time_get_nano();
1241
1242 assert(stages[s].stage == s);
1243 assert(pipeline->shaders[s] == NULL);
1244
1245 stages[s].bind_map = (struct anv_pipeline_bind_map) {
1246 .surface_to_descriptor = stages[s].surface_to_descriptor,
1247 .sampler_to_descriptor = stages[s].sampler_to_descriptor
1248 };
1249
1250 stages[s].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
1251 pipeline_ctx,
1252 &stages[s]);
1253 if (stages[s].nir == NULL) {
1254 return vk_error(pipeline, VK_ERROR_UNKNOWN);
1255 }
1256
1257 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1258 }
1259
1260 return VK_SUCCESS;
1261 }
1262
1263 static VkResult
1264 anv_graphics_pipeline_compile(struct anv_graphics_pipeline *pipeline,
1265 struct vk_pipeline_cache *cache,
1266 const VkGraphicsPipelineCreateInfo *info,
1267 const struct vk_graphics_pipeline_state *state)
1268 {
1269 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1270 VkResult result;
1271
1272 VkPipelineCreationFeedback pipeline_feedback = {
1273 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
1274 };
1275 int64_t pipeline_start = os_time_get_nano();
1276
1277 const struct elk_compiler *compiler = pipeline->base.device->physical->compiler;
1278 struct anv_pipeline_stage stages[ANV_GRAPHICS_SHADER_STAGE_COUNT] = {};
1279 for (uint32_t i = 0; i < info->stageCount; i++) {
1280 gl_shader_stage stage = vk_to_mesa_shader_stage(info->pStages[i].stage);
1281 stages[stage].stage = stage;
1282 stages[stage].info = &info->pStages[i];
1283 }
1284
1285 anv_graphics_pipeline_init_keys(pipeline, state, stages);
1286
1287 unsigned char sha1[20];
1288 anv_pipeline_hash_graphics(pipeline, layout, stages, sha1);
1289
1290 for (unsigned s = 0; s < ARRAY_SIZE(stages); s++) {
1291 if (!stages[s].info)
1292 continue;
1293
1294 stages[s].cache_key.stage = s;
1295 memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
1296 }
1297
1298 const bool skip_cache_lookup =
1299 (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
1300 if (!skip_cache_lookup) {
1301 bool found_all_shaders =
1302 anv_graphics_pipeline_load_cached_shaders(pipeline, cache, stages,
1303 &pipeline_feedback);
1304 if (found_all_shaders)
1305 goto done;
1306 }
1307
1308 if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
1309 return VK_PIPELINE_COMPILE_REQUIRED;
1310
1311 void *pipeline_ctx = ralloc_context(NULL);
1312
1313 result = anv_graphics_pipeline_load_nir(pipeline, cache, stages,
1314 pipeline_ctx);
1315 if (result != VK_SUCCESS)
1316 goto fail;
1317
1318 /* Walk backwards to link */
1319 struct anv_pipeline_stage *next_stage = NULL;
1320 for (int i = ARRAY_SIZE(graphics_shader_order) - 1; i >= 0; i--) {
1321 gl_shader_stage s = graphics_shader_order[i];
1322 if (!stages[s].info)
1323 continue;
1324
1325 switch (s) {
1326 case MESA_SHADER_VERTEX:
1327 anv_pipeline_link_vs(compiler, &stages[s], next_stage);
1328 break;
1329 case MESA_SHADER_TESS_CTRL:
1330 anv_pipeline_link_tcs(compiler, &stages[s], next_stage);
1331 break;
1332 case MESA_SHADER_TESS_EVAL:
1333 anv_pipeline_link_tes(compiler, &stages[s], next_stage);
1334 break;
1335 case MESA_SHADER_GEOMETRY:
1336 anv_pipeline_link_gs(compiler, &stages[s], next_stage);
1337 break;
1338 case MESA_SHADER_FRAGMENT:
1339 anv_pipeline_link_fs(compiler, &stages[s], state->rp);
1340 break;
1341 default:
1342 unreachable("Invalid graphics shader stage");
1343 }
1344
1345 next_stage = &stages[s];
1346 }
1347
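/* Second pass, in pipeline order: apply ANV-specific lowering (descriptor
 * layout, push constants, etc.) and, where the backend asks for unified
 * interfaces, make adjacent stages' input/output masks match.
 */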
1348 struct anv_pipeline_stage *prev_stage = NULL;
1349 for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
1350 gl_shader_stage s = graphics_shader_order[i];
1351 if (!stages[s].info)
1352 continue;
1353
1354 int64_t stage_start = os_time_get_nano();
1355
1356 void *stage_ctx = ralloc_context(NULL);
1357
1358 anv_pipeline_lower_nir(&pipeline->base, stage_ctx, &stages[s], layout);
1359
1360 if (prev_stage && compiler->nir_options[s]->unify_interfaces) {
1361 prev_stage->nir->info.outputs_written |= stages[s].nir->info.inputs_read &
1362 ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
1363 stages[s].nir->info.inputs_read |= prev_stage->nir->info.outputs_written &
1364 ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
1365 prev_stage->nir->info.patch_outputs_written |= stages[s].nir->info.patch_inputs_read;
1366 stages[s].nir->info.patch_inputs_read |= prev_stage->nir->info.patch_outputs_written;
1367 }
1368
1369 ralloc_free(stage_ctx);
1370
1371 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1372
1373 prev_stage = &stages[s];
1374 }
1375
1376 prev_stage = NULL;
1377 for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
1378 gl_shader_stage s = graphics_shader_order[i];
1379 if (!stages[s].info)
1380 continue;
1381
1382 int64_t stage_start = os_time_get_nano();
1383
1384 void *stage_ctx = ralloc_context(NULL);
1385
1386 switch (s) {
1387 case MESA_SHADER_VERTEX:
1388 anv_pipeline_compile_vs(compiler, stage_ctx, pipeline,
1389 &stages[s]);
1390 break;
1391 case MESA_SHADER_TESS_CTRL:
1392 anv_pipeline_compile_tcs(compiler, stage_ctx, pipeline->base.device,
1393 &stages[s], prev_stage);
1394 break;
1395 case MESA_SHADER_TESS_EVAL:
1396 anv_pipeline_compile_tes(compiler, stage_ctx, pipeline->base.device,
1397 &stages[s], prev_stage);
1398 break;
1399 case MESA_SHADER_GEOMETRY:
1400 anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->base.device,
1401 &stages[s], prev_stage);
1402 break;
1403 case MESA_SHADER_FRAGMENT:
1404 anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->base.device,
1405 &stages[s], prev_stage);
1406 break;
1407 default:
1408 unreachable("Invalid graphics shader stage");
1409 }
1410 if (stages[s].code == NULL) {
1411 ralloc_free(stage_ctx);
1412 result = vk_error(pipeline->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
1413 goto fail;
1414 }
1415
1416 anv_nir_validate_push_layout(&stages[s].prog_data.base,
1417 &stages[s].bind_map);
1418
1419 struct anv_shader_bin *bin =
1420 anv_device_upload_kernel(pipeline->base.device, cache, s,
1421 &stages[s].cache_key,
1422 sizeof(stages[s].cache_key),
1423 stages[s].code,
1424 stages[s].prog_data.base.program_size,
1425 &stages[s].prog_data.base,
1426 elk_prog_data_size(s),
1427 stages[s].stats, stages[s].num_stats,
1428 stages[s].nir->xfb_info,
1429 &stages[s].bind_map);
1430 if (!bin) {
1431 ralloc_free(stage_ctx);
1432 result = vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1433 goto fail;
1434 }
1435
1436 anv_pipeline_add_executables(&pipeline->base, &stages[s], bin);
1437
1438 pipeline->shaders[s] = bin;
1439 ralloc_free(stage_ctx);
1440
1441 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1442
1443 prev_stage = &stages[s];
1444 }
1445
1446 ralloc_free(pipeline_ctx);
1447
1448 done:
1449
1450 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1451
1452 const VkPipelineCreationFeedbackCreateInfo *create_feedback =
1453 vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
1454 if (create_feedback) {
1455 *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1456
1457 uint32_t stage_count = create_feedback->pipelineStageCreationFeedbackCount;
1458 assert(stage_count == 0 || info->stageCount == stage_count);
1459 for (uint32_t i = 0; i < stage_count; i++) {
1460 gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
1461 create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
1462 }
1463 }
1464
1465 return VK_SUCCESS;
1466
1467 fail:
1468 ralloc_free(pipeline_ctx);
1469
1470 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1471 if (pipeline->shaders[s])
1472 anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
1473 }
1474
1475 return result;
1476 }
1477
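/* Compile (or fetch from the cache) the single shader of a compute pipeline.
 * There is no linking step, but otherwise the same hash -> cache lookup ->
 * NIR lowering -> backend compile -> kernel upload flow as the graphics path
 * is followed.
 */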
1478 static VkResult
1479 anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
1480 struct vk_pipeline_cache *cache,
1481 const VkComputePipelineCreateInfo *info)
1482 {
1483 const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
1484 assert(sinfo->stage == VK_SHADER_STAGE_COMPUTE_BIT);
1485
1486 VkPipelineCreationFeedback pipeline_feedback = {
1487 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
1488 };
1489 int64_t pipeline_start = os_time_get_nano();
1490
1491 struct anv_device *device = pipeline->base.device;
1492 const struct elk_compiler *compiler = device->physical->compiler;
1493
1494 struct anv_pipeline_stage stage = {
1495 .stage = MESA_SHADER_COMPUTE,
1496 .info = &info->stage,
1497 .cache_key = {
1498 .stage = MESA_SHADER_COMPUTE,
1499 },
1500 .feedback = {
1501 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
1502 },
1503 };
1504 vk_pipeline_hash_shader_stage(&info->stage, NULL, stage.shader_sha1);
1505
1506 struct anv_shader_bin *bin = NULL;
1507
1508 populate_cs_prog_key(device,
1509 anv_device_get_robust_flags(device),
1510 &stage.key.cs);
1511
1512 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1513
1514 const bool skip_cache_lookup =
1515 (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
1516
1517 anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1);
1518
1519 bool cache_hit = false;
1520 if (!skip_cache_lookup) {
1521 bin = anv_device_search_for_kernel(device, cache,
1522 &stage.cache_key,
1523 sizeof(stage.cache_key),
1524 &cache_hit);
1525 }
1526
1527 if (bin == NULL &&
1528 (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT))
1529 return VK_PIPELINE_COMPILE_REQUIRED;
1530
1531 void *mem_ctx = ralloc_context(NULL);
1532 if (bin == NULL) {
1533 int64_t stage_start = os_time_get_nano();
1534
1535 stage.bind_map = (struct anv_pipeline_bind_map) {
1536 .surface_to_descriptor = stage.surface_to_descriptor,
1537 .sampler_to_descriptor = stage.sampler_to_descriptor
1538 };
1539
1540 /* Set up a binding for the gl_NumWorkGroups */
1541 stage.bind_map.surface_count = 1;
1542 stage.bind_map.surface_to_descriptor[0] = (struct anv_pipeline_binding) {
1543 .set = ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS,
1544 };
1545
1546 stage.nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, mem_ctx, &stage);
1547 if (stage.nir == NULL) {
1548 ralloc_free(mem_ctx);
1549 return vk_error(pipeline, VK_ERROR_UNKNOWN);
1550 }
1551
1552 anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout);
1553
1554 unsigned local_size = stage.nir->info.workgroup_size[0] *
1555 stage.nir->info.workgroup_size[1] *
1556 stage.nir->info.workgroup_size[2];
1557
1558 /* Games don't always request full subgroups when they should,
1559 * which can cause bugs, as they may expect a bigger subgroup size
1560 * than the one we choose for execution.
1561 */
1562 if (device->physical->instance->assume_full_subgroups &&
1563 stage.nir->info.uses_wide_subgroup_intrinsics &&
1564 stage.nir->info.subgroup_size == SUBGROUP_SIZE_API_CONSTANT &&
1565 local_size &&
1566 local_size % ELK_SUBGROUP_SIZE == 0)
1567 stage.nir->info.subgroup_size = SUBGROUP_SIZE_FULL_SUBGROUPS;
1568
1569 /* If the client requests that we dispatch full subgroups but doesn't
1570 * allow us to pick a subgroup size, we have to smash it to the API
1571 * value of 32. Performance will likely be terrible in this case but
1572 * there's nothing we can do about that. The client should have chosen
1573 * a size.
1574 */
1575 if (stage.nir->info.subgroup_size == SUBGROUP_SIZE_FULL_SUBGROUPS)
1576 stage.nir->info.subgroup_size =
1577 device->physical->instance->assume_full_subgroups != 0 ?
1578 device->physical->instance->assume_full_subgroups : ELK_SUBGROUP_SIZE;
1579
1580 stage.num_stats = 1;
1581
1582 struct elk_compile_cs_params params = {
1583 .base = {
1584 .nir = stage.nir,
1585 .stats = stage.stats,
1586 .log_data = device,
1587 .mem_ctx = mem_ctx,
1588 },
1589 .key = &stage.key.cs,
1590 .prog_data = &stage.prog_data.cs,
1591 };
1592
1593 stage.code = elk_compile_cs(compiler, &params);
1594 if (stage.code == NULL) {
1595 ralloc_free(mem_ctx);
1596 return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1597 }
1598
1599 anv_nir_validate_push_layout(&stage.prog_data.base, &stage.bind_map);
1600
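      /* If the compiled shader never reads gl_NumWorkGroups, drop the
       * placeholder binding reserved above by pointing it at the null
       * descriptor set.
       */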
1601 if (!stage.prog_data.cs.uses_num_work_groups) {
1602 assert(stage.bind_map.surface_to_descriptor[0].set ==
1603 ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS);
1604 stage.bind_map.surface_to_descriptor[0].set = ANV_DESCRIPTOR_SET_NULL;
1605 }
1606
1607 const unsigned code_size = stage.prog_data.base.program_size;
1608 bin = anv_device_upload_kernel(device, cache,
1609 MESA_SHADER_COMPUTE,
1610 &stage.cache_key, sizeof(stage.cache_key),
1611 stage.code, code_size,
1612 &stage.prog_data.base,
1613 sizeof(stage.prog_data.cs),
1614 stage.stats, stage.num_stats,
1615 NULL, &stage.bind_map);
1616 if (!bin) {
1617 ralloc_free(mem_ctx);
1618 return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1619 }
1620
1621 stage.feedback.duration = os_time_get_nano() - stage_start;
1622 }
1623
1624 anv_pipeline_add_executables(&pipeline->base, &stage, bin);
1625
1626 ralloc_free(mem_ctx);
1627
1628 if (cache_hit) {
1629 stage.feedback.flags |=
1630 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
1631 pipeline_feedback.flags |=
1632 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
1633 }
1634 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1635
1636 const VkPipelineCreationFeedbackCreateInfo *create_feedback =
1637 vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
1638 if (create_feedback) {
1639 *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1640
1641 if (create_feedback->pipelineStageCreationFeedbackCount) {
1642 assert(create_feedback->pipelineStageCreationFeedbackCount == 1);
1643 create_feedback->pPipelineStageCreationFeedbacks[0] = stage.feedback;
1644 }
1645 }
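   /* For reference, a minimal sketch (not taken from any particular
    * application) of how the feedback written above is consumed on the
    * application side:
    *
    *    VkPipelineCreationFeedback feedback = {0};
    *    VkPipelineCreationFeedbackCreateInfo fb_info = {
    *       .sType = VK_STRUCTURE_TYPE_PIPELINE_CREATION_FEEDBACK_CREATE_INFO,
    *       .pPipelineCreationFeedback = &feedback,
    *    };
    *    // Chain fb_info into VkComputePipelineCreateInfo::pNext, create the
    *    // pipeline, then test feedback.flags for
    *    // VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT.
    */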
1646
1647 pipeline->cs = bin;
1648
1649 return VK_SUCCESS;
1650 }
1651
1652 static VkResult
1653 anv_compute_pipeline_create(struct anv_device *device,
1654 struct vk_pipeline_cache *cache,
1655 const VkComputePipelineCreateInfo *pCreateInfo,
1656 const VkAllocationCallbacks *pAllocator,
1657 VkPipeline *pPipeline)
1658 {
1659 struct anv_compute_pipeline *pipeline;
1660 VkResult result;
1661
1662 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);
1663
1664 pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
1665 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1666 if (pipeline == NULL)
1667 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1668
1669 result = anv_pipeline_init(&pipeline->base, device,
1670 ANV_PIPELINE_COMPUTE, pCreateInfo->flags,
1671 pAllocator);
1672 if (result != VK_SUCCESS) {
1673 vk_free2(&device->vk.alloc, pAllocator, pipeline);
1674 return result;
1675 }
1676
1677 anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS,
1678 pipeline->batch_data, sizeof(pipeline->batch_data));
1679
1680 result = anv_pipeline_compile_cs(pipeline, cache, pCreateInfo);
1681 if (result != VK_SUCCESS) {
1682 anv_pipeline_finish(&pipeline->base, device, pAllocator);
1683 vk_free2(&device->vk.alloc, pAllocator, pipeline);
1684 return result;
1685 }
1686
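   /* anv_genX() dispatches to the hardware-generation specific back-end,
    * which records the actual compute pipeline commands into the batch
    * storage set up above.
    */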
1687 anv_genX(device->info, compute_pipeline_emit)(pipeline);
1688
1689 *pPipeline = anv_pipeline_to_handle(&pipeline->base);
1690
1691 return pipeline->base.batch.status;
1692 }
1693
1694 VkResult anv_CreateComputePipelines(
1695 VkDevice _device,
1696 VkPipelineCache pipelineCache,
1697 uint32_t count,
1698 const VkComputePipelineCreateInfo* pCreateInfos,
1699 const VkAllocationCallbacks* pAllocator,
1700 VkPipeline* pPipelines)
1701 {
1702 ANV_FROM_HANDLE(anv_device, device, _device);
1703 ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache);
1704
1705 VkResult result = VK_SUCCESS;
1706
1707 unsigned i;
1708 for (i = 0; i < count; i++) {
1709 VkResult res = anv_compute_pipeline_create(device, pipeline_cache,
1710 &pCreateInfos[i],
1711 pAllocator, &pPipelines[i]);
1712
1713 if (res == VK_SUCCESS)
1714 continue;
1715
1716 /* Bail out on the first error other than VK_PIPELINE_COMPILE_REQUIRED,
1717  * since it is not obvious which error should be reported when two
1718  * different failures occur. */
1719 result = res;
1720 if (res != VK_PIPELINE_COMPILE_REQUIRED)
1721 break;
1722
1723 pPipelines[i] = VK_NULL_HANDLE;
1724
1725 if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
1726 break;
1727 }
1728
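   /* Every entry we did not successfully create, including the ones we never
    * attempted after bailing out above, must be returned as VK_NULL_HANDLE.
    */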
1729 for (; i < count; i++)
1730 pPipelines[i] = VK_NULL_HANDLE;
1731
1732 return result;
1733 }
1734
1735 /**
1736 * Calculate the desired L3 partitioning based on the current state of the
1737 * pipeline. For now this simply returns the conservative defaults calculated
1738 * by get_default_l3_weights(), but we could probably do better by gathering
1739 * more statistics from the pipeline state (e.g. an estimate of expected
1740 * URB usage and bound surfaces), or by using feedback from performance counters.
1741 */
1742 void
1743 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
1744 {
1745 const struct intel_device_info *devinfo = pipeline->device->info;
1746
1747 const struct intel_l3_weights w =
1748 intel_get_default_l3_weights(devinfo, true, needs_slm);
1749
1750 pipeline->l3_config = intel_get_l3_config(devinfo, w);
1751 }
1752
1753 static VkResult
1754 anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
1755 struct anv_device *device,
1756 struct vk_pipeline_cache *cache,
1757 const struct VkGraphicsPipelineCreateInfo *pCreateInfo,
1758 const struct vk_graphics_pipeline_state *state,
1759 const VkAllocationCallbacks *alloc)
1760 {
1761 VkResult result;
1762
1763 result = anv_pipeline_init(&pipeline->base, device,
1764 ANV_PIPELINE_GRAPHICS, pCreateInfo->flags,
1765 alloc);
1766 if (result != VK_SUCCESS)
1767 return result;
1768
1769 anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS,
1770 pipeline->batch_data, sizeof(pipeline->batch_data));
1771
1772 pipeline->active_stages = 0;
1773 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
1774 pipeline->active_stages |= pCreateInfo->pStages[i].stage;
1775
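   /* Vulkan requires tessellation control and evaluation shaders to be used
    * together, so mark the control stage active whenever evaluation is.
    */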
1776 if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1777 pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
1778
1779 pipeline->dynamic_state.ms.sample_locations = &pipeline->sample_locations;
1780 vk_dynamic_graphics_state_fill(&pipeline->dynamic_state, state);
1781
1782 pipeline->depth_clamp_enable = state->rs->depth_clamp_enable;
1783 pipeline->depth_clip_enable =
1784 vk_rasterization_state_depth_clip_enable(state->rs);
1785 pipeline->view_mask = state->rp->view_mask;
1786
1787 result = anv_graphics_pipeline_compile(pipeline, cache, pCreateInfo, state);
1788 if (result != VK_SUCCESS) {
1789 anv_pipeline_finish(&pipeline->base, device, alloc);
1790 return result;
1791 }
1792
1793 anv_pipeline_setup_l3_config(&pipeline->base, false);
1794
1795 const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;
1796
1797 u_foreach_bit(a, state->vi->attributes_valid) {
1798 if (inputs_read & BITFIELD64_BIT(VERT_ATTRIB_GENERIC0 + a))
1799 pipeline->vb_used |= BITFIELD64_BIT(state->vi->attributes[a].binding);
1800 }
1801
1802 u_foreach_bit(b, state->vi->bindings_valid) {
1803 pipeline->vb[b].stride = state->vi->bindings[b].stride;
1804 pipeline->vb[b].instanced = state->vi->bindings[b].input_rate ==
1805 VK_VERTEX_INPUT_RATE_INSTANCE;
1806 pipeline->vb[b].instance_divisor = state->vi->bindings[b].divisor;
1807 }
1808
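   /* Multiview is implemented by replaying each instance once per enabled
    * view, so the effective instance count is scaled by the number of bits
    * set in the view mask.
    */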
1809 pipeline->instance_multiplier = 1;
1810 if (pipeline->view_mask)
1811 pipeline->instance_multiplier = util_bitcount(pipeline->view_mask);
1812
1813 pipeline->negative_one_to_one =
1814 state->vp != NULL && state->vp->depth_clip_negative_one_to_one;
1815
1816 /* Store the line mode, polygon mode and rasterization sample count; these
1817  * are used when the primitive topology is dynamic.
1818  */
1819 pipeline->polygon_mode = state->rs->polygon_mode;
1820 pipeline->rasterization_samples =
1821 state->ms != NULL ? state->ms->rasterization_samples : 1;
1822 pipeline->line_mode = state->rs->line.mode;
1823 if (pipeline->line_mode == VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT) {
1824 if (pipeline->rasterization_samples > 1) {
1825 pipeline->line_mode = VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT;
1826 } else {
1827 pipeline->line_mode = VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT;
1828 }
1829 }
1830 pipeline->patch_control_points =
1831 state->ts != NULL ? state->ts->patch_control_points : 0;
1832
1833 /* Store the color write masks so they can be merged with the color write
1834  * enable state when that state is dynamic.
1835  */
1836 if (state->cb != NULL) {
1837 for (unsigned i = 0; i < state->cb->attachment_count; i++)
1838 pipeline->color_comp_writes[i] = state->cb->attachments[i].write_mask;
1839 }
1840
1841 return VK_SUCCESS;
1842 }
1843
1844 static VkResult
1845 anv_graphics_pipeline_create(struct anv_device *device,
1846 struct vk_pipeline_cache *cache,
1847 const VkGraphicsPipelineCreateInfo *pCreateInfo,
1848 const VkAllocationCallbacks *pAllocator,
1849 VkPipeline *pPipeline)
1850 {
1851 struct anv_graphics_pipeline *pipeline;
1852 VkResult result;
1853
1854 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
1855
1856 pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
1857 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1858 if (pipeline == NULL)
1859 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1860
1861 struct vk_graphics_pipeline_all_state all;
1862 struct vk_graphics_pipeline_state state = { };
1863 result = vk_graphics_pipeline_state_fill(&device->vk, &state, pCreateInfo,
1864 NULL /* driver_rp */,
1865 0 /* driver_rp_flags */,
1866 &all, NULL, 0, NULL);
1867 if (result != VK_SUCCESS) {
1868 vk_free2(&device->vk.alloc, pAllocator, pipeline);
1869 return result;
1870 }
1871
1872 result = anv_graphics_pipeline_init(pipeline, device, cache,
1873 pCreateInfo, &state, pAllocator);
1874 if (result != VK_SUCCESS) {
1875 vk_free2(&device->vk.alloc, pAllocator, pipeline);
1876 return result;
1877 }
1878
1879 anv_genX(device->info, graphics_pipeline_emit)(pipeline, &state);
1880
1881 *pPipeline = anv_pipeline_to_handle(&pipeline->base);
1882
1883 return pipeline->base.batch.status;
1884 }
1885
1886 VkResult anv_CreateGraphicsPipelines(
1887 VkDevice _device,
1888 VkPipelineCache pipelineCache,
1889 uint32_t count,
1890 const VkGraphicsPipelineCreateInfo* pCreateInfos,
1891 const VkAllocationCallbacks* pAllocator,
1892 VkPipeline* pPipelines)
1893 {
1894 ANV_FROM_HANDLE(anv_device, device, _device);
1895 ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache);
1896
1897 VkResult result = VK_SUCCESS;
1898
1899 unsigned i;
1900 for (i = 0; i < count; i++) {
1901 VkResult res = anv_graphics_pipeline_create(device,
1902 pipeline_cache,
1903 &pCreateInfos[i],
1904 pAllocator, &pPipelines[i]);
1905
1906 if (res == VK_SUCCESS)
1907 continue;
1908
1909 /* Bail out on the first error other than VK_PIPELINE_COMPILE_REQUIRED,
1910  * since it is not obvious which error should be reported when two
1911  * different failures occur. */
1912 result = res;
1913 if (res != VK_PIPELINE_COMPILE_REQUIRED)
1914 break;
1915
1916 pPipelines[i] = VK_NULL_HANDLE;
1917
1918 if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
1919 break;
1920 }
1921
1922 for (; i < count; i++)
1923 pPipelines[i] = VK_NULL_HANDLE;
1924
1925 return result;
1926 }
1927
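/* Fill a fixed-size char array field: zero it first, format with snprintf()
 * and assert that the result was not truncated.
 */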
1928 #define WRITE_STR(field, ...) ({ \
1929 memset(field, 0, sizeof(field)); \
1930 UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
1931 assert(i > 0 && i < sizeof(field)); \
1932 })
1933
1934 VkResult anv_GetPipelineExecutablePropertiesKHR(
1935 VkDevice device,
1936 const VkPipelineInfoKHR* pPipelineInfo,
1937 uint32_t* pExecutableCount,
1938 VkPipelineExecutablePropertiesKHR* pProperties)
1939 {
1940 ANV_FROM_HANDLE(anv_pipeline, pipeline, pPipelineInfo->pipeline);
1941 VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out,
1942 pProperties, pExecutableCount);
1943
1944 util_dynarray_foreach (&pipeline->executables, struct anv_pipeline_executable, exe) {
1945 vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
1946 gl_shader_stage stage = exe->stage;
1947 props->stages = mesa_to_vk_shader_stage(stage);
1948
1949 unsigned simd_width = exe->stats.dispatch_width;
1950 if (stage == MESA_SHADER_FRAGMENT) {
1951 WRITE_STR(props->name, "%s%d %s",
1952 simd_width ? "SIMD" : "vec",
1953 simd_width ? simd_width : 4,
1954 _mesa_shader_stage_to_string(stage));
1955 } else {
1956 WRITE_STR(props->name, "%s", _mesa_shader_stage_to_string(stage));
1957 }
1958 WRITE_STR(props->description, "%s%d %s shader",
1959 simd_width ? "SIMD" : "vec",
1960 simd_width ? simd_width : 4,
1961 _mesa_shader_stage_to_string(stage));
1962
1963 /* The compiler gives us a dispatch width of 0 for vec4 but Vulkan
1964 * wants a subgroup size of 1.
1965 */
1966 props->subgroupSize = MAX2(simd_width, 1);
1967 }
1968 }
1969
1970 return vk_outarray_status(&out);
1971 }
1972
1973 static const struct anv_pipeline_executable *
1974 anv_pipeline_get_executable(struct anv_pipeline *pipeline, uint32_t index)
1975 {
1976 assert(index < util_dynarray_num_elements(&pipeline->executables,
1977 struct anv_pipeline_executable));
1978 return util_dynarray_element(
1979 &pipeline->executables, struct anv_pipeline_executable, index);
1980 }
1981
1982 VkResult anv_GetPipelineExecutableStatisticsKHR(
1983 VkDevice device,
1984 const VkPipelineExecutableInfoKHR* pExecutableInfo,
1985 uint32_t* pStatisticCount,
1986 VkPipelineExecutableStatisticKHR* pStatistics)
1987 {
1988 ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
1989 VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
1990 pStatistics, pStatisticCount);
1991
1992 const struct anv_pipeline_executable *exe =
1993 anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
1994
1995 const struct elk_stage_prog_data *prog_data;
1996 switch (pipeline->type) {
1997 case ANV_PIPELINE_GRAPHICS: {
1998 prog_data = anv_pipeline_to_graphics(pipeline)->shaders[exe->stage]->prog_data;
1999 break;
2000 }
2001 case ANV_PIPELINE_COMPUTE: {
2002 prog_data = anv_pipeline_to_compute(pipeline)->cs->prog_data;
2003 break;
2004 }
2005 default:
2006 unreachable("invalid pipeline type");
2007 }
2008
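   /* The statistics below come straight from the counters gathered by the
    * back-end compiler and are exposed through
    * VK_KHR_pipeline_executable_properties.
    */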
2009 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
2010 WRITE_STR(stat->name, "Instruction Count");
2011 WRITE_STR(stat->description,
2012 "Number of GEN instructions in the final generated "
2013 "shader executable.");
2014 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2015 stat->value.u64 = exe->stats.instructions;
2016 }
2017
2018 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
2019 WRITE_STR(stat->name, "SEND Count");
2020 WRITE_STR(stat->description,
2021 "Number of instructions in the final generated shader "
2022 "executable which access external units such as the "
2023 "constant cache or the sampler.");
2024 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2025 stat->value.u64 = exe->stats.sends;
2026 }
2027
2028 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
2029 WRITE_STR(stat->name, "Loop Count");
2030 WRITE_STR(stat->description,
2031 "Number of loops (not unrolled) in the final generated "
2032 "shader executable.");
2033 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2034 stat->value.u64 = exe->stats.loops;
2035 }
2036
2037 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
2038 WRITE_STR(stat->name, "Cycle Count");
2039 WRITE_STR(stat->description,
2040 "Estimate of the number of EU cycles required to execute "
2041 "the final generated executable. This is an estimate only "
2042 "and may vary greatly from actual run-time performance.");
2043 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2044 stat->value.u64 = exe->stats.cycles;
2045 }
2046
2047 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
2048 WRITE_STR(stat->name, "Spill Count");
2049 WRITE_STR(stat->description,
2050 "Number of scratch spill operations. This gives a rough "
2051 "estimate of the cost incurred due to spilling temporary "
2052 "values to memory. If this is non-zero, you may want to "
2053 "adjust your shader to reduce register pressure.");
2054 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2055 stat->value.u64 = exe->stats.spills;
2056 }
2057
2058 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
2059 WRITE_STR(stat->name, "Fill Count");
2060 WRITE_STR(stat->description,
2061 "Number of scratch fill operations. This gives a rough "
2062 "estimate of the cost incurred due to spilling temporary "
2063 "values to memory. If this is non-zero, you may want to "
2064 "adjust your shader to reduce register pressure.");
2065 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2066 stat->value.u64 = exe->stats.fills;
2067 }
2068
2069 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
2070 WRITE_STR(stat->name, "Scratch Memory Size");
2071 WRITE_STR(stat->description,
2072 "Number of bytes of scratch memory required by the "
2073 "generated shader executable. If this is non-zero, you "
2074 "may want to adjust your shader to reduce register "
2075 "pressure.");
2076 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2077 stat->value.u64 = prog_data->total_scratch;
2078 }
2079
2080 if (gl_shader_stage_uses_workgroup(exe->stage)) {
2081 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
2082 WRITE_STR(stat->name, "Workgroup Memory Size");
2083 WRITE_STR(stat->description,
2084 "Number of bytes of workgroup shared memory used by this "
2085 "shader including any padding.");
2086 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2087 stat->value.u64 = prog_data->total_shared;
2088 }
2089 }
2090
2091 return vk_outarray_status(&out);
2092 }
2093
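/* Copy an internal representation string into the client-provided structure,
 * following the usual Vulkan two-call pattern: when pData is NULL only the
 * required size is reported; otherwise the text is copied, and false is
 * returned if it had to be truncated so the caller can report VK_INCOMPLETE.
 */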
2094 static bool
2095 write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
2096 const char *data)
2097 {
2098 ir->isText = VK_TRUE;
2099
2100 size_t data_len = strlen(data) + 1;
2101
2102 if (ir->pData == NULL) {
2103 ir->dataSize = data_len;
2104 return true;
2105 }
2106
2107 strncpy(ir->pData, data, ir->dataSize);
2108 if (ir->dataSize < data_len)
2109 return false;
2110
2111 ir->dataSize = data_len;
2112 return true;
2113 }
2114
2115 VkResult anv_GetPipelineExecutableInternalRepresentationsKHR(
2116 VkDevice device,
2117 const VkPipelineExecutableInfoKHR* pExecutableInfo,
2118 uint32_t* pInternalRepresentationCount,
2119 VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
2120 {
2121 ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
2122 VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out,
2123 pInternalRepresentations, pInternalRepresentationCount);
2124 bool incomplete_text = false;
2125
2126 const struct anv_pipeline_executable *exe =
2127 anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
2128
2129 if (exe->nir) {
2130 vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) {
2131 WRITE_STR(ir->name, "Final NIR");
2132 WRITE_STR(ir->description,
2133 "Final NIR before going into the back-end compiler");
2134
2135 if (!write_ir_text(ir, exe->nir))
2136 incomplete_text = true;
2137 }
2138 }
2139
2140 if (exe->disasm) {
2141 vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) {
2142 WRITE_STR(ir->name, "GEN Assembly");
2143 WRITE_STR(ir->description,
2144 "Final GEN assembly for the generated shader binary");
2145
2146 if (!write_ir_text(ir, exe->disasm))
2147 incomplete_text = true;
2148 }
2149 }
2150
2151 return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
2152 }
2153